From 62b1d118f85564d70b00616a8c2a9a936d3235ee Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 15 Aug 2022 20:18:57 -0400 Subject: [PATCH 01/61] using requests to download soft files --- geofetch/geofetch.py | 57 +++++++++++++++++++++++--------------------- geofetch/utils.py | 32 +++++++++++++++++-------- 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 82244d3..c5ca7d5 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -16,7 +16,7 @@ # import tarfile import time -from .utils import ( +from utils import ( Accession, parse_accessions, parse_SOFT_line, @@ -24,7 +24,7 @@ clean_soft_files, run_subprocess, ) -from ._version import __version__ +from _version import __version__ import logmuse from ubiquerg import expandpath, is_command_callable @@ -277,24 +277,26 @@ def fetch_all(self, input, name=None): # The GSM file has metadata describing each sample, which we will use to # produce a sample annotation sheet. 
if not os.path.isfile(file_gse) or self.refresh_metadata: - Accession(acc_GSE).fetch_metadata(file_gse) + file_gse_content = Accession(acc_GSE).fetch_metadata(file_gse, clean=self.discard_soft) else: self._LOGGER.info(f"Found previous GSE file: {file_gse}") + gse_file_obj = open(file_gse, "r") + file_gse_content = gse_file_obj.read().split('\n') if not os.path.isfile(file_gsm) or self.refresh_metadata: - Accession(acc_GSE).fetch_metadata(file_gsm, typename="GSM") + file_gsm_content = Accession(acc_GSE).fetch_metadata(file_gsm, typename="GSM", clean=self.discard_soft) else: self._LOGGER.info(f"Found previous GSM file: {file_gsm}") - - # if not os.path.isfile(file_gsm) or not os.path.isfile(file_gse): + gsm_file_obj = open(file_gsm, "r") + file_gsm_content = gsm_file_obj.read().split('\n') # download processed data if self.processed: - try: + #try: ( meta_processed_samples, meta_processed_series, - ) = self.get_list_of_processed_files(file_gse, file_gsm) + ) = self.get_list_of_processed_files(file_gse_content, file_gsm_content) # taking into account list of GSM that is specified in the input file gsm_list = acc_GSE_list[acc_GSE] @@ -307,6 +309,7 @@ def fetch_all(self, input, name=None): ) meta_processed_series = self.unify_list_keys(meta_processed_series) + # samples list_of_keys = self.get_list_of_keys(meta_processed_samples) self._LOGGER.info("Expanding metadata list...") for key_in_list in list_of_keys: @@ -314,6 +317,7 @@ def fetch_all(self, input, name=None): meta_processed_samples, key_in_list ) + # series list_of_keys_series = self.get_list_of_keys(meta_processed_series) self._LOGGER.info("Expanding metadata list...") for key_in_list in list_of_keys_series: @@ -409,17 +413,17 @@ def fetch_all(self, input, name=None): ] for file_url in processed_series_files: self.download_processed_file(file_url, data_geo_folder) - except Exception as processed_exception: - failed_runs.append(acc_GSE) - self._LOGGER.warning(f"Error occurred: {processed_exception}") + # except 
Exception as processed_exception: + # failed_runs.append(acc_GSE) + # self._LOGGER.warning(f"Error occurred: {processed_exception}") else: # download gsm metadata - gsm_metadata = self.get_gsm_metadata(acc_GSE, acc_GSE_list, file_gsm) + gsm_metadata = self.get_gsm_metadata(acc_GSE, acc_GSE_list, file_gsm_content) metadata_dict[acc_GSE] = gsm_metadata # download gsm metadata - SRP_list_result = self.get_SRA_meta(file_gse, gsm_metadata, file_sra) + SRP_list_result = self.get_SRA_meta(file_gse_content, gsm_metadata, file_sra) if not SRP_list_result: # delete current acc if no raw data was found # del metadata_dict[acc_GSE] @@ -1345,18 +1349,18 @@ def download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): else: self._LOGGER.info(f"\033[38;5;242mFile {full_filepath} exists.\033[0m") - def get_list_of_processed_files(self, file_gse, file_gsm): + def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: list): """ Given a paths to GSE and GSM metafile create a list of dicts of metadata of processed files - :param str file_gse: the path to gse metafile - :param str file_gsm: the path to gse metafile + :param list file_gse_content: list of lines of gse metafile + :param list file_gsm_content: list of lines of gse metafile :return list: list of metadata of processed files """ tar_re = re.compile(r".*\.tar$") gse_numb = None meta_processed_samples = [] meta_processed_series = {"GSE": "", "files": []} - for line in open(file_gse, "r"): + for line in file_gse_content: if re.compile(r"!Series_geo_accession").search(line): gse_numb = self.get_value(line) @@ -1373,7 +1377,7 @@ def get_list_of_processed_files(self, file_gse, file_gsm): if tar_re.search(filename): # find and download filelist - file with information about files in tar index = file_url.rfind("/") - tar_files_list_url = file_url[: index + 1] + "filelist.txt" + tar_files_list_url = "https" + file_url[3 : index + 1] + "filelist.txt" # file_list_name filelist_path = 
os.path.join( self.metadata_expanded, gse_numb + "_file_list.txt" @@ -1385,7 +1389,7 @@ def get_list_of_processed_files(self, file_gse, file_gsm): ) nb = len(meta_processed_samples) - 1 - for line_gsm in open(file_gsm, "r"): + for line_gsm in file_gsm_content: if line_gsm[0] == "^": nb = len(self.check_file_existance(meta_processed_samples)) meta_processed_samples.append( @@ -1589,7 +1593,7 @@ def run_size_filter(self, meta_list, col_name="file_size"): return filtered_list @staticmethod - def read_tar_filelist(file_path): + def read_tar_filelist(file_path: str): """ Creating list for supplementary files that are listed in "filelist.txt" :param str file_path: path to the file with information about files that are zipped ("filelist.txt") @@ -1621,7 +1625,6 @@ def get_value(all_line): return line_value.split(": ")[-1].rstrip("\n") def download_processed_file(self, file_url, data_folder): - """ Given a url for a file, download it, and extract anything passing the filter. :param str file_url: the URL of the file to download @@ -1664,16 +1667,16 @@ def download_processed_file(self, file_url, data_folder): if ntry > 4: raise e - def get_SRA_meta(self, file_gse, gsm_metadata, file_sra=None): + def get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): """ Parse out the SRA project identifier from the GSE file - :param str file_gse: full path to GSE.soft metafile + :param list file_gse_content: list of content of file_sde_content :param dict gsm_metadata: dict of GSM metadata :param str file_sra: full path to SRA.csv metafile that has to be downloaded """ # acc_SRP = None - for line in open(file_gse, "r"): + for line in file_gse_content: found = re.findall(PROJECT_PATTERN, line) if found: acc_SRP = found[0] @@ -1780,13 +1783,13 @@ def get_SRP_list(self, srp_number: str) -> list: return SRP_list - def get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm): + def get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): """ A simple state 
machine to parse SOFT formatted files (Here, the GSM file) :param str acc_GSE: GSE number (Series accession) :param dict acc_GSE_list: list of GSE - :param str file_gsm: full path to GSM.soft metafile + :param list file_gsm_content: list of contents of gsm file :return dict: dictionary of experiment information (gsm_metadata) """ gsm_metadata = {} @@ -1798,7 +1801,7 @@ def get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm): current_sample_id = None current_sample_srx = False samples_list = [] - for line in open(file_gsm, "r"): + for line in file_gsm_content: line = line.rstrip() if len(line) == 0: # Apparently SOFT files can contain blank lines continue diff --git a/geofetch/utils.py b/geofetch/utils.py index 7835196..8c95da4 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -5,6 +5,7 @@ import subprocess import sys import re +import requests __author__ = [ @@ -181,18 +182,17 @@ def __init__(self, accn, strict=True): self.accn = accn self.typename = typename.upper() - def fetch_metadata(self, outpath=None, typename=None): + def fetch_metadata(self, outpath: str = None, typename: str = None, clean: bool = False) -> list: """ Fetch the metadata associated with this accession. - :param str outpath: path to file to which to write output, optional :param str typename: type indicating URL format, use type parsed at construction if unspecified + :param str outpath: path to file to which to write output, optional + :param bool clean: if true, files won't be saved + :return: list of lines in soft file """ - # TODO: note this sort of type-dependent strategy suggests subclassing. - # For now, class is small, but that should maybe be done if it grows. 
- typename = (typename or self.typename).upper() if not is_known_type(typename=typename): raise self.accn_type_exception(self.accn, typename) @@ -210,7 +210,16 @@ def fetch_metadata(self, outpath=None, typename=None): raise _LOGGER.debug("Fetching: '%s'", full_url) - if outpath: + result = requests.get(full_url) + if result.ok: + result_text = result.text + result_list = result_text.replace("\r", "").split("\n") + result_list = [elem for elem in result_list if len(elem) > 0] + + else: + raise Exception(f"Error in requesting fileL: {full_url}") + + if outpath and not clean: # Ensure we have filepath and that needed directories exist. if not os.path.splitext(outpath)[1]: _LOGGER.debug("Looks like folder, not file: %s", outpath) @@ -222,11 +231,12 @@ def fetch_metadata(self, outpath=None, typename=None): if not os.path.exists(dirpath): _LOGGER.debug("Forging path to '%s'", dirpath) os.makedirs(dirpath) - cmd = "wget -O {} {}".format(outpath, full_url) - else: - cmd = "wget {}".format(full_url) - run_subprocess(cmd.split(" ")) + # save file: + with open(outpath, 'w') as f: + f.write(result_text) + + return result_list @staticmethod def _validate(accn): @@ -335,3 +345,5 @@ def run_subprocess(*args, **kwargs): except OSError as ose: _LOGGER.warn(f"Exception raised during subprocess termination: {ose}") sys.exit(1) + +#file_gse_content \ No newline at end of file From 51ee83a7c8f2b4fd02ea32d4127a5d9d64a0e905 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 16 Aug 2022 00:21:27 -0400 Subject: [PATCH 02/61] added requests for additional files --- geofetch/geofetch.py | 58 ++++++++++++++++++++++++++------------------ geofetch/utils.py | 4 +-- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index c5ca7d5..31afe51 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -28,6 +28,7 @@ import logmuse from ubiquerg import expandpath, is_command_callable +from io import StringIO _STRING_TYPES = str _LOGGER 
= None @@ -282,6 +283,7 @@ def fetch_all(self, input, name=None): self._LOGGER.info(f"Found previous GSE file: {file_gse}") gse_file_obj = open(file_gse, "r") file_gse_content = gse_file_obj.read().split('\n') + file_gse_content = [elem for elem in file_gse_content if len(elem) > 0] if not os.path.isfile(file_gsm) or self.refresh_metadata: file_gsm_content = Accession(acc_GSE).fetch_metadata(file_gsm, typename="GSM", clean=self.discard_soft) @@ -289,6 +291,7 @@ def fetch_all(self, input, name=None): self._LOGGER.info(f"Found previous GSM file: {file_gsm}") gsm_file_obj = open(file_gsm, "r") file_gsm_content = gsm_file_obj.read().split('\n') + file_gsm_content = [elem for elem in file_gsm_content if len(elem) > 0] # download processed data if self.processed: @@ -1377,16 +1380,24 @@ def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: if tar_re.search(filename): # find and download filelist - file with information about files in tar index = file_url.rfind("/") - tar_files_list_url = "https" + file_url[3 : index + 1] + "filelist.txt" + tar_files_list_url = "https" + file_url[3: index + 1] + "filelist.txt" # file_list_name filelist_path = os.path.join( self.metadata_expanded, gse_numb + "_file_list.txt" ) - self.download_file( - tar_files_list_url, - self.metadata_expanded, - gse_numb + "_file_list.txt", - ) + + if not os.path.isfile(filelist_path) or self.refresh_metadata: + result = requests.get(tar_files_list_url) + if result.ok: + filelist_raw_text = result.text + if not self.discard_soft: + with open(filelist_path, 'w') as f: + f.write(filelist_raw_text) + else: + self._LOGGER.info(f"Found previous GSM file: {filelist_path}") + filelist_obj = open(filelist_path, "r") + filelist_raw_text = filelist_obj.read() + nb = len(meta_processed_samples) - 1 for line_gsm in file_gsm_content: @@ -1449,7 +1460,7 @@ def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: ) # expand meta_processed_samples with information about 
type and size - file_info_add = self.read_tar_filelist(filelist_path) + file_info_add = self.read_tar_filelist(filelist_raw_text) for index_nr in range(len(meta_processed_samples)): file_name = meta_processed_samples[index_nr]["file"] meta_processed_samples[index_nr].update( @@ -1593,29 +1604,28 @@ def run_size_filter(self, meta_list, col_name="file_size"): return filtered_list @staticmethod - def read_tar_filelist(file_path: str): + def read_tar_filelist(raw_text: str): """ Creating list for supplementary files that are listed in "filelist.txt" :param str file_path: path to the file with information about files that are zipped ("filelist.txt") :return dict: dict of supplementary file names and additional information """ - + f = StringIO(raw_text) files_info = {} - with open(file_path, newline="") as csvfile: - csv_reader = csv.reader(csvfile, delimiter="\t") - line_count = 0 - for row in csv_reader: - if line_count == 0: - name_index = row.index("Name") - size_index = row.index("Size") - type_index = row.index("Type") - - line_count += 1 - else: - files_info[row[name_index]] = { - "file_size": row[size_index], - "type": row[type_index], - } + csv_reader = csv.reader(f, delimiter="\t") + line_count = 0 + for row in csv_reader: + if line_count == 0: + name_index = row.index("Name") + size_index = row.index("Size") + type_index = row.index("Type") + + line_count += 1 + else: + files_info[row[name_index]] = { + "file_size": row[size_index], + "type": row[type_index], + } return files_info diff --git a/geofetch/utils.py b/geofetch/utils.py index 8c95da4..270123f 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -233,8 +233,8 @@ def fetch_metadata(self, outpath: str = None, typename: str = None, clean: bool os.makedirs(dirpath) # save file: - with open(outpath, 'w') as f: - f.write(result_text) + with open(outpath, 'w') as f: + f.write(result_text) return result_list From 533d3a2746bc8496a33ed6aa27e6c04c6474da6e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: 
Tue, 16 Aug 2022 10:38:19 -0400 Subject: [PATCH 03/61] Refactored const --- geofetch/const.py | 40 ++++++++++++++++++++++++++++++ geofetch/geofetch.py | 58 ++++++++------------------------------------ geofetch/utils.py | 14 +---------- 3 files changed, 51 insertions(+), 61 deletions(-) create mode 100644 geofetch/const.py diff --git a/geofetch/const.py b/geofetch/const.py new file mode 100644 index 0000000..6c0f1cf --- /dev/null +++ b/geofetch/const.py @@ -0,0 +1,40 @@ +import re +_LOGGER = None + +# A set of hard-coded keys if you want to limit to just a few instead of taking +# all information provided in GEO. Use with `--use-key-subset` +ANNOTATION_SHEET_KEYS = [ + "sample_name", + "protocol", + "read_type", + "organism", + "data_source", + "Sample_title", + "Sample_source_name_ch1", + "Sample_organism_ch1", + "Sample_library_selection", + "Sample_library_strategy", + "Sample_type", + "SRR", + "SRX", + "Sample_geo_accession", + "Sample_series_id", + "Sample_instrument_model", +] + +# Regex to parse out SRA accession identifiers +PROJECT_PATTERN = re.compile(r"(SRP\d{4,8})") +EXPERIMENT_PATTERN = re.compile(r"(SRX\d{4,8})") +GSE_PATTERN = re.compile(r"(GSE\d{4,8})") +SUPP_FILE_PATTERN = re.compile("Sample_supplementary_file") +SER_SUPP_FILE_PATTERN = re.compile("Series_supplementary_file") + +SAMPLE_SUPP_METADATA_FILE = "_samples.csv" +EXP_SUPP_METADATA_FILE = "_series.csv" + +# How many times should we retry failing prefetch call? 
+NUM_RETRIES = 3 +REQUEST_SLEEP = 0.4 + +NCBI_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=sra&term={SRP_NUMBER}&retmax=999&rettype=uilist&retmode=json" +NCBI_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id={ID}&rettype=runinfo&retmode=xml" \ No newline at end of file diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 31afe51..ef52647 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -2,7 +2,6 @@ __author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"] - import argparse import copy import csv @@ -16,6 +15,8 @@ # import tarfile import time +from _version import __version__ +from const import * from utils import ( Accession, parse_accessions, @@ -24,54 +25,11 @@ clean_soft_files, run_subprocess, ) -from _version import __version__ -import logmuse +import logmuse from ubiquerg import expandpath, is_command_callable from io import StringIO -_STRING_TYPES = str -_LOGGER = None - -# A set of hard-coded keys if you want to limit to just a few instead of taking -# all information provided in GEO. Use with `--use-key-subset` -ANNOTATION_SHEET_KEYS = [ - "sample_name", - "protocol", - "read_type", - "organism", - "data_source", - "Sample_title", - "Sample_source_name_ch1", - "Sample_organism_ch1", - "Sample_library_selection", - "Sample_library_strategy", - "Sample_type", - "SRR", - "SRX", - "Sample_geo_accession", - "Sample_series_id", - "Sample_instrument_model", -] - -# Regex to parse out SRA accession identifiers -PROJECT_PATTERN = re.compile(r"(SRP\d{4,8})") -EXPERIMENT_PATTERN = re.compile(r"(SRX\d{4,8})") -GSE_PATTERN = re.compile(r"(GSE\d{4,8})") -SUPP_FILE_PATTERN = re.compile("Sample_supplementary_file") -SER_SUPP_FILE_PATTERN = re.compile("Series_supplementary_file") - -SAMPLE_SUPP_METADATA_FILE = "_samples.csv" -EXP_SUPP_METADATA_FILE = "_series.csv" - -# How many times should we retry failing prefetch call? 
-NUM_RETRIES = 3 -REQUEST_SLEEP = 0.4 - -NCBI_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=sra&term={SRP_NUMBER}&retmax=999&rettype=uilist&retmode=json" -NCBI_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id={ID}&rettype=runinfo&retmode=xml" - - class Geofetcher: def __init__( self, @@ -485,7 +443,7 @@ def fetch_all(self, input, name=None): ) if ( - isinstance(gsm_metadata[experiment]["SRR"], _STRING_TYPES) + isinstance(gsm_metadata[experiment]["SRR"], str) and experiment not in gsm_multi_table ): # Only one has been stuck in so far, make a list @@ -806,14 +764,16 @@ def write_gsm_annotation(self, gsm_metadata, file_annotation, use_key_subset=Fal # keys = gsm_metadata[gsm_metadata.keys().next()].keys() keys = list(list(gsm_metadata.values())[0].keys()) - self._LOGGER.info(f"Sample annotation sheet: {file_annotation}") + self._LOGGER.info(f"Sample annotation sheet: {file_annotation} . Saving....") fp = expandpath(file_annotation) - self._LOGGER.info(f"Writing: {fp}") with open(fp, "w") as of: w = csv.DictWriter(of, keys, extrasaction="ignore") w.writeheader() for item in gsm_metadata: w.writerow(gsm_metadata[item]) + self._LOGGER.info( + "\033[92mFile has been saved successfully\033[0m" + ) return fp def write_processed_annotation(self, processed_metadata, file_annotation_path): @@ -1393,6 +1353,8 @@ def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: if not self.discard_soft: with open(filelist_path, 'w') as f: f.write(filelist_raw_text) + else: + raise Exception(f"error in requesting tar_files_list") else: self._LOGGER.info(f"Found previous GSM file: {filelist_path}") filelist_obj = open(filelist_path, "r") diff --git a/geofetch/utils.py b/geofetch/utils.py index 270123f..efbc7ff 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -7,20 +7,8 @@ import re import requests - -__author__ = [ - "Oleksandr Khoroshevskyi", - "Vince Reuter", - "Nathan Sheffield", -] -__email__ = 
"bnt4me@virginia.edu" - -__all__ = ["parse_accessions"] - - _LOGGER = logging.getLogger(__name__) - # This dict provides NCBI lookup URLs for different accession types. SRX # identifiers can be used to grab metadata from SRA for a single sample, just as # an SRP identifier is used to grab the same table for multiple samples, so @@ -343,7 +331,7 @@ def run_subprocess(*args, **kwargs): p.terminate() print("Pipeline aborted.") except OSError as ose: - _LOGGER.warn(f"Exception raised during subprocess termination: {ose}") + _LOGGER.warning(f"Exception raised during subprocess termination: {ose}") sys.exit(1) #file_gse_content \ No newline at end of file From 69fc8ea909f13e9851d2e3c11ba10934491e5755 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 16 Aug 2022 12:39:15 -0400 Subject: [PATCH 04/61] #81 first stage - done --- geofetch/geofetch.py | 135 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 115 insertions(+), 20 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index ef52647..535e2c3 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 +from __future__ import annotations + __author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"] import argparse @@ -29,6 +31,8 @@ import logmuse from ubiquerg import expandpath, is_command_callable from io import StringIO +from typing import List, Union, Dict +import peppy class Geofetcher: def __init__( @@ -173,7 +177,82 @@ def __init__( if bam_conversion and not just_metadata and not self.which("samtools"): raise SystemExit("For SAM/BAM processing, samtools should be on PATH.") - def fetch_all(self, input, name=None): + def get_project_obj(self, input: str) -> Dict[peppy.Project]: + """ + Function for fetching projects from GEO|SRA and obtaining peppy project + :param input: GSE number, or path to file of GSE numbers + :return: peppy project or list of project, if acc_anno is set. 
+ """ + acc_GSE_list = parse_accessions( + input, self.metadata_expanded, self.just_metadata + ) + + raw_project_dict = {} + + if self.processed: + if self.supp_by == "all": + data_source_all = True + else: + data_source_all = False + + import pandas as pd + + if self.acc_anno: + self.acc_anno = False + for acc_GSE in acc_GSE_list.keys(): + if data_source_all: + # samples + self.supp_by = "samples" + samples_list = self.fetch_all(input=acc_GSE, just_object=True) + if len(samples_list) > 0: + raw_project_dict[acc_GSE + "_samples"] = pd.DataFrame(samples_list) + + # series + self.supp_by = "series" + series_list = self.fetch_all(input=acc_GSE, just_object=True) + if len(series_list) > 0: + raw_project_dict[acc_GSE + "_series"] = pd.DataFrame(series_list) + else: + ser_list = self.fetch_all(input=acc_GSE, just_object=True) + if len(ser_list) > 0: + raw_project_dict[acc_GSE+"_"+self.supp_by] = pd.DataFrame(ser_list) + else: + if data_source_all: + # samples + self.supp_by = "samples" + samples_list = self.fetch_all(input=input, just_object=True) + if len(samples_list) > 0: + raw_project_dict["project_samples"] = pd.DataFrame(samples_list) + + # series + self.supp_by = "series" + series_list = self.fetch_all(input=input, just_object=True) + if len(series_list) > 0: + raw_project_dict["project_series"] = pd.DataFrame(series_list) + + else: + ser_list = self.fetch_all(input=input, just_object=True) + if len(ser_list) > 0: + raw_project_dict["project_" + self.supp_by] = pd.DataFrame(ser_list) + print(raw_project_dict) + + else: + if self.acc_anno: + self.acc_anno = False + for acc_GSE in acc_GSE_list.keys(): + project_dict = self.fetch_all(input=input, just_object=True) + if len(project_dict) > 0: + raw_project_dict[acc_GSE+"_raw_samples"] = project_dict + + else: + ser_dict = self.fetch_all(input=input, just_object=True) + if len(ser_dict) > 0: + raw_project_dict["raw_samples"] = ser_dict + + + return peppy.Project() + + def fetch_all(self, input: str, name: str = None, 
just_object: bool = False): """Main script driver/workflow""" if name: @@ -578,28 +657,44 @@ def fetch_all(self, input, name=None): ) elif self.supp_by == "samples": - supp_sample_path_meta = os.path.join( - self.metadata_raw, - "PEP_samples", - self.project_name + SAMPLE_SUPP_METADATA_FILE, - ) - self.write_processed_annotation( - processed_metadata_samples, supp_sample_path_meta - ) + if just_object: + return processed_metadata_samples + else: + supp_sample_path_meta = os.path.join( + self.metadata_raw, + "PEP_samples", + self.project_name + SAMPLE_SUPP_METADATA_FILE, + ) + self.write_processed_annotation( + processed_metadata_samples, supp_sample_path_meta + ) elif self.supp_by == "series": - supp_series_path_meta = os.path.join( - self.metadata_raw, - "PEP_series", - self.project_name + EXP_SUPP_METADATA_FILE, - ) - self.write_processed_annotation( - processed_metadata_exp, supp_series_path_meta - ) + if just_object: + return processed_metadata_exp + else: + supp_series_path_meta = os.path.join( + self.metadata_raw, + "PEP_series", + self.project_name + EXP_SUPP_METADATA_FILE, + ) + self.write_processed_annotation( + processed_metadata_exp, supp_series_path_meta + ) # saving PEPs for raw data else: - self.write_raw_annotation(metadata_dict, subannotation_dict) + if not just_object: + self.write_raw_annotation(metadata_dict, subannotation_dict) + else: + raw_meta_list = [] + for meta_key in metadata_dict.keys(): + for srx_key in metadata_dict[meta_key].keys(): + metadata_dict[meta_key][srx_key]["gse_number"] = meta_key + metadata_dict[meta_key][srx_key]["srx_number"] = srx_key + raw_meta_list.append(metadata_dict[meta_key][srx_key]) + # TODO: add subannotation_dict!!!! 
+ return raw_meta_list def expand_metadata_list(self, metadata_list, dict_key): """ @@ -869,7 +964,7 @@ def write_raw_annotation(self, metadata_dict, subannotation_dict): Combining individual accessions into project-level annotations, and writeing individual accession files (if requested) :param dict metadata_dict: dictionary of metadata - :param dict subannotation_dict: dictionary of sub-annotation metadata + :param dict sub-annotation_dict: dictionary of sub-annotation metadata """ if self.discard_soft: @@ -2109,7 +2204,7 @@ def main(): args = _parse_cmdl(sys.argv[1:]) args_dict = vars(args) args_dict["args"] = args - Geofetcher(**args_dict).fetch_all(args_dict["input"]) + Geofetcher(**args_dict).get_project_obj(args_dict["input"]) if __name__ == "__main__": From 7439526782880bdd6c9345672e847a513460ddc4 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 16 Aug 2022 15:43:02 -0400 Subject: [PATCH 05/61] to peppy(done) + requirements + black --- geofetch/_version.py | 2 +- geofetch/const.py | 3 +- geofetch/geofetch.py | 295 +++++++++++++++--------------- geofetch/utils.py | 9 +- requirements/requirements-all.txt | 2 + 5 files changed, 163 insertions(+), 148 deletions(-) diff --git a/geofetch/_version.py b/geofetch/_version.py index 1f4c4d4..ae6db5f 100644 --- a/geofetch/_version.py +++ b/geofetch/_version.py @@ -1 +1 @@ -__version__ = "0.10.1" +__version__ = "0.11.0" diff --git a/geofetch/const.py b/geofetch/const.py index 6c0f1cf..6bc0ff4 100644 --- a/geofetch/const.py +++ b/geofetch/const.py @@ -1,4 +1,5 @@ import re + _LOGGER = None # A set of hard-coded keys if you want to limit to just a few instead of taking @@ -37,4 +38,4 @@ REQUEST_SLEEP = 0.4 NCBI_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=sra&term={SRP_NUMBER}&retmax=999&rettype=uilist&retmode=json" -NCBI_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id={ID}&rettype=runinfo&retmode=xml" \ No newline at end of file +NCBI_EFETCH = 
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id={ID}&rettype=runinfo&retmode=xml" diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 535e2c3..7b689f8 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -17,9 +17,9 @@ # import tarfile import time -from _version import __version__ -from const import * -from utils import ( +from ._version import __version__ +from .const import * +from .utils import ( Accession, parse_accessions, parse_SOFT_line, @@ -33,6 +33,8 @@ from io import StringIO from typing import List, Union, Dict import peppy +import pandas as pd + class Geofetcher: def __init__( @@ -183,6 +185,7 @@ def get_project_obj(self, input: str) -> Dict[peppy.Project]: :param input: GSE number, or path to file of GSE numbers :return: peppy project or list of project, if acc_anno is set. """ + self.just_metadata = True acc_GSE_list = parse_accessions( input, self.metadata_expanded, self.just_metadata ) @@ -195,8 +198,6 @@ def get_project_obj(self, input: str) -> Dict[peppy.Project]: else: data_source_all = False - import pandas as pd - if self.acc_anno: self.acc_anno = False for acc_GSE in acc_GSE_list.keys(): @@ -205,52 +206,57 @@ def get_project_obj(self, input: str) -> Dict[peppy.Project]: self.supp_by = "samples" samples_list = self.fetch_all(input=acc_GSE, just_object=True) if len(samples_list) > 0: - raw_project_dict[acc_GSE + "_samples"] = pd.DataFrame(samples_list) + raw_project_dict[acc_GSE + "_samples"] = samples_list # series self.supp_by = "series" series_list = self.fetch_all(input=acc_GSE, just_object=True) if len(series_list) > 0: - raw_project_dict[acc_GSE + "_series"] = pd.DataFrame(series_list) + raw_project_dict[acc_GSE + "_series"] = series_list else: ser_list = self.fetch_all(input=acc_GSE, just_object=True) if len(ser_list) > 0: - raw_project_dict[acc_GSE+"_"+self.supp_by] = pd.DataFrame(ser_list) + raw_project_dict[acc_GSE + "_" + self.supp_by] = ser_list else: if data_source_all: # samples 
self.supp_by = "samples" samples_list = self.fetch_all(input=input, just_object=True) if len(samples_list) > 0: - raw_project_dict["project_samples"] = pd.DataFrame(samples_list) + raw_project_dict["project_samples"] = samples_list # series self.supp_by = "series" series_list = self.fetch_all(input=input, just_object=True) if len(series_list) > 0: - raw_project_dict["project_series"] = pd.DataFrame(series_list) + raw_project_dict["project_series"] = series_list else: ser_list = self.fetch_all(input=input, just_object=True) if len(ser_list) > 0: - raw_project_dict["project_" + self.supp_by] = pd.DataFrame(ser_list) - print(raw_project_dict) + raw_project_dict["project_" + self.supp_by] = ser_list else: + # Not sure about below code... if self.acc_anno: self.acc_anno = False for acc_GSE in acc_GSE_list.keys(): project_dict = self.fetch_all(input=input, just_object=True) if len(project_dict) > 0: - raw_project_dict[acc_GSE+"_raw_samples"] = project_dict + raw_project_dict[acc_GSE + "_raw_samples"] = project_dict else: ser_dict = self.fetch_all(input=input, just_object=True) if len(ser_dict) > 0: raw_project_dict["raw_samples"] = ser_dict + new_dict = {} + for proj_key in raw_project_dict.keys(): + new_dict[proj_key] = peppy.Project( + pd_object=pd.DataFrame(raw_project_dict[proj_key]) + ) - return peppy.Project() + return new_dict def fetch_all(self, input: str, name: str = None, just_object: bool = False): """Main script driver/workflow""" @@ -315,155 +321,157 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): # The GSM file has metadata describing each sample, which we will use to # produce a sample annotation sheet. 
if not os.path.isfile(file_gse) or self.refresh_metadata: - file_gse_content = Accession(acc_GSE).fetch_metadata(file_gse, clean=self.discard_soft) + file_gse_content = Accession(acc_GSE).fetch_metadata( + file_gse, clean=self.discard_soft + ) else: self._LOGGER.info(f"Found previous GSE file: {file_gse}") gse_file_obj = open(file_gse, "r") - file_gse_content = gse_file_obj.read().split('\n') + file_gse_content = gse_file_obj.read().split("\n") file_gse_content = [elem for elem in file_gse_content if len(elem) > 0] if not os.path.isfile(file_gsm) or self.refresh_metadata: - file_gsm_content = Accession(acc_GSE).fetch_metadata(file_gsm, typename="GSM", clean=self.discard_soft) + file_gsm_content = Accession(acc_GSE).fetch_metadata( + file_gsm, typename="GSM", clean=self.discard_soft + ) else: self._LOGGER.info(f"Found previous GSM file: {file_gsm}") gsm_file_obj = open(file_gsm, "r") - file_gsm_content = gsm_file_obj.read().split('\n') - file_gsm_content = [elem for elem in file_gsm_content if len(elem) > 0] + file_gsm_content = gsm_file_obj.read().split("\n") + file_gsm_content = [elem for elem in file_gsm_content if len(elem) > 0] # download processed data if self.processed: - #try: - ( - meta_processed_samples, - meta_processed_series, - ) = self.get_list_of_processed_files(file_gse_content, file_gsm_content) - - # taking into account list of GSM that is specified in the input file - gsm_list = acc_GSE_list[acc_GSE] - meta_processed_samples = self.filter_gsm( - meta_processed_samples, gsm_list + # try: + ( + meta_processed_samples, + meta_processed_series, + ) = self.get_list_of_processed_files(file_gse_content, file_gsm_content) + + # taking into account list of GSM that is specified in the input file + gsm_list = acc_GSE_list[acc_GSE] + meta_processed_samples = self.filter_gsm( + meta_processed_samples, gsm_list + ) + # Unify keys: + meta_processed_samples = self.unify_list_keys(meta_processed_samples) + meta_processed_series = 
self.unify_list_keys(meta_processed_series) + + # samples + list_of_keys = self.get_list_of_keys(meta_processed_samples) + self._LOGGER.info("Expanding metadata list...") + for key_in_list in list_of_keys: + meta_processed_samples = self.expand_metadata_list( + meta_processed_samples, key_in_list ) - # Unify keys: - meta_processed_samples = self.unify_list_keys( - meta_processed_samples + + # series + list_of_keys_series = self.get_list_of_keys(meta_processed_series) + self._LOGGER.info("Expanding metadata list...") + for key_in_list in list_of_keys_series: + meta_processed_series = self.expand_metadata_list( + meta_processed_series, key_in_list ) - meta_processed_series = self.unify_list_keys(meta_processed_series) - # samples - list_of_keys = self.get_list_of_keys(meta_processed_samples) - self._LOGGER.info("Expanding metadata list...") - for key_in_list in list_of_keys: - meta_processed_samples = self.expand_metadata_list( - meta_processed_samples, key_in_list + # convert column names to lowercase and underscore + meta_processed_samples = self.standardize_colnames( + meta_processed_samples + ) + meta_processed_series = self.standardize_colnames(meta_processed_series) + + if not self.acc_anno: + # adding metadata from current experiment to the project + processed_metadata_samples.extend(meta_processed_samples) + processed_metadata_exp.extend(meta_processed_series) + + # save PEP for each accession if acc-anno flag is true + if self.acc_anno and len(acc_GSE_list.keys()) > 1: + if self.supp_by == "all": + # samples + pep_acc_path_sample = os.path.join( + self.metadata_raw, + f"{acc_GSE}_samples", + acc_GSE + SAMPLE_SUPP_METADATA_FILE, + ) + self.write_processed_annotation( + meta_processed_samples, pep_acc_path_sample ) - # series - list_of_keys_series = self.get_list_of_keys(meta_processed_series) - self._LOGGER.info("Expanding metadata list...") - for key_in_list in list_of_keys_series: - meta_processed_series = self.expand_metadata_list( - meta_processed_series, 
key_in_list + # series + pep_acc_path_exp = os.path.join( + self.metadata_raw, + f"{acc_GSE}_series", + acc_GSE + EXP_SUPP_METADATA_FILE, + ) + self.write_processed_annotation( + meta_processed_series, pep_acc_path_exp + ) + elif self.supp_by == "samples": + pep_acc_path_sample = os.path.join( + self.metadata_raw, + f"{acc_GSE}_samples", + acc_GSE + SAMPLE_SUPP_METADATA_FILE, + ) + self.write_processed_annotation( + meta_processed_samples, pep_acc_path_sample + ) + elif self.supp_by == "series": + pep_acc_path_exp = os.path.join( + self.metadata_raw, + f"{acc_GSE}_series", + acc_GSE + EXP_SUPP_METADATA_FILE, + ) + self.write_processed_annotation( + meta_processed_series, pep_acc_path_exp ) - # convert column names to lowercase and underscore - meta_processed_samples = self.standardize_colnames( - meta_processed_samples - ) - meta_processed_series = self.standardize_colnames( - meta_processed_series - ) + if not self.just_metadata: + data_geo_folder = os.path.join(self.geo_folder, acc_GSE) + self._LOGGER.debug("Data folder: " + data_geo_folder) - if not self.acc_anno: - # adding metadata from current experiment to the project - processed_metadata_samples.extend(meta_processed_samples) - processed_metadata_exp.extend(meta_processed_series) - - # save PEP for each accession if acc-anno flag is true - if self.acc_anno and len(acc_GSE_list.keys()) > 1: - if self.supp_by == "all": - # samples - pep_acc_path_sample = os.path.join( - self.metadata_raw, - f"{acc_GSE}_samples", - acc_GSE + SAMPLE_SUPP_METADATA_FILE, - ) - self.write_processed_annotation( - meta_processed_samples, pep_acc_path_sample - ) + if self.supp_by == "all": + processed_samples_files = [ + each_file["file_url"] + for each_file in meta_processed_samples + ] + for file_url in processed_samples_files: + self.download_processed_file(file_url, data_geo_folder) - # series - pep_acc_path_exp = os.path.join( - self.metadata_raw, - f"{acc_GSE}_series", - acc_GSE + EXP_SUPP_METADATA_FILE, - ) - 
self.write_processed_annotation( - meta_processed_series, pep_acc_path_exp - ) - elif self.supp_by == "samples": - pep_acc_path_sample = os.path.join( - self.metadata_raw, - f"{acc_GSE}_samples", - acc_GSE + SAMPLE_SUPP_METADATA_FILE, - ) - self.write_processed_annotation( - meta_processed_samples, pep_acc_path_sample - ) - elif self.supp_by == "series": - pep_acc_path_exp = os.path.join( - self.metadata_raw, - f"{acc_GSE}_series", - acc_GSE + EXP_SUPP_METADATA_FILE, - ) - self.write_processed_annotation( - meta_processed_series, pep_acc_path_exp - ) + processed_series_files = [ + each_file["file_url"] for each_file in meta_processed_series + ] + for file_url in processed_series_files: + self.download_processed_file(file_url, data_geo_folder) - if not self.just_metadata: - data_geo_folder = os.path.join(self.geo_folder, acc_GSE) - self._LOGGER.debug("Data folder: " + data_geo_folder) - - if self.supp_by == "all": - processed_samples_files = [ - each_file["file_url"] - for each_file in meta_processed_samples - ] - for file_url in processed_samples_files: - self.download_processed_file(file_url, data_geo_folder) - - processed_series_files = [ - each_file["file_url"] - for each_file in meta_processed_series - ] - for file_url in processed_series_files: - self.download_processed_file(file_url, data_geo_folder) - - elif self.supp_by == "samples": - processed_samples_files = [ - each_file["file_url"] - for each_file in meta_processed_samples - ] - for file_url in processed_samples_files: - self.download_processed_file(file_url, data_geo_folder) - - elif self.supp_by == "series": - processed_series_files = [ - each_file["file_url"] - for each_file in meta_processed_series - ] - for file_url in processed_series_files: - self.download_processed_file(file_url, data_geo_folder) - # except Exception as processed_exception: - # failed_runs.append(acc_GSE) - # self._LOGGER.warning(f"Error occurred: {processed_exception}") + elif self.supp_by == "samples": + 
processed_samples_files = [ + each_file["file_url"] + for each_file in meta_processed_samples + ] + for file_url in processed_samples_files: + self.download_processed_file(file_url, data_geo_folder) + + elif self.supp_by == "series": + processed_series_files = [ + each_file["file_url"] for each_file in meta_processed_series + ] + for file_url in processed_series_files: + self.download_processed_file(file_url, data_geo_folder) + # except Exception as processed_exception: + # failed_runs.append(acc_GSE) + # self._LOGGER.warning(f"Error occurred: {processed_exception}") else: # download gsm metadata - gsm_metadata = self.get_gsm_metadata(acc_GSE, acc_GSE_list, file_gsm_content) + gsm_metadata = self.get_gsm_metadata( + acc_GSE, acc_GSE_list, file_gsm_content + ) metadata_dict[acc_GSE] = gsm_metadata # download gsm metadata - SRP_list_result = self.get_SRA_meta(file_gse_content, gsm_metadata, file_sra) + SRP_list_result = self.get_SRA_meta( + file_gse_content, gsm_metadata, file_sra + ) if not SRP_list_result: # delete current acc if no raw data was found # del metadata_dict[acc_GSE] @@ -866,9 +874,7 @@ def write_gsm_annotation(self, gsm_metadata, file_annotation, use_key_subset=Fal w.writeheader() for item in gsm_metadata: w.writerow(gsm_metadata[item]) - self._LOGGER.info( - "\033[92mFile has been saved successfully\033[0m" - ) + self._LOGGER.info("\033[92mFile has been saved successfully\033[0m") return fp def write_processed_annotation(self, processed_metadata, file_annotation_path): @@ -1407,7 +1413,9 @@ def download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): else: self._LOGGER.info(f"\033[38;5;242mFile {full_filepath} exists.\033[0m") - def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: list): + def get_list_of_processed_files( + self, file_gse_content: list, file_gsm_content: list + ): """ Given a paths to GSE and GSM metafile create a list of dicts of metadata of processed files :param list file_gse_content: 
list of lines of gse metafile @@ -1435,7 +1443,9 @@ def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: if tar_re.search(filename): # find and download filelist - file with information about files in tar index = file_url.rfind("/") - tar_files_list_url = "https" + file_url[3: index + 1] + "filelist.txt" + tar_files_list_url = ( + "https" + file_url[3 : index + 1] + "filelist.txt" + ) # file_list_name filelist_path = os.path.join( self.metadata_expanded, gse_numb + "_file_list.txt" @@ -1446,7 +1456,7 @@ def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: if result.ok: filelist_raw_text = result.text if not self.discard_soft: - with open(filelist_path, 'w') as f: + with open(filelist_path, "w") as f: f.write(filelist_raw_text) else: raise Exception(f"error in requesting tar_files_list") @@ -1455,7 +1465,6 @@ def get_list_of_processed_files(self, file_gse_content: list, file_gsm_content: filelist_obj = open(filelist_path, "r") filelist_raw_text = filelist_obj.read() - nb = len(meta_processed_samples) - 1 for line_gsm in file_gsm_content: if line_gsm[0] == "^": @@ -2204,7 +2213,7 @@ def main(): args = _parse_cmdl(sys.argv[1:]) args_dict = vars(args) args_dict["args"] = args - Geofetcher(**args_dict).get_project_obj(args_dict["input"]) + Geofetcher(**args_dict).fetch_all(args_dict["input"]) if __name__ == "__main__": diff --git a/geofetch/utils.py b/geofetch/utils.py index efbc7ff..f3f97ff 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -170,7 +170,9 @@ def __init__(self, accn, strict=True): self.accn = accn self.typename = typename.upper() - def fetch_metadata(self, outpath: str = None, typename: str = None, clean: bool = False) -> list: + def fetch_metadata( + self, outpath: str = None, typename: str = None, clean: bool = False + ) -> list: """ Fetch the metadata associated with this accession. 
@@ -221,7 +223,7 @@ def fetch_metadata(self, outpath: str = None, typename: str = None, clean: bool os.makedirs(dirpath) # save file: - with open(outpath, 'w') as f: + with open(outpath, "w") as f: f.write(result_text) return result_list @@ -334,4 +336,5 @@ def run_subprocess(*args, **kwargs): _LOGGER.warning(f"Exception raised during subprocess termination: {ose}") sys.exit(1) -#file_gse_content \ No newline at end of file + +# file_gse_content diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index a38f1f0..3853373 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -7,3 +7,5 @@ logmuse>=0.2.7 ubiquerg>=0.6.0 requests>=2.28.1 xmltodict>=0.13.0 +pandas>=1.4.3 +peppy @ git+https://github.com/pepkit/peppy.git@dev_pd_init#egg=peppy From 3c896a6fdffc15d43f512cae471d58db90e85a63 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 17 Aug 2022 15:43:42 -0400 Subject: [PATCH 06/61] Refactoring names of the methods in Geofetcher --- geofetch/geofetch.py | 310 +++++++++++++++++------------------------ tests/test_geofetch.py | 10 +- 2 files changed, 133 insertions(+), 187 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 7b689f8..291282a 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -37,40 +37,43 @@ class Geofetcher: + """ + Class to download or get projects, metadata, data from GEO and SRA + """ + def __init__( self, - name="", - metadata_root="", - metadata_folder="", - just_metadata=False, - refresh_metadata=False, - config_template=None, - pipeline_samples=None, - pipeline_project=None, + name: str = "", + metadata_root: str = "", + metadata_folder: str = "", + just_metadata: bool = False, + refresh_metadata: bool = False, + config_template: str = None, + pipeline_samples: str = None, + pipeline_project: str = None, skip=0, - acc_anno=False, - use_key_subset=False, - processed=True, - data_source="samples", - filter=None, - filter_size=None, - 
geo_folder=".", - split_experiments=False, - bam_folder="", - fq_folder="", - sra_folder="", + acc_anno: bool = False, + use_key_subset: bool = False, + processed: bool = True, + data_source: str = "samples", + filter: str = None, + filter_size: str = None, + geo_folder: str = ".", + split_experiments: bool = False, + bam_folder: str = "", + fq_folder: str = "", + sra_folder: str = "", bam_conversion=False, - picard_path="", - input=None, - const_limit_project=50, - const_limit_discard=250, - attr_limit_truncate=500, - discard_soft=False, - add_dotfile=False, + picard_path: str = "", + input: str = None, + const_limit_project: int = 50, + const_limit_discard: int = 250, + attr_limit_truncate: int = 500, + discard_soft: bool = False, + add_dotfile: bool = False, opts=None, **kwargs, ): - global _LOGGER if opts is not None: _LOGGER = logmuse.logger_via_cli(opts) @@ -176,7 +179,7 @@ def __init__( ) # Some sanity checks before proceeding - if bam_conversion and not just_metadata and not self.which("samtools"): + if bam_conversion and not just_metadata and not self._which("samtools"): raise SystemExit("For SAM/BAM processing, samtools should be on PATH.") def get_project_obj(self, input: str) -> Dict[peppy.Project]: @@ -346,38 +349,30 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): ( meta_processed_samples, meta_processed_series, - ) = self.get_list_of_processed_files(file_gse_content, file_gsm_content) + ) = self._get_list_of_processed_files(file_gse_content, file_gsm_content) # taking into account list of GSM that is specified in the input file gsm_list = acc_GSE_list[acc_GSE] - meta_processed_samples = self.filter_gsm( - meta_processed_samples, gsm_list - ) + meta_processed_samples = self._filter_gsm(meta_processed_samples, gsm_list) # Unify keys: - meta_processed_samples = self.unify_list_keys(meta_processed_samples) - meta_processed_series = self.unify_list_keys(meta_processed_series) + meta_processed_samples = 
self._unify_list_keys(meta_processed_samples) + meta_processed_series = self._unify_list_keys(meta_processed_series) # samples - list_of_keys = self.get_list_of_keys(meta_processed_samples) + list_of_keys = self._get_list_of_keys(meta_processed_samples) self._LOGGER.info("Expanding metadata list...") for key_in_list in list_of_keys: - meta_processed_samples = self.expand_metadata_list( - meta_processed_samples, key_in_list - ) + meta_processed_samples = self._expand_metadata_list(meta_processed_samples, key_in_list) # series - list_of_keys_series = self.get_list_of_keys(meta_processed_series) + list_of_keys_series = self._get_list_of_keys(meta_processed_series) self._LOGGER.info("Expanding metadata list...") for key_in_list in list_of_keys_series: - meta_processed_series = self.expand_metadata_list( - meta_processed_series, key_in_list - ) + meta_processed_series = self._expand_metadata_list(meta_processed_series, key_in_list) # convert column names to lowercase and underscore - meta_processed_samples = self.standardize_colnames( - meta_processed_samples - ) - meta_processed_series = self.standardize_colnames(meta_processed_series) + meta_processed_samples = self._standardize_colnames(meta_processed_samples) + meta_processed_series = self._standardize_colnames(meta_processed_series) if not self.acc_anno: # adding metadata from current experiment to the project @@ -393,9 +388,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): f"{acc_GSE}_samples", acc_GSE + SAMPLE_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - meta_processed_samples, pep_acc_path_sample - ) + self._write_processed_annotation(meta_processed_samples, pep_acc_path_sample) # series pep_acc_path_exp = os.path.join( @@ -403,27 +396,21 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): f"{acc_GSE}_series", acc_GSE + EXP_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - meta_processed_series, pep_acc_path_exp - ) + 
self._write_processed_annotation(meta_processed_series, pep_acc_path_exp) elif self.supp_by == "samples": pep_acc_path_sample = os.path.join( self.metadata_raw, f"{acc_GSE}_samples", acc_GSE + SAMPLE_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - meta_processed_samples, pep_acc_path_sample - ) + self._write_processed_annotation(meta_processed_samples, pep_acc_path_sample) elif self.supp_by == "series": pep_acc_path_exp = os.path.join( self.metadata_raw, f"{acc_GSE}_series", acc_GSE + EXP_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - meta_processed_series, pep_acc_path_exp - ) + self._write_processed_annotation(meta_processed_series, pep_acc_path_exp) if not self.just_metadata: data_geo_folder = os.path.join(self.geo_folder, acc_GSE) @@ -435,13 +422,13 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): for each_file in meta_processed_samples ] for file_url in processed_samples_files: - self.download_processed_file(file_url, data_geo_folder) + self._download_processed_file(file_url, data_geo_folder) processed_series_files = [ each_file["file_url"] for each_file in meta_processed_series ] for file_url in processed_series_files: - self.download_processed_file(file_url, data_geo_folder) + self._download_processed_file(file_url, data_geo_folder) elif self.supp_by == "samples": processed_samples_files = [ @@ -449,29 +436,25 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): for each_file in meta_processed_samples ] for file_url in processed_samples_files: - self.download_processed_file(file_url, data_geo_folder) + self._download_processed_file(file_url, data_geo_folder) elif self.supp_by == "series": processed_series_files = [ each_file["file_url"] for each_file in meta_processed_series ] for file_url in processed_series_files: - self.download_processed_file(file_url, data_geo_folder) + self._download_processed_file(file_url, data_geo_folder) # except Exception as processed_exception: # 
failed_runs.append(acc_GSE) # self._LOGGER.warning(f"Error occurred: {processed_exception}") else: # download gsm metadata - gsm_metadata = self.get_gsm_metadata( - acc_GSE, acc_GSE_list, file_gsm_content - ) + gsm_metadata = self._get_gsm_metadata(acc_GSE, acc_GSE_list, file_gsm_content) metadata_dict[acc_GSE] = gsm_metadata # download gsm metadata - SRP_list_result = self.get_SRA_meta( - file_gse_content, gsm_metadata, file_sra - ) + SRP_list_result = self._get_SRA_meta(file_gse_content, gsm_metadata, file_sra) if not SRP_list_result: # delete current acc if no raw data was found # del metadata_dict[acc_GSE] @@ -510,17 +493,13 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): ) if not sample_name or sample_name == "": temp = gsm_metadata[experiment]["Sample_title"] - sample_name = self.sanitize_name(temp) + sample_name = self._sanitize_name(temp) # Otherwise, record that there's SRA data for this run. # And set a few columns that are used as input to the Looper # print("Updating columns for looper") - self.update_columns( - gsm_metadata, - experiment, - sample_name=sample_name, - read_type=line["LibraryLayout"], - ) + self._update_columns(gsm_metadata, experiment, sample_name=sample_name, + read_type=line["LibraryLayout"]) # Some experiments are flagged in SRA as having multiple runs. 
if gsm_metadata[experiment].get("SRR") is not None: @@ -595,7 +574,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): else: if not self.just_metadata: try: - self.download_SRA_file(run_name) + self._download_SRA_file(run_name) except Exception as err: failed_runs.append(run_name) self._LOGGER.warning( @@ -607,7 +586,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): if self.bam_conversion and self.bam_folder != "": try: # converting sra to bam using - self.sra_bam_conversion(bam_file, run_name) + self._sra_bam_conversion(bam_file, run_name) # checking if bam_file converted correctly, if not --> use fastq-dump st = os.stat(bam_file) @@ -615,9 +594,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): self._LOGGER.warning( "Bam conversion failed with sam-dump. Trying fastq-dump..." ) - self.sra_bam_conversion2( - bam_file, run_name, self.picard_path - ) + self._sra_bam_conversion2(bam_file, run_name, self.picard_path) except FileNotFoundError as err: self._LOGGER.info( @@ -651,18 +628,14 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): "PEP_samples", self.project_name + SAMPLE_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - processed_metadata_samples, supp_sample_path_meta - ) + self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) supp_series_path_meta = os.path.join( self.metadata_raw, "PEP_series", self.project_name + EXP_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - processed_metadata_exp, supp_series_path_meta - ) + self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) elif self.supp_by == "samples": if just_object: @@ -673,9 +646,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): "PEP_samples", self.project_name + SAMPLE_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - processed_metadata_samples, supp_sample_path_meta 
- ) + self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) elif self.supp_by == "series": if just_object: @@ -686,14 +657,12 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): "PEP_series", self.project_name + EXP_SUPP_METADATA_FILE, ) - self.write_processed_annotation( - processed_metadata_exp, supp_series_path_meta - ) + self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) # saving PEPs for raw data else: if not just_object: - self.write_raw_annotation(metadata_dict, subannotation_dict) + self._write_raw_annotation(metadata_dict, subannotation_dict) else: raw_meta_list = [] for meta_key in metadata_dict.keys(): @@ -704,7 +673,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): # TODO: add subannotation_dict!!!! return raw_meta_list - def expand_metadata_list(self, metadata_list, dict_key): + def _expand_metadata_list(self, metadata_list, dict_key): """ Expanding list items in the list by creating new items or joining them @@ -776,7 +745,7 @@ def expand_metadata_list(self, metadata_list, dict_key): self._LOGGER.warning("Value Error: %s" % err1) return metadata_list - def filter_gsm(self, meta_processed_samples: list, gsm_list: dict) -> list: + def _filter_gsm(self, meta_processed_samples: list, gsm_list: dict) -> list: """ Getting metadata list of all samples of one experiment and filtering it by the list of GSM that was specified in the input files. 
@@ -801,7 +770,7 @@ def filter_gsm(self, meta_processed_samples: list, gsm_list: dict) -> list: return meta_processed_samples @staticmethod - def get_list_of_keys(list_of_dict): + def _get_list_of_keys(list_of_dict): """ Getting list of all keys that are in the dictionaries in the list @@ -815,7 +784,7 @@ def get_list_of_keys(list_of_dict): list_of_keys.extend(list(element.keys())) return list(set(list_of_keys)) - def unify_list_keys(self, processed_meta_list): + def _unify_list_keys(self, processed_meta_list): """ Unifying list of dicts with metadata, so every dict will have same keys @@ -824,18 +793,18 @@ def unify_list_keys(self, processed_meta_list): :return str: list of unified dicts with metadata """ - list_of_keys = self.get_list_of_keys(processed_meta_list) + list_of_keys = self._get_list_of_keys(processed_meta_list) for k in list_of_keys: for list_elem in range(len(processed_meta_list)): if k not in processed_meta_list[list_elem]: processed_meta_list[list_elem][k] = "" return processed_meta_list - def find_genome(self, metadata_list): + def _find_genome(self, metadata_list): """ Create new genome table by joining few columns """ - list_keys = self.get_list_of_keys(metadata_list) + list_keys = self._get_list_of_keys(metadata_list) genome_keys = [ "assembly", "genome_build", @@ -849,7 +818,7 @@ def find_genome(self, metadata_list): metadata_list[sample[0]]["sample_genome"] = sample_genome return metadata_list - def write_gsm_annotation(self, gsm_metadata, file_annotation, use_key_subset=False): + def _write_gsm_annotation(self, gsm_metadata, file_annotation, use_key_subset=False): """ Write metadata sheet out as an annotation file. 
@@ -877,7 +846,7 @@ def write_gsm_annotation(self, gsm_metadata, file_annotation, use_key_subset=Fal self._LOGGER.info("\033[92mFile has been saved successfully\033[0m") return fp - def write_processed_annotation(self, processed_metadata, file_annotation_path): + def _write_processed_annotation(self, processed_metadata, file_annotation_path): """ Saving annotation file by providing list of dictionaries with files metadata :param list processed_metadata: list of dictionaries with files metadata @@ -896,18 +865,14 @@ def write_processed_annotation(self, processed_metadata, file_annotation_path): os.makedirs(pep_file_folder) self._LOGGER.info("Unifying and saving of metadata... ") - processed_metadata = self.unify_list_keys(processed_metadata) + processed_metadata = self._unify_list_keys(processed_metadata) # delete rare keys - processed_metadata = self.find_genome(processed_metadata) + processed_metadata = self._find_genome(processed_metadata) # filtering huge annotation strings that are repeating for each sample - processed_metadata, proj_meta = self.separate_common_meta( - processed_metadata, - self.const_limit_project, - self.const_limit_discard, - self.attr_limit_truncate, - ) + processed_metadata, proj_meta = self._separate_common_meta(processed_metadata, self.const_limit_project, + self.const_limit_discard, self.attr_limit_truncate) meta_list_str = [ f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta ] @@ -948,12 +913,12 @@ def write_processed_annotation(self, processed_metadata, file_annotation_path): # save .pep.yaml file if self.add_dotfile: dot_yaml_path = os.path.join(pep_file_folder, ".pep.yaml") - self.create_dot_yaml(dot_yaml_path, yaml_name) + self._create_dot_yaml(dot_yaml_path, yaml_name) return True @staticmethod - def sanitize_name(name_str: str): + def _sanitize_name(name_str: str): """ Function that sanitizing strings. (Replace all odd characters) :param str name_str: Any string value that has to be sanitized. 
@@ -965,7 +930,7 @@ def sanitize_name(name_str: str): new_str = new_str.replace(" ", "_").replace("__", "_") return new_str - def write_raw_annotation(self, metadata_dict, subannotation_dict): + def _write_raw_annotation(self, metadata_dict, subannotation_dict): """ Combining individual accessions into project-level annotations, and writeing individual accession files (if requested) @@ -997,9 +962,7 @@ def write_raw_annotation(self, metadata_dict, subannotation_dict): fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] # sanitize sample names - fixed_dict[key_sample]["sample_name"] = self.sanitize_name( - fixed_dict[key_sample]["sample_name"] - ) + fixed_dict[key_sample]["sample_name"] = self._sanitize_name(fixed_dict[key_sample]["sample_name"]) metadata_dict[key] = fixed_dict @@ -1009,20 +972,13 @@ def write_raw_annotation(self, metadata_dict, subannotation_dict): self.metadata_expanded, acc_GSE + "_annotation.csv" ) if self.acc_anno: - self.write_gsm_annotation( - gsm_metadata, - file_annotation, - use_key_subset=self.use_key_subset, - ) + self._write_gsm_annotation(gsm_metadata, file_annotation, use_key_subset=self.use_key_subset) metadata_dict_combined.update(gsm_metadata) # filtering huge annotation strings that are repeating for each sample - metadata_dict_combined, proj_meta = self.separate_common_meta( - metadata_dict_combined, - self.const_limit_project, - self.const_limit_discard, - self.attr_limit_truncate, - ) + metadata_dict_combined, proj_meta = self._separate_common_meta(metadata_dict_combined, self.const_limit_project, + self.const_limit_discard, + self.attr_limit_truncate) meta_list_str = [ f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta ] @@ -1034,7 +990,7 @@ def write_raw_annotation(self, metadata_dict, subannotation_dict): self.metadata_expanded, acc_GSE + "_subannotation.csv" ) if self.acc_anno: - self.write_subannotation(gsm_multi_table, file_subannotation) + self._write_subannotation(gsm_multi_table, 
file_subannotation) subannotation_dict_combined.update(gsm_multi_table) self._LOGGER.info( "Creating complete project annotation sheets and config file..." @@ -1045,17 +1001,13 @@ def write_raw_annotation(self, metadata_dict, subannotation_dict): file_annotation = os.path.join( self.metadata_raw, self.project_name + "_annotation.csv" ) - self.write_gsm_annotation( - metadata_dict_combined, - file_annotation, - use_key_subset=self.use_key_subset, - ) + self._write_gsm_annotation(metadata_dict_combined, file_annotation, use_key_subset=self.use_key_subset) # Write combined subannotation table if len(subannotation_dict_combined) > 0: file_subannotation = os.path.join( self.metadata_raw, self.project_name + "_subannotation.csv" ) - self.write_subannotation(subannotation_dict_combined, file_subannotation) + self._write_subannotation(subannotation_dict_combined, file_subannotation) else: file_subannotation = "null" # Write project config file @@ -1084,10 +1036,10 @@ def write_raw_annotation(self, metadata_dict, subannotation_dict): # save .pep.yaml file if self.add_dotfile: dot_yaml_path = os.path.join(self.metadata_raw, ".pep.yaml") - self.create_dot_yaml(dot_yaml_path, yaml_name) + self._create_dot_yaml(dot_yaml_path, yaml_name) @staticmethod - def create_dot_yaml(file_path: str, yaml_path: str): + def _create_dot_yaml(file_path: str, yaml_path: str): """ Function that creates .pep.yaml file that points to actual yaml file :param str file_path: Path to the .pep.yaml file that we want to create @@ -1096,7 +1048,7 @@ def create_dot_yaml(file_path: str, yaml_path: str): with open(file_path, "w+") as file: file.writelines(f"config_file: {yaml_path}") - def separate_common_meta( + def _separate_common_meta( self, meta_list, max_len=50, del_limit=250, attr_limit_truncate=500 ): """ @@ -1120,7 +1072,7 @@ def separate_common_meta( meta_list = new_meta_list - list_of_keys = self.get_list_of_keys(meta_list) + list_of_keys = self._get_list_of_keys(meta_list) list_keys_diff = [] # 
finding columns with common values for this_key in list_of_keys: @@ -1179,20 +1131,20 @@ def separate_common_meta( return meta_list, new_meta_project - def standardize_colnames(self, meta_list): + def _standardize_colnames(self, meta_list): """ Standardize column names by lower-casing and underscore :param list meta_list: list of dictionaries of samples :return : list of dictionaries of samples with standard colnames """ new_metalist = [] - list_keys = self.get_list_of_keys(meta_list) + list_keys = self._get_list_of_keys(meta_list) for item_nb, values in enumerate(meta_list): new_metalist.append({}) for key in list_keys: try: new_key_name = key.lower().strip() - new_key_name = self.sanitize_name(new_key_name) + new_key_name = self._sanitize_name(new_key_name) new_metalist[item_nb][new_key_name] = values[key] @@ -1201,7 +1153,7 @@ def standardize_colnames(self, meta_list): return new_metalist - def download_SRA_file(self, run_name): + def _download_SRA_file(self, run_name): """ Downloading SRA file by ising 'prefetch' utility from the SRA Toolkit more info: (http://www.ncbi.nlm.nih.gov/books/NBK242621/) @@ -1230,7 +1182,7 @@ def download_SRA_file(self, run_name): time.sleep(t * 2) @staticmethod - def which(program): + def _which(program): """ return str: the path to a program to make sure it exists """ @@ -1250,7 +1202,7 @@ def is_exe(fp): if is_exe(exe_file): return exe_file - def sra_bam_conversion(self, bam_file, run_name): + def _sra_bam_conversion(self, bam_file, run_name): """ Converting of SRA file to BAM file by using samtools function "sam-dump" :param str bam_file: path to BAM file that has to be created @@ -1275,7 +1227,7 @@ def sra_bam_conversion(self, bam_file, run_name): run_subprocess(cmd, shell=True) @staticmethod - def update_columns(metadata, experiment_name, sample_name, read_type): + def _update_columns(metadata, experiment_name, sample_name, read_type): """ Update the metadata associated with a particular experiment. 
@@ -1319,7 +1271,7 @@ def update_columns(metadata, experiment_name, sample_name, read_type): return exp - def sra_bam_conversion2(self, bam_file, run_name, picard_path=None): + def _sra_bam_conversion2(self, bam_file, run_name, picard_path=None): """ Converting of SRA file to BAM file by using fastq-dump (is used when sam-dump fails, yielding an empty bam file. Here fastq -> bam conversion is used) @@ -1358,7 +1310,7 @@ def sra_bam_conversion2(self, bam_file, run_name, picard_path=None): self._LOGGER.info(f"Conversion command: {cmd}") run_subprocess(cmd, shell=True) - def write_subannotation(self, tabular_data, filepath, column_names=None): + def _write_subannotation(self, tabular_data, filepath, column_names=None): """ Writes one or more tables to a given CSV filepath. @@ -1385,7 +1337,7 @@ def write_subannotation(self, tabular_data, filepath, column_names=None): writer.writerows(values) return fp - def download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): + def _download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): """ Given an url for a file, downloading to specified folder :param str file_url: the URL of the file to download @@ -1413,7 +1365,7 @@ def download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): else: self._LOGGER.info(f"\033[38;5;242mFile {full_filepath} exists.\033[0m") - def get_list_of_processed_files( + def _get_list_of_processed_files( self, file_gse_content: list, file_gsm_content: list ): """ @@ -1429,7 +1381,7 @@ def get_list_of_processed_files( for line in file_gse_content: if re.compile(r"!Series_geo_accession").search(line): - gse_numb = self.get_value(line) + gse_numb = self._get_value(line) meta_processed_series["GSE"] = gse_numb found = re.findall(SER_SUPP_FILE_PATTERN, line) @@ -1468,7 +1420,7 @@ def get_list_of_processed_files( nb = len(meta_processed_samples) - 1 for line_gsm in file_gsm_content: if line_gsm[0] == "^": - nb = 
len(self.check_file_existance(meta_processed_samples)) + nb = len(self._check_file_existance(meta_processed_samples)) meta_processed_samples.append( {"files": [], "GSE": gse_numb} ) @@ -1512,13 +1464,9 @@ def get_list_of_processed_files( if file_url_gsm != "NONE": meta_processed_samples[nb]["files"].append(file_url_gsm) - self.check_file_existance(meta_processed_samples) - meta_processed_samples = self.separate_list_of_files( - meta_processed_samples - ) - meta_processed_samples = self.separate_file_url( - meta_processed_samples - ) + self._check_file_existance(meta_processed_samples) + meta_processed_samples = self._separate_list_of_files(meta_processed_samples) + meta_processed_samples = self._separate_file_url(meta_processed_samples) self._LOGGER.info( f"Total number of processed SAMPLES files found is: " @@ -1526,7 +1474,7 @@ def get_list_of_processed_files( ) # expand meta_processed_samples with information about type and size - file_info_add = self.read_tar_filelist(filelist_raw_text) + file_info_add = self._read_tar_filelist(filelist_raw_text) for index_nr in range(len(meta_processed_samples)): file_name = meta_processed_samples[index_nr]["file"] meta_processed_samples[index_nr].update( @@ -1534,11 +1482,9 @@ def get_list_of_processed_files( ) if self.filter_re: - meta_processed_samples = self.run_filter(meta_processed_samples) + meta_processed_samples = self._run_filter(meta_processed_samples) if self.filter_size: - meta_processed_samples = self.run_size_filter( - meta_processed_samples - ) + meta_processed_samples = self._run_size_filter(meta_processed_samples) # other files than .tar: saving them into meta_processed_series list else: @@ -1563,19 +1509,19 @@ def get_list_of_processed_files( f"IndexError in adding value to meta_processed_series: %s" % ind_err ) - meta_processed_series = self.separate_list_of_files(meta_processed_series) - meta_processed_series = self.separate_file_url(meta_processed_series) + meta_processed_series = 
self._separate_list_of_files(meta_processed_series) + meta_processed_series = self._separate_file_url(meta_processed_series) self._LOGGER.info( f"Total number of processed SERIES files found is: " f"%s" % str(len(meta_processed_series)) ) if self.filter_re: - meta_processed_series = self.run_filter(meta_processed_series) + meta_processed_series = self._run_filter(meta_processed_series) return meta_processed_samples, meta_processed_series @staticmethod - def check_file_existance(meta_processed_sample): + def _check_file_existance(meta_processed_sample): """ Checking if last element of the list has files. If list of files is empty deleting it """ @@ -1587,7 +1533,7 @@ def check_file_existance(meta_processed_sample): return meta_processed_sample @staticmethod - def separate_list_of_files(meta_list, col_name="files"): + def _separate_list_of_files(meta_list, col_name="files"): """ This method is separating list of files (dict value) or just simple dict into two different dicts @@ -1611,7 +1557,7 @@ def separate_list_of_files(meta_list, col_name="files"): return separated_list - def separate_file_url(self, meta_list): + def _separate_file_url(self, meta_list): """ This method is adding dict key without file_name without path """ @@ -1629,12 +1575,12 @@ def separate_file_url(self, meta_list): new_dict["sample_name"] = os.path.basename(meta_elem["file"]) # sanitize sample names - new_dict["sample_name"] = self.sanitize_name(new_dict["sample_name"]) + new_dict["sample_name"] = self._sanitize_name(new_dict["sample_name"]) separated_list.append(new_dict) return separated_list - def run_filter(self, meta_list, col_name="file"): + def _run_filter(self, meta_list, col_name="file"): """ If user specified filter it will filter all this files here by col_name """ @@ -1649,7 +1595,7 @@ def run_filter(self, meta_list, col_name="file"): return filtered_list - def run_size_filter(self, meta_list, col_name="file_size"): + def _run_size_filter(self, meta_list, col_name="file_size"): """ 
function for filtering file size """ @@ -1670,10 +1616,10 @@ def run_size_filter(self, meta_list, col_name="file_size"): return filtered_list @staticmethod - def read_tar_filelist(raw_text: str): + def _read_tar_filelist(raw_text: str): """ Creating list for supplementary files that are listed in "filelist.txt" - :param str file_path: path to the file with information about files that are zipped ("filelist.txt") + :param str raw_text: path to the file with information about files that are zipped ("filelist.txt") :return dict: dict of supplementary file names and additional information """ f = StringIO(raw_text) @@ -1696,11 +1642,11 @@ def read_tar_filelist(raw_text: str): return files_info @staticmethod - def get_value(all_line): + def _get_value(all_line): line_value = all_line.split("= ")[-1] return line_value.split(": ")[-1].rstrip("\n") - def download_processed_file(self, file_url, data_folder): + def _download_processed_file(self, file_url, data_folder): """ Given a url for a file, download it, and extract anything passing the filter. 
:param str file_url: the URL of the file to download @@ -1725,7 +1671,7 @@ def download_processed_file(self, file_url, data_folder): while ntry < 10: try: - self.download_file(file_url, data_folder) + self._download_file(file_url, data_folder) self._LOGGER.info( "\033[92mFile %s has been downloaded successfully\033[0m" % f"{data_folder}/{filename}" @@ -1743,7 +1689,7 @@ def download_processed_file(self, file_url, data_folder): if ntry > 4: raise e - def get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): + def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): """ Parse out the SRA project identifier from the GSE file :param list file_gse_content: list of content of file_sde_content @@ -1789,7 +1735,7 @@ def get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): if not os.path.isfile(file_sra) or self.refresh_metadata: try: # downloading metadata - srp_list = self.get_SRP_list(acc_SRP) + srp_list = self._get_SRP_list(acc_SRP) if file_sra is not None: with open(file_sra, "w") as m_file: dict_writer = csv.DictWriter(m_file, srp_list[0].keys()) @@ -1819,7 +1765,7 @@ def get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): return srp_list else: try: - srp_list = self.get_SRP_list(acc_SRP) + srp_list = self._get_SRP_list(acc_SRP) return srp_list except Exception as err: @@ -1829,7 +1775,7 @@ def get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): ) return False - def get_SRP_list(self, srp_number: str) -> list: + def _get_SRP_list(self, srp_number: str) -> list: """ By using requests and xml searching and getting list of dicts of SRRs :param str srp_number: SRP number @@ -1859,7 +1805,7 @@ def get_SRP_list(self, srp_number: str) -> list: return SRP_list - def get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): + def _get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): """ A simple state machine to parse SOFT formatted files (Here, 
the GSM file) diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index f16b56f..6c227d9 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -83,7 +83,7 @@ def test_file_list( ( meta_processed_samples, meta_processed_series, - ) = initiate_geofetcher.get_list_of_processed_files(soft_gse, soft_gsm) + ) = initiate_geofetcher._get_list_of_processed_files(soft_gse, soft_gsm) assert len(meta_processed_samples) == sample_len assert len(meta_processed_series) == series_len @@ -137,7 +137,7 @@ def initiate_geofetcher(self, tmpdir): ], ) def test_downloading_files(self, file_url, file_name, tmpdir, initiate_geofetcher): - initiate_geofetcher.download_processed_file(file_url, tmpdir) + initiate_geofetcher._download_processed_file(file_url, tmpdir) assert len(tmpdir.listdir()) == 1 assert os.path.basename(tmpdir.listdir()[0]) == file_name @@ -178,7 +178,7 @@ def initiate_geofetcher(self, tmpdir): ], ) def test_filter(self, meta_list, output, initiate_geofetcher): - result = initiate_geofetcher.run_filter(meta_list) + result = initiate_geofetcher._run_filter(meta_list) assert result == output @pytest.mark.parametrize( @@ -199,7 +199,7 @@ def test_filter(self, meta_list, output, initiate_geofetcher): ], ) def test_size_filter(self, meta_list, output, initiate_geofetcher): - result = initiate_geofetcher.run_size_filter(meta_list) + result = initiate_geofetcher._run_size_filter(meta_list) assert result == output @pytest.mark.parametrize( @@ -248,7 +248,7 @@ def test_size_filter(self, meta_list, output, initiate_geofetcher): def test_large_meta_separation( self, init_meta_data, result_sample, result_proj, initiate_geofetcher ): - samp, proj = initiate_geofetcher.separate_common_meta(init_meta_data, max_len=0) + samp, proj = initiate_geofetcher._separate_common_meta(init_meta_data, max_len=0) assert samp == result_sample assert proj == result_proj From 6357dc5adc2ed47b54b66018b8cbe27da8348d83 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 19 Aug 
2022 11:31:57 -0400 Subject: [PATCH 07/61] fixed #80 --- geofetch/config_template.yaml | 2 +- geofetch/geofetch.py | 15 +++++++-------- requirements/requirements-all.txt | 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/geofetch/config_template.yaml b/geofetch/config_template.yaml index b492ae6..a47348a 100644 --- a/geofetch/config_template.yaml +++ b/geofetch/config_template.yaml @@ -3,7 +3,7 @@ name: {project_name} pep_version: 2.1.0 sample_table: {annotation} -subsample_table: {subannotation} +{subannotation} looper: output_dir: {project_name} diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 291282a..60544cc 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -51,7 +51,7 @@ def __init__( config_template: str = None, pipeline_samples: str = None, pipeline_project: str = None, - skip=0, + skip: int = 0, acc_anno: bool = False, use_key_subset: bool = False, processed: bool = True, @@ -74,7 +74,7 @@ def __init__( opts=None, **kwargs, ): - global _LOGGER + if opts is not None: _LOGGER = logmuse.logger_via_cli(opts) else: @@ -182,9 +182,9 @@ def __init__( if bam_conversion and not just_metadata and not self._which("samtools"): raise SystemExit("For SAM/BAM processing, samtools should be on PATH.") - def get_project_obj(self, input: str) -> Dict[peppy.Project]: + def get_project(self, input: str) -> Dict[peppy.Project]: """ - Function for fetching projects from GEO|SRA and obtaining peppy project + Function for fetching projects from GEO|SRA and receiving peppy project :param input: GSE number, or path to file of GSE numbers :return: peppy project or list of project, if acc_anno is set. 
""" @@ -255,8 +255,8 @@ def get_project_obj(self, input: str) -> Dict[peppy.Project]: new_dict = {} for proj_key in raw_project_dict.keys(): - new_dict[proj_key] = peppy.Project( - pd_object=pd.DataFrame(raw_project_dict[proj_key]) + new_dict[proj_key] = peppy.Project().from_pandas( + pd.DataFrame(raw_project_dict[proj_key]) ) return new_dict @@ -1020,7 +1020,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): template_values = { "project_name": self.project_name, "annotation": os.path.basename(file_annotation), - "subannotation": os.path.basename(file_subannotation), + "subannotation": f"subsample_table: {os.path.basename(file_subannotation)}", "pipeline_samples": self.file_pipeline_samples, "pipeline_project": self.file_pipeline_project, "additional_columns": modifiers_str, @@ -1790,7 +1790,6 @@ def _get_SRP_list(self, srp_number: str) -> list: if x.status_code != 200: self._LOGGER.error(f"Error in ncbi esearch response: {x.status_code}") raise x.raise_for_status() - id_results = x.json()["esearchresult"]["idlist"] id_r_string = ",".join(id_results) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 3853373..11f99b6 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -8,4 +8,4 @@ ubiquerg>=0.6.0 requests>=2.28.1 xmltodict>=0.13.0 pandas>=1.4.3 -peppy @ git+https://github.com/pepkit/peppy.git@dev_pd_init#egg=peppy +peppy>=0.34.0 From 5d982cd4f78c5336cd6986eb70a147fc6e1c9af4 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 19 Aug 2022 12:09:16 -0400 Subject: [PATCH 08/61] fixed #82 --- geofetch/geofetch.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 60544cc..45803d5 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1736,7 +1736,8 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): try: # downloading metadata srp_list 
= self._get_SRP_list(acc_SRP) - if file_sra is not None: + srp_list = self._unify_list_keys(srp_list) + if file_sra is not None and not self.discard_soft: with open(file_sra, "w") as m_file: dict_writer = csv.DictWriter(m_file, srp_list[0].keys()) dict_writer.writeheader() @@ -1791,16 +1792,22 @@ def _get_SRP_list(self, srp_number: str) -> list: self._LOGGER.error(f"Error in ncbi esearch response: {x.status_code}") raise x.raise_for_status() id_results = x.json()["esearchresult"]["idlist"] - - id_r_string = ",".join(id_results) - id_api = NCBI_EFETCH.format(ID=id_r_string) - y = requests.get(id_api) - if y.status_code != 200: - self._LOGGER.error(f"Error in ncbi efetch response: {x.status_code}") - raise y.raise_for_status() - - xml_result = y.text - SRP_list = xmltodict.parse(xml_result)["SraRunInfo"]["Row"] + if len(id_results) > 500: + id_results = [id_results[x:x + 100] for x in range(0, len(id_results), 100)] + else: + id_results = [id_results] + + SRP_list = [] + for result in id_results: + id_r_string = ",".join(result) + id_api = NCBI_EFETCH.format(ID=id_r_string) + + y = requests.get(id_api) + if y.status_code != 200: + self._LOGGER.error(f"Error in ncbi efetch response in SRA fetching: {x.status_code}") + raise y.raise_for_status() + xml_result = y.text + SRP_list.extend(xmltodict.parse(xml_result)["SraRunInfo"]["Row"]) return SRP_list From 212f6eb1cfbbad5e989c1a235b89c90fcd71e09d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 19 Aug 2022 12:16:52 -0400 Subject: [PATCH 09/61] Fixed #80 (2) --- geofetch/geofetch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 45803d5..892c927 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1008,8 +1008,10 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): self.metadata_raw, self.project_name + "_subannotation.csv" ) self._write_subannotation(subannotation_dict_combined, file_subannotation) + 
subanot_path_yaml = f"subsample_table: {os.path.basename(file_subannotation)}" else: file_subannotation = "null" + subanot_path_yaml = f"" # Write project config file if not self.config_template: geofetchdir = os.path.dirname(__file__) @@ -1020,7 +1022,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): template_values = { "project_name": self.project_name, "annotation": os.path.basename(file_annotation), - "subannotation": f"subsample_table: {os.path.basename(file_subannotation)}", + "subannotation": subanot_path_yaml, "pipeline_samples": self.file_pipeline_samples, "pipeline_project": self.file_pipeline_project, "additional_columns": modifiers_str, From c880492e344b0904e978f0eeef1dda84b60d2ed0 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 19 Aug 2022 14:36:58 -0400 Subject: [PATCH 10/61] fixed #73 --- geofetch/geofetch.py | 51 +++++++++++++++++++++---------- requirements/requirements-all.txt | 1 + 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 892c927..02da536 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -13,6 +13,8 @@ from string import punctuation import requests import xmltodict +from rich.progress import track +#from tqdm import tqdm # import tarfile import time @@ -71,6 +73,7 @@ def __init__( attr_limit_truncate: int = 500, discard_soft: bool = False, add_dotfile: bool = False, + disable_progressbar: bool = False, opts=None, **kwargs, ): @@ -167,6 +170,7 @@ def __init__( self.discard_soft = discard_soft self.add_dotfile = add_dotfile + self.disable_progressbar = disable_progressbar self._LOGGER.info(f"Metadata folder: {self.metadata_expanded}") @@ -182,6 +186,8 @@ def __init__( if bam_conversion and not just_metadata and not self._which("samtools"): raise SystemExit("For SAM/BAM processing, samtools should be on PATH.") + self.just_object = False + def get_project(self, input: str) -> Dict[peppy.Project]: """ Function for fetching projects 
from GEO|SRA and receiving peppy project @@ -189,6 +195,8 @@ def get_project(self, input: str) -> Dict[peppy.Project]: :return: peppy project or list of project, if acc_anno is set. """ self.just_metadata = True + self.just_object = True + self.disable_progressbar = True acc_GSE_list = parse_accessions( input, self.metadata_expanded, self.just_metadata ) @@ -207,35 +215,35 @@ def get_project(self, input: str) -> Dict[peppy.Project]: if data_source_all: # samples self.supp_by = "samples" - samples_list = self.fetch_all(input=acc_GSE, just_object=True) + samples_list = self.fetch_all(input=acc_GSE) if len(samples_list) > 0: raw_project_dict[acc_GSE + "_samples"] = samples_list # series self.supp_by = "series" - series_list = self.fetch_all(input=acc_GSE, just_object=True) + series_list = self.fetch_all(input=acc_GSE) if len(series_list) > 0: raw_project_dict[acc_GSE + "_series"] = series_list else: - ser_list = self.fetch_all(input=acc_GSE, just_object=True) + ser_list = self.fetch_all(input=acc_GSE) if len(ser_list) > 0: raw_project_dict[acc_GSE + "_" + self.supp_by] = ser_list else: if data_source_all: # samples self.supp_by = "samples" - samples_list = self.fetch_all(input=input, just_object=True) + samples_list = self.fetch_all(input=input) if len(samples_list) > 0: raw_project_dict["project_samples"] = samples_list # series self.supp_by = "series" - series_list = self.fetch_all(input=input, just_object=True) + series_list = self.fetch_all(input=input) if len(series_list) > 0: raw_project_dict["project_series"] = series_list else: - ser_list = self.fetch_all(input=input, just_object=True) + ser_list = self.fetch_all(input=input) if len(ser_list) > 0: raw_project_dict["project_" + self.supp_by] = ser_list @@ -244,12 +252,12 @@ def get_project(self, input: str) -> Dict[peppy.Project]: if self.acc_anno: self.acc_anno = False for acc_GSE in acc_GSE_list.keys(): - project_dict = self.fetch_all(input=input, just_object=True) + project_dict = self.fetch_all(input=input) 
if len(project_dict) > 0: raw_project_dict[acc_GSE + "_raw_samples"] = project_dict else: - ser_dict = self.fetch_all(input=input, just_object=True) + ser_dict = self.fetch_all(input=input) if len(ser_dict) > 0: raw_project_dict["raw_samples"] = ser_dict @@ -261,7 +269,7 @@ def get_project(self, input: str) -> Dict[peppy.Project]: return new_dict - def fetch_all(self, input: str, name: str = None, just_object: bool = False): + def fetch_all(self, input: str, name: str = None): """Main script driver/workflow""" if name: @@ -288,7 +296,10 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): acc_GSE_keys = acc_GSE_list.keys() nkeys = len(acc_GSE_keys) ncount = 0 - for acc_GSE in acc_GSE_list.keys(): + for acc_GSE in track(acc_GSE_list.keys(), + description="Processing... ", + disable=self.disable_progressbar): + ncount += 1 if ncount <= self.skip: continue @@ -638,7 +649,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) elif self.supp_by == "samples": - if just_object: + if self.just_object: return processed_metadata_samples else: supp_sample_path_meta = os.path.join( @@ -649,7 +660,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) elif self.supp_by == "series": - if just_object: + if self.just_object: return processed_metadata_exp else: supp_series_path_meta = os.path.join( @@ -661,7 +672,7 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): # saving PEPs for raw data else: - if not just_object: + if not self.just_object: self._write_raw_annotation(metadata_dict, subannotation_dict) else: raw_meta_list = [] @@ -670,7 +681,6 @@ def fetch_all(self, input: str, name: str = None, just_object: bool = False): metadata_dict[meta_key][srx_key]["gse_number"] = meta_key 
metadata_dict[meta_key][srx_key]["srx_number"] = srx_key raw_meta_list.append(metadata_dict[meta_key][srx_key]) - # TODO: add subannotation_dict!!!! return raw_meta_list def _expand_metadata_list(self, metadata_list, dict_key): @@ -1471,7 +1481,7 @@ def _get_list_of_processed_files( meta_processed_samples = self._separate_file_url(meta_processed_samples) self._LOGGER.info( - f"Total number of processed SAMPLES files found is: " + f"\nTotal number of processed SAMPLES files found is: " f"%s" % str(len(meta_processed_samples)) ) @@ -1712,7 +1722,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): # as part of this GEO submission. Can't proceed. self._LOGGER.warning( "\033[91mUnable to get SRA accession (SRP#) from GEO GSE SOFT file. " - "No raw data?\033[0m" + "No raw data detected! Continuing anyway...\033[0m" ) # but wait; another possibility: there's no SRP linked to the GSE, but there # could still be an SRX linked to the (each) GSM. @@ -1784,6 +1794,9 @@ def _get_SRP_list(self, srp_number: str) -> list: :param str srp_number: SRP number :return: list of dicts of SRRs """ + if not srp_number: + self._LOGGER.info(f"No srp number in this accession found") + return [] self._LOGGER.info(f"Downloading {srp_number} sra metadata") ncbi_esearch = NCBI_ESEARCH.format(SRP_NUMBER=srp_number) @@ -1972,6 +1985,12 @@ def _parse_cmdl(cmdl): "These will be added to the project config file to make it immediately " "compatible with looper. 
[Default: null]", ) + # Optional + parser.add_argument( + "--disable-progressbar", + action="store_true", + help="Optional: Disable progressbar", + ) # Optional parser.add_argument( diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 11f99b6..06cc825 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -9,3 +9,4 @@ requests>=2.28.1 xmltodict>=0.13.0 pandas>=1.4.3 peppy>=0.34.0 +rich>=12.5.1 From 66717331f5c8948b2ead512a3740b95da98d7dd5 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Aug 2022 19:44:52 -0400 Subject: [PATCH 11/61] New, optimized version to peppy project --- geofetch/geofetch.py | 182 +++++++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 83 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 02da536..965fadf 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -14,7 +14,7 @@ import requests import xmltodict from rich.progress import track -#from tqdm import tqdm +import yaml # import tarfile import time @@ -201,7 +201,7 @@ def get_project(self, input: str) -> Dict[peppy.Project]: input, self.metadata_expanded, self.just_metadata ) - raw_project_dict = {} + project_dict = {} if self.processed: if self.supp_by == "all": @@ -216,36 +216,30 @@ def get_project(self, input: str) -> Dict[peppy.Project]: # samples self.supp_by = "samples" samples_list = self.fetch_all(input=acc_GSE) - if len(samples_list) > 0: - raw_project_dict[acc_GSE + "_samples"] = samples_list + project_dict[acc_GSE + "_samples"] = samples_list # series self.supp_by = "series" series_list = self.fetch_all(input=acc_GSE) - if len(series_list) > 0: - raw_project_dict[acc_GSE + "_series"] = series_list + project_dict[acc_GSE + "_series"] = series_list else: ser_list = self.fetch_all(input=acc_GSE) - if len(ser_list) > 0: - raw_project_dict[acc_GSE + "_" + self.supp_by] = ser_list + project_dict[acc_GSE + "_" + self.supp_by] = ser_list else: if 
data_source_all: # samples self.supp_by = "samples" samples_list = self.fetch_all(input=input) - if len(samples_list) > 0: - raw_project_dict["project_samples"] = samples_list + project_dict["project_samples"] = samples_list # series self.supp_by = "series" series_list = self.fetch_all(input=input) - if len(series_list) > 0: - raw_project_dict["project_series"] = series_list + project_dict["project_series"] = series_list else: ser_list = self.fetch_all(input=input) - if len(ser_list) > 0: - raw_project_dict["project_" + self.supp_by] = ser_list + project_dict["project_" + self.supp_by] = ser_list else: # Not sure about below code... @@ -253,21 +247,13 @@ def get_project(self, input: str) -> Dict[peppy.Project]: self.acc_anno = False for acc_GSE in acc_GSE_list.keys(): project_dict = self.fetch_all(input=input) - if len(project_dict) > 0: - raw_project_dict[acc_GSE + "_raw_samples"] = project_dict + project_dict[acc_GSE + "_raw_samples"] = project_dict else: ser_dict = self.fetch_all(input=input) - if len(ser_dict) > 0: - raw_project_dict["raw_samples"] = ser_dict + project_dict["raw_samples"] = ser_dict - new_dict = {} - for proj_key in raw_project_dict.keys(): - new_dict[proj_key] = peppy.Project().from_pandas( - pd.DataFrame(raw_project_dict[proj_key]) - ) - - return new_dict + return project_dict def fetch_all(self, input: str, name: str = None): """Main script driver/workflow""" @@ -639,49 +625,43 @@ def fetch_all(self, input: str, name: str = None): "PEP_samples", self.project_name + SAMPLE_SUPP_METADATA_FILE, ) - self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) + peppy_obj = self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) supp_series_path_meta = os.path.join( self.metadata_raw, "PEP_series", self.project_name + EXP_SUPP_METADATA_FILE, ) - self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) + peppy_obj = self._write_processed_annotation(processed_metadata_exp, 
supp_series_path_meta) elif self.supp_by == "samples": - if self.just_object: - return processed_metadata_samples - else: - supp_sample_path_meta = os.path.join( - self.metadata_raw, - "PEP_samples", - self.project_name + SAMPLE_SUPP_METADATA_FILE, - ) - self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) + supp_sample_path_meta = os.path.join( + self.metadata_raw, + "PEP_samples", + self.project_name + SAMPLE_SUPP_METADATA_FILE, + ) + peppy_obj = self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) elif self.supp_by == "series": - if self.just_object: - return processed_metadata_exp - else: - supp_series_path_meta = os.path.join( - self.metadata_raw, - "PEP_series", - self.project_name + EXP_SUPP_METADATA_FILE, - ) - self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) + supp_series_path_meta = os.path.join( + self.metadata_raw, + "PEP_series", + self.project_name + EXP_SUPP_METADATA_FILE, + ) + peppy_obj = self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) + + else: + return None + + if self.just_object: + return peppy_obj # saving PEPs for raw data else: - if not self.just_object: - self._write_raw_annotation(metadata_dict, subannotation_dict) - else: - raw_meta_list = [] - for meta_key in metadata_dict.keys(): - for srx_key in metadata_dict[meta_key].keys(): - metadata_dict[meta_key][srx_key]["gse_number"] = meta_key - metadata_dict[meta_key][srx_key]["srx_number"] = srx_key - raw_meta_list.append(metadata_dict[meta_key][srx_key]) - return raw_meta_list + return_value = self._write_raw_annotation(metadata_dict, subannotation_dict) + if self.just_object: + return return_value + def _expand_metadata_list(self, metadata_list, dict_key): """ @@ -888,14 +868,6 @@ def _write_processed_annotation(self, processed_metadata, file_annotation_path): ] modifiers_str = "\n ".join(d for d in meta_list_str) - with open(file_annotation_path, "w") as m_file: - 
dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys()) - dict_writer.writeheader() - dict_writer.writerows(processed_metadata) - self._LOGGER.info( - "\033[92mFile %s has been saved successfully\033[0m" % file_annotation_path - ) - geofetchdir = os.path.dirname(__file__) config_template = os.path.join(geofetchdir, "config_processed_template.yaml") @@ -915,17 +887,34 @@ def _write_processed_annotation(self, processed_metadata, file_annotation_path): placeholder = "{" + str(k) + "}" template = template.replace(placeholder, str(v)) - # save .yaml file - yaml_name = os.path.split(file_annotation_path)[1][:-4] + ".yaml" - config = os.path.join(pep_file_folder, yaml_name) - self._write(config, template, msg_pre=" Config file: ") + if not self.just_object: + with open(file_annotation_path, "w") as m_file: + dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys()) + dict_writer.writeheader() + dict_writer.writerows(processed_metadata) + self._LOGGER.info( + "\033[92mFile %s has been saved successfully\033[0m" % file_annotation_path + ) + + # save .yaml file + yaml_name = os.path.split(file_annotation_path)[1][:-4] + ".yaml" + config = os.path.join(pep_file_folder, yaml_name) + self._write(config, template, msg_pre=" Config file: ") + + # save .pep.yaml file + if self.add_dotfile: + dot_yaml_path = os.path.join(pep_file_folder, ".pep.yaml") + self._create_dot_yaml(dot_yaml_path, yaml_name) + + return None + + else: + pd_value = pd.DataFrame(processed_metadata) - # save .pep.yaml file - if self.add_dotfile: - dot_yaml_path = os.path.join(pep_file_folder, ".pep.yaml") - self._create_dot_yaml(dot_yaml_path, yaml_name) + conf = yaml.load(template, Loader=yaml.Loader) + proj = peppy.Project().from_pandas(pd_value, config=conf) + return proj - return True @staticmethod def _sanitize_name(name_str: str): @@ -1011,7 +1000,8 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): file_annotation = os.path.join( self.metadata_raw, 
self.project_name + "_annotation.csv" ) - self._write_gsm_annotation(metadata_dict_combined, file_annotation, use_key_subset=self.use_key_subset) + + # Write combined subannotation table if len(subannotation_dict_combined) > 0: file_subannotation = os.path.join( @@ -1022,6 +1012,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): else: file_subannotation = "null" subanot_path_yaml = f"" + # Write project config file if not self.config_template: geofetchdir = os.path.dirname(__file__) @@ -1039,16 +1030,40 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): } for k, v in template_values.items(): placeholder = "{" + str(k) + "}" + # v1 = v.replace(':', '=') template = template.replace(placeholder, str(v)) - # save .yaml file - yaml_name = self.project_name + "_config.yaml" - config = os.path.join(self.metadata_raw, yaml_name) - self._write(config, template, msg_pre=" Config file: ") - # save .pep.yaml file - if self.add_dotfile: - dot_yaml_path = os.path.join(self.metadata_raw, ".pep.yaml") - self._create_dot_yaml(dot_yaml_path, yaml_name) + if not self.just_object: + # write annotation + self._write_gsm_annotation(metadata_dict_combined, file_annotation, use_key_subset=self.use_key_subset) + # write subannotation + if len(subannotation_dict_combined) > 0: + self._write_subannotation(subannotation_dict_combined, file_subannotation) + + # save .yaml file + yaml_name = self.project_name + "_config.yaml" + config = os.path.join(self.metadata_raw, yaml_name) + self._write(config, template, msg_pre=" Config file: ") + + # save .pep.yaml file + if self.add_dotfile: + dot_yaml_path = os.path.join(self.metadata_raw, ".pep.yaml") + self._create_dot_yaml(dot_yaml_path, yaml_name) + + else: + sddd = [metadata_dict_combined.values()] + sdddd = subannotation_dict_combined.values() + meta_df = pd.DataFrame.from_dict(metadata_dict_combined, orient='index') + + print(meta_df) + + sub_meta_df = pd.DataFrame.from_dict(subannotation_dict_combined, 
orient='index') + if sub_meta_df.empty: + sub_meta_df = None + conf = yaml.load(template, Loader=yaml.Loader) + + proj = peppy.Project().from_pandas(meta_df, sub_meta_df, conf) + return proj @staticmethod def _create_dot_yaml(file_path: str, yaml_path: str): @@ -1115,7 +1130,7 @@ def _separate_common_meta( if first_key: if len(str(nb_sample[1][this_key])) <= del_limit: new_meta_project.append( - {this_key: nb_sample[1][this_key]} + {this_key: f'\"{nb_sample[1][this_key]}\"'} ) first_key = False del meta_list[nb_sample[0]][this_key] @@ -2186,7 +2201,8 @@ def main(): args = _parse_cmdl(sys.argv[1:]) args_dict = vars(args) args_dict["args"] = args - Geofetcher(**args_dict).fetch_all(args_dict["input"]) + #Geofetcher(**args_dict).fetch_all(args_dict["input"]) + Geofetcher(**args_dict).get_project(args_dict["input"]) if __name__ == "__main__": From 23afea22d5c74c9bcd4462d5fd90e4ac967ae90b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Aug 2022 20:26:55 -0400 Subject: [PATCH 12/61] small corrections --- geofetch/geofetch.py | 223 +++++++++++++++++++++++++---------------- tests/test_geofetch.py | 4 +- 2 files changed, 138 insertions(+), 89 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 965fadf..a65f40e 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -204,42 +204,13 @@ def get_project(self, input: str) -> Dict[peppy.Project]: project_dict = {} if self.processed: - if self.supp_by == "all": - data_source_all = True - else: - data_source_all = False - if self.acc_anno: self.acc_anno = False for acc_GSE in acc_GSE_list.keys(): - if data_source_all: - # samples - self.supp_by = "samples" - samples_list = self.fetch_all(input=acc_GSE) - project_dict[acc_GSE + "_samples"] = samples_list + project_dict.update(self.fetch_all(input=acc_GSE, name=acc_GSE)) - # series - self.supp_by = "series" - series_list = self.fetch_all(input=acc_GSE) - project_dict[acc_GSE + "_series"] = series_list - else: - ser_list = 
self.fetch_all(input=acc_GSE) - project_dict[acc_GSE + "_" + self.supp_by] = ser_list else: - if data_source_all: - # samples - self.supp_by = "samples" - samples_list = self.fetch_all(input=input) - project_dict["project_samples"] = samples_list - - # series - self.supp_by = "series" - series_list = self.fetch_all(input=input) - project_dict["project_series"] = series_list - - else: - ser_list = self.fetch_all(input=input) - project_dict["project_" + self.supp_by] = ser_list + project_dict.update(self.fetch_all(input=input, name="project")) else: # Not sure about below code... @@ -282,9 +253,11 @@ def fetch_all(self, input: str, name: str = None): acc_GSE_keys = acc_GSE_list.keys() nkeys = len(acc_GSE_keys) ncount = 0 - for acc_GSE in track(acc_GSE_list.keys(), - description="Processing... ", - disable=self.disable_progressbar): + for acc_GSE in track( + acc_GSE_list.keys(), + description="Processing... ", + disable=self.disable_progressbar, + ): ncount += 1 if ncount <= self.skip: @@ -346,11 +319,15 @@ def fetch_all(self, input: str, name: str = None): ( meta_processed_samples, meta_processed_series, - ) = self._get_list_of_processed_files(file_gse_content, file_gsm_content) + ) = self._get_list_of_processed_files( + file_gse_content, file_gsm_content + ) # taking into account list of GSM that is specified in the input file gsm_list = acc_GSE_list[acc_GSE] - meta_processed_samples = self._filter_gsm(meta_processed_samples, gsm_list) + meta_processed_samples = self._filter_gsm( + meta_processed_samples, gsm_list + ) # Unify keys: meta_processed_samples = self._unify_list_keys(meta_processed_samples) meta_processed_series = self._unify_list_keys(meta_processed_series) @@ -359,17 +336,25 @@ def fetch_all(self, input: str, name: str = None): list_of_keys = self._get_list_of_keys(meta_processed_samples) self._LOGGER.info("Expanding metadata list...") for key_in_list in list_of_keys: - meta_processed_samples = self._expand_metadata_list(meta_processed_samples, 
key_in_list) + meta_processed_samples = self._expand_metadata_list( + meta_processed_samples, key_in_list + ) # series list_of_keys_series = self._get_list_of_keys(meta_processed_series) self._LOGGER.info("Expanding metadata list...") for key_in_list in list_of_keys_series: - meta_processed_series = self._expand_metadata_list(meta_processed_series, key_in_list) + meta_processed_series = self._expand_metadata_list( + meta_processed_series, key_in_list + ) # convert column names to lowercase and underscore - meta_processed_samples = self._standardize_colnames(meta_processed_samples) - meta_processed_series = self._standardize_colnames(meta_processed_series) + meta_processed_samples = self._standardize_colnames( + meta_processed_samples + ) + meta_processed_series = self._standardize_colnames( + meta_processed_series + ) if not self.acc_anno: # adding metadata from current experiment to the project @@ -385,7 +370,9 @@ def fetch_all(self, input: str, name: str = None): f"{acc_GSE}_samples", acc_GSE + SAMPLE_SUPP_METADATA_FILE, ) - self._write_processed_annotation(meta_processed_samples, pep_acc_path_sample) + self._write_processed_annotation( + meta_processed_samples, pep_acc_path_sample + ) # series pep_acc_path_exp = os.path.join( @@ -393,21 +380,27 @@ def fetch_all(self, input: str, name: str = None): f"{acc_GSE}_series", acc_GSE + EXP_SUPP_METADATA_FILE, ) - self._write_processed_annotation(meta_processed_series, pep_acc_path_exp) + self._write_processed_annotation( + meta_processed_series, pep_acc_path_exp + ) elif self.supp_by == "samples": pep_acc_path_sample = os.path.join( self.metadata_raw, f"{acc_GSE}_samples", acc_GSE + SAMPLE_SUPP_METADATA_FILE, ) - self._write_processed_annotation(meta_processed_samples, pep_acc_path_sample) + self._write_processed_annotation( + meta_processed_samples, pep_acc_path_sample + ) elif self.supp_by == "series": pep_acc_path_exp = os.path.join( self.metadata_raw, f"{acc_GSE}_series", acc_GSE + EXP_SUPP_METADATA_FILE, ) - 
self._write_processed_annotation(meta_processed_series, pep_acc_path_exp) + self._write_processed_annotation( + meta_processed_series, pep_acc_path_exp + ) if not self.just_metadata: data_geo_folder = os.path.join(self.geo_folder, acc_GSE) @@ -447,11 +440,15 @@ def fetch_all(self, input: str, name: str = None): else: # download gsm metadata - gsm_metadata = self._get_gsm_metadata(acc_GSE, acc_GSE_list, file_gsm_content) + gsm_metadata = self._get_gsm_metadata( + acc_GSE, acc_GSE_list, file_gsm_content + ) metadata_dict[acc_GSE] = gsm_metadata # download gsm metadata - SRP_list_result = self._get_SRA_meta(file_gse_content, gsm_metadata, file_sra) + SRP_list_result = self._get_SRA_meta( + file_gse_content, gsm_metadata, file_sra + ) if not SRP_list_result: # delete current acc if no raw data was found # del metadata_dict[acc_GSE] @@ -495,8 +492,12 @@ def fetch_all(self, input: str, name: str = None): # Otherwise, record that there's SRA data for this run. # And set a few columns that are used as input to the Looper # print("Updating columns for looper") - self._update_columns(gsm_metadata, experiment, sample_name=sample_name, - read_type=line["LibraryLayout"]) + self._update_columns( + gsm_metadata, + experiment, + sample_name=sample_name, + read_type=line["LibraryLayout"], + ) # Some experiments are flagged in SRA as having multiple runs. if gsm_metadata[experiment].get("SRR") is not None: @@ -591,7 +592,9 @@ def fetch_all(self, input: str, name: str = None): self._LOGGER.warning( "Bam conversion failed with sam-dump. Trying fastq-dump..." 
) - self._sra_bam_conversion2(bam_file, run_name, self.picard_path) + self._sra_bam_conversion2( + bam_file, run_name, self.picard_path + ) except FileNotFoundError as err: self._LOGGER.info( @@ -622,17 +625,27 @@ def fetch_all(self, input: str, name: str = None): if self.supp_by == "all": supp_sample_path_meta = os.path.join( self.metadata_raw, - "PEP_samples", + "s", self.project_name + SAMPLE_SUPP_METADATA_FILE, ) - peppy_obj = self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) + peppy_obj_samples = self._write_processed_annotation( + processed_metadata_samples, supp_sample_path_meta + ) supp_series_path_meta = os.path.join( self.metadata_raw, "PEP_series", self.project_name + EXP_SUPP_METADATA_FILE, ) - peppy_obj = self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) + peppy_obj_series = self._write_processed_annotation( + processed_metadata_exp, supp_series_path_meta + ) + + if self.just_object: + return { + f"{name}_samples": peppy_obj_samples, + f"{name}_series": peppy_obj_series, + } elif self.supp_by == "samples": supp_sample_path_meta = os.path.join( @@ -640,7 +653,11 @@ def fetch_all(self, input: str, name: str = None): "PEP_samples", self.project_name + SAMPLE_SUPP_METADATA_FILE, ) - peppy_obj = self._write_processed_annotation(processed_metadata_samples, supp_sample_path_meta) + peppy_obj = self._write_processed_annotation( + processed_metadata_samples, supp_sample_path_meta + ) + if self.just_object: + return {f"{name}_samples": peppy_obj} elif self.supp_by == "series": supp_series_path_meta = os.path.join( @@ -648,21 +665,21 @@ def fetch_all(self, input: str, name: str = None): "PEP_series", self.project_name + EXP_SUPP_METADATA_FILE, ) - peppy_obj = self._write_processed_annotation(processed_metadata_exp, supp_series_path_meta) + peppy_obj = self._write_processed_annotation( + processed_metadata_exp, supp_series_path_meta + ) + if self.just_object: + return {f"{name}_PEP_series": peppy_obj} 
else: return None - if self.just_object: - return peppy_obj - # saving PEPs for raw data else: return_value = self._write_raw_annotation(metadata_dict, subannotation_dict) if self.just_object: return return_value - def _expand_metadata_list(self, metadata_list, dict_key): """ Expanding list items in the list by creating new items or joining them @@ -808,7 +825,9 @@ def _find_genome(self, metadata_list): metadata_list[sample[0]]["sample_genome"] = sample_genome return metadata_list - def _write_gsm_annotation(self, gsm_metadata, file_annotation, use_key_subset=False): + def _write_gsm_annotation( + self, gsm_metadata, file_annotation, use_key_subset=False + ): """ Write metadata sheet out as an annotation file. @@ -861,8 +880,12 @@ def _write_processed_annotation(self, processed_metadata, file_annotation_path): processed_metadata = self._find_genome(processed_metadata) # filtering huge annotation strings that are repeating for each sample - processed_metadata, proj_meta = self._separate_common_meta(processed_metadata, self.const_limit_project, - self.const_limit_discard, self.attr_limit_truncate) + processed_metadata, proj_meta = self._separate_common_meta( + processed_metadata, + self.const_limit_project, + self.const_limit_discard, + self.attr_limit_truncate, + ) meta_list_str = [ f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta ] @@ -893,7 +916,8 @@ def _write_processed_annotation(self, processed_metadata, file_annotation_path): dict_writer.writeheader() dict_writer.writerows(processed_metadata) self._LOGGER.info( - "\033[92mFile %s has been saved successfully\033[0m" % file_annotation_path + "\033[92mFile %s has been saved successfully\033[0m" + % file_annotation_path ) # save .yaml file @@ -915,7 +939,6 @@ def _write_processed_annotation(self, processed_metadata, file_annotation_path): proj = peppy.Project().from_pandas(pd_value, config=conf) return proj - @staticmethod def _sanitize_name(name_str: str): """ @@ -961,7 +984,9 @@ def 
_write_raw_annotation(self, metadata_dict, subannotation_dict): fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] # sanitize sample names - fixed_dict[key_sample]["sample_name"] = self._sanitize_name(fixed_dict[key_sample]["sample_name"]) + fixed_dict[key_sample]["sample_name"] = self._sanitize_name( + fixed_dict[key_sample]["sample_name"] + ) metadata_dict[key] = fixed_dict @@ -971,13 +996,18 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): self.metadata_expanded, acc_GSE + "_annotation.csv" ) if self.acc_anno: - self._write_gsm_annotation(gsm_metadata, file_annotation, use_key_subset=self.use_key_subset) + self._write_gsm_annotation( + gsm_metadata, file_annotation, use_key_subset=self.use_key_subset + ) metadata_dict_combined.update(gsm_metadata) # filtering huge annotation strings that are repeating for each sample - metadata_dict_combined, proj_meta = self._separate_common_meta(metadata_dict_combined, self.const_limit_project, - self.const_limit_discard, - self.attr_limit_truncate) + metadata_dict_combined, proj_meta = self._separate_common_meta( + metadata_dict_combined, + self.const_limit_project, + self.const_limit_discard, + self.attr_limit_truncate, + ) meta_list_str = [ f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta ] @@ -1001,14 +1031,15 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): self.metadata_raw, self.project_name + "_annotation.csv" ) - # Write combined subannotation table if len(subannotation_dict_combined) > 0: file_subannotation = os.path.join( self.metadata_raw, self.project_name + "_subannotation.csv" ) self._write_subannotation(subannotation_dict_combined, file_subannotation) - subanot_path_yaml = f"subsample_table: {os.path.basename(file_subannotation)}" + subanot_path_yaml = ( + f"subsample_table: {os.path.basename(file_subannotation)}" + ) else: file_subannotation = "null" subanot_path_yaml = f"" @@ -1030,15 +1061,20 @@ def _write_raw_annotation(self, 
metadata_dict, subannotation_dict): } for k, v in template_values.items(): placeholder = "{" + str(k) + "}" - # v1 = v.replace(':', '=') template = template.replace(placeholder, str(v)) if not self.just_object: # write annotation - self._write_gsm_annotation(metadata_dict_combined, file_annotation, use_key_subset=self.use_key_subset) + self._write_gsm_annotation( + metadata_dict_combined, + file_annotation, + use_key_subset=self.use_key_subset, + ) # write subannotation if len(subannotation_dict_combined) > 0: - self._write_subannotation(subannotation_dict_combined, file_subannotation) + self._write_subannotation( + subannotation_dict_combined, file_subannotation + ) # save .yaml file yaml_name = self.project_name + "_config.yaml" @@ -1051,13 +1087,11 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): self._create_dot_yaml(dot_yaml_path, yaml_name) else: - sddd = [metadata_dict_combined.values()] - sdddd = subannotation_dict_combined.values() - meta_df = pd.DataFrame.from_dict(metadata_dict_combined, orient='index') - - print(meta_df) + meta_df = pd.DataFrame.from_dict(metadata_dict_combined, orient="index") - sub_meta_df = pd.DataFrame.from_dict(subannotation_dict_combined, orient='index') + sub_meta_df = pd.DataFrame.from_dict( + subannotation_dict_combined, orient="index" + ) if sub_meta_df.empty: sub_meta_df = None conf = yaml.load(template, Loader=yaml.Loader) @@ -1130,7 +1164,7 @@ def _separate_common_meta( if first_key: if len(str(nb_sample[1][this_key])) <= del_limit: new_meta_project.append( - {this_key: f'\"{nb_sample[1][this_key]}\"'} + {this_key: f'"{nb_sample[1][this_key]}"'} ) first_key = False del meta_list[nb_sample[0]][this_key] @@ -1492,8 +1526,12 @@ def _get_list_of_processed_files( meta_processed_samples[nb]["files"].append(file_url_gsm) self._check_file_existance(meta_processed_samples) - meta_processed_samples = self._separate_list_of_files(meta_processed_samples) - meta_processed_samples = 
self._separate_file_url(meta_processed_samples) + meta_processed_samples = self._separate_list_of_files( + meta_processed_samples + ) + meta_processed_samples = self._separate_file_url( + meta_processed_samples + ) self._LOGGER.info( f"\nTotal number of processed SAMPLES files found is: " @@ -1509,9 +1547,13 @@ def _get_list_of_processed_files( ) if self.filter_re: - meta_processed_samples = self._run_filter(meta_processed_samples) + meta_processed_samples = self._run_filter( + meta_processed_samples + ) if self.filter_size: - meta_processed_samples = self._run_size_filter(meta_processed_samples) + meta_processed_samples = self._run_size_filter( + meta_processed_samples + ) # other files than .tar: saving them into meta_processed_series list else: @@ -1823,7 +1865,9 @@ def _get_SRP_list(self, srp_number: str) -> list: raise x.raise_for_status() id_results = x.json()["esearchresult"]["idlist"] if len(id_results) > 500: - id_results = [id_results[x:x + 100] for x in range(0, len(id_results), 100)] + id_results = [ + id_results[x : x + 100] for x in range(0, len(id_results), 100) + ] else: id_results = [id_results] @@ -1834,7 +1878,9 @@ def _get_SRP_list(self, srp_number: str) -> list: y = requests.get(id_api) if y.status_code != 200: - self._LOGGER.error(f"Error in ncbi efetch response in SRA fetching: {x.status_code}") + self._LOGGER.error( + f"Error in ncbi efetch response in SRA fetching: {x.status_code}" + ) raise y.raise_for_status() xml_result = y.text SRP_list.extend(xmltodict.parse(xml_result)["SraRunInfo"]["Row"]) @@ -2201,8 +2247,9 @@ def main(): args = _parse_cmdl(sys.argv[1:]) args_dict = vars(args) args_dict["args"] = args - #Geofetcher(**args_dict).fetch_all(args_dict["input"]) - Geofetcher(**args_dict).get_project(args_dict["input"]) + # Geofetcher(**args_dict).fetch_all(args_dict["input"]) + abc = Geofetcher(**args_dict).get_project(args_dict["input"]) + abc if __name__ == "__main__": diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index 
6c227d9..4c8eeb8 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -248,7 +248,9 @@ def test_size_filter(self, meta_list, output, initiate_geofetcher): def test_large_meta_separation( self, init_meta_data, result_sample, result_proj, initiate_geofetcher ): - samp, proj = initiate_geofetcher._separate_common_meta(init_meta_data, max_len=0) + samp, proj = initiate_geofetcher._separate_common_meta( + init_meta_data, max_len=0 + ) assert samp == result_sample assert proj == result_proj From 27fdfc1ac22a3d6544fec192ebc5a56b7adb9980 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 22 Aug 2022 20:39:07 -0400 Subject: [PATCH 13/61] run corrections --- geofetch/geofetch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index a65f40e..0fad630 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -2247,9 +2247,8 @@ def main(): args = _parse_cmdl(sys.argv[1:]) args_dict = vars(args) args_dict["args"] = args - # Geofetcher(**args_dict).fetch_all(args_dict["input"]) - abc = Geofetcher(**args_dict).get_project(args_dict["input"]) - abc + Geofetcher(**args_dict).fetch_all(args_dict["input"]) + if __name__ == "__main__": From ffa1fddcee92ebf538ba55501b7639a26781e0c1 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 23 Aug 2022 10:34:16 -0400 Subject: [PATCH 14/61] yaml regex fix --- geofetch/geofetch.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 0fad630..82b9906 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1163,8 +1163,10 @@ def _separate_common_meta( if this_key not in list_keys_diff: if first_key: if len(str(nb_sample[1][this_key])) <= del_limit: + new_str = nb_sample[1][this_key].replace('"', '') + new_str = re.sub('[^A-Za-z0-9]+', ' ', new_str) new_meta_project.append( - {this_key: f'"{nb_sample[1][this_key]}"'} + {this_key: f'"{new_str}"'} ) first_key = False del 
meta_list[nb_sample[0]][this_key] From 4b3ec7145ac4a6804c865e7b500cf022490231ed Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 24 Aug 2022 12:02:13 -0400 Subject: [PATCH 15/61] some changes to get_project() --- geofetch/geofetch.py | 81 ++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 82b9906..932e20f 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -56,7 +56,7 @@ def __init__( skip: int = 0, acc_anno: bool = False, use_key_subset: bool = False, - processed: bool = True, + processed: bool = False, data_source: str = "samples", filter: str = None, filter_size: str = None, @@ -174,51 +174,59 @@ def __init__( self._LOGGER.info(f"Metadata folder: {self.metadata_expanded}") - # check to make sure prefetch is callable - if not just_metadata and not processed: - if not is_command_callable("prefetch"): - raise SystemExit( - "To download raw data You must first install the sratoolkit, with prefetch in your PATH." - " Installation instruction: http://geofetch.databio.org/en/latest/install/" - ) - # Some sanity checks before proceeding if bam_conversion and not just_metadata and not self._which("samtools"): raise SystemExit("For SAM/BAM processing, samtools should be on PATH.") self.just_object = False - def get_project(self, input: str) -> Dict[peppy.Project]: + def get_project(self, input: str, just_metadata: bool = True, discard_soft: bool = True) -> Dict[peppy.Project]: """ Function for fetching projects from GEO|SRA and receiving peppy project :param input: GSE number, or path to file of GSE numbers + :param just_metadata: process only metadata + :param discard_soft: clean run, without downloading soft files :return: peppy project or list of project, if acc_anno is set. 
""" - self.just_metadata = True + self.just_metadata = just_metadata self.just_object = True self.disable_progressbar = True + self.discard_soft = discard_soft acc_GSE_list = parse_accessions( input, self.metadata_expanded, self.just_metadata ) project_dict = {} + # processed data: if self.processed: if self.acc_anno: + nkeys = len(acc_GSE_list.keys()) + ncount = 0 self.acc_anno = False for acc_GSE in acc_GSE_list.keys(): + ncount += 1 + self._LOGGER.info( + f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m" + ) project_dict.update(self.fetch_all(input=acc_GSE, name=acc_GSE)) - else: project_dict.update(self.fetch_all(input=input, name="project")) + # raw data: else: # Not sure about below code... if self.acc_anno: self.acc_anno = False + nkeys = len(acc_GSE_list.keys()) + ncount = 0 for acc_GSE in acc_GSE_list.keys(): + ncount += 1 + self._LOGGER.info( + f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m" + ) project_dict = self.fetch_all(input=input) - project_dict[acc_GSE + "_raw_samples"] = project_dict + project_dict[acc_GSE + "_raw"] = project_dict else: ser_dict = self.fetch_all(input=input) @@ -234,6 +242,14 @@ def fetch_all(self, input: str, name: str = None): else: self.project_name = os.path.splitext(os.path.basename(input))[0] + # check to make sure prefetch is callable + if not self.just_metadata and not self.processed: + if not is_command_callable("prefetch"): + raise SystemExit( + "To download raw data You must first install the sratoolkit, with prefetch in your PATH." + " Installation instruction: http://geofetch.databio.org/en/latest/install/" + ) + acc_GSE_list = parse_accessions( input, self.metadata_expanded, self.just_metadata ) @@ -264,9 +280,11 @@ def fetch_all(self, input: str, name: str = None): continue elif ncount == self.skip + 1: self._LOGGER.info(f"Skipped {self.skip} accessions. 
Starting now.") - self._LOGGER.info( - f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m" - ) + + if not self.just_object: + self._LOGGER.info( + f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m" + ) if len(re.findall(GSE_PATTERN, acc_GSE)) != 1: self._LOGGER.debug(len(re.findall(GSE_PATTERN, acc_GSE))) @@ -983,13 +1001,15 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): ): fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] - # sanitize sample names + # sanitize names fixed_dict[key_sample]["sample_name"] = self._sanitize_name( fixed_dict[key_sample]["sample_name"] ) metadata_dict[key] = fixed_dict + # TODO: should be checked: + # annotation table metadata_dict_combined = {} for acc_GSE, gsm_metadata in metadata_dict.items(): file_annotation = os.path.join( @@ -1001,6 +1021,19 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): ) metadata_dict_combined.update(gsm_metadata) + # subatnotation table + subannotation_dict_combined = {} + for acc_GSE, gsm_multi_table in subannotation_dict.items(): + file_subannotation = os.path.join( + self.metadata_expanded, acc_GSE + "_subannotation.csv" + ) + if self.acc_anno: + self._write_subannotation(gsm_multi_table, file_subannotation) + subannotation_dict_combined.update(gsm_multi_table) + + self._LOGGER.info( + "Creating complete project annotation sheets and config file..." 
+ ) # filtering huge annotation strings that are repeating for each sample metadata_dict_combined, proj_meta = self._separate_common_meta( metadata_dict_combined, @@ -1013,17 +1046,6 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): ] modifiers_str = "\n ".join(d for d in meta_list_str) - subannotation_dict_combined = {} - for acc_GSE, gsm_multi_table in subannotation_dict.items(): - file_subannotation = os.path.join( - self.metadata_expanded, acc_GSE + "_subannotation.csv" - ) - if self.acc_anno: - self._write_subannotation(gsm_multi_table, file_subannotation) - subannotation_dict_combined.update(gsm_multi_table) - self._LOGGER.info( - "Creating complete project annotation sheets and config file..." - ) # If the project included more than one GSE, we can now output combined # annotation tables for the entire project. # Write combined annotation sheet @@ -1063,7 +1085,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): placeholder = "{" + str(k) + "}" template = template.replace(placeholder, str(v)) - if not self.just_object: + if not self.just_object and not self.acc_anno: # write annotation self._write_gsm_annotation( metadata_dict_combined, @@ -1089,6 +1111,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): else: meta_df = pd.DataFrame.from_dict(metadata_dict_combined, orient="index") + # TODO: correct error here: sub_meta_df = pd.DataFrame.from_dict( subannotation_dict_combined, orient="index" ) From 457f9aa789bb15f15b44afdd6c8fd4cb7198d201 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 25 Aug 2022 11:53:29 -0400 Subject: [PATCH 16/61] peppy init fix --- geofetch/geofetch.py | 95 +++++++++++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 31 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 932e20f..52264ae 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1,7 +1,5 @@ #!/usr/bin/env python3 -from __future__ import annotations - 
__author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"] import argparse @@ -180,7 +178,7 @@ def __init__( self.just_object = False - def get_project(self, input: str, just_metadata: bool = True, discard_soft: bool = True) -> Dict[peppy.Project]: + def get_project(self, input: str, just_metadata: bool = True, discard_soft: bool = True) -> Dict[peppy.Project, peppy.Project]: """ Function for fetching projects from GEO|SRA and receiving peppy project :param input: GSE number, or path to file of GSE numbers @@ -225,11 +223,11 @@ def get_project(self, input: str, just_metadata: bool = True, discard_soft: bool self._LOGGER.info( f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m" ) - project_dict = self.fetch_all(input=input) + project_dict = self.fetch_all(input=acc_GSE) project_dict[acc_GSE + "_raw"] = project_dict else: - ser_dict = self.fetch_all(input=input) + ser_dict = self.fetch_all(input=acc_GSE_list) project_dict["raw_samples"] = ser_dict return project_dict @@ -965,7 +963,8 @@ def _sanitize_name(name_str: str): :return: sanitized strings """ new_str = name_str - for odd_char in list(punctuation): + punctuation1 = r"""!"#$%&'()*,./:;<=>?@[\]^_`{|}~""" + for odd_char in list(punctuation1): new_str = new_str.replace(odd_char, "_") new_str = new_str.replace(" ", "_").replace("__", "_") return new_str @@ -1001,10 +1000,10 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): ): fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] - # sanitize names - fixed_dict[key_sample]["sample_name"] = self._sanitize_name( - fixed_dict[key_sample]["sample_name"] - ) + # # sanitize names + # fixed_dict[key_sample]["sample_name"] = self._sanitize_name( + # fixed_dict[key_sample]["sample_name"] + # ) metadata_dict[key] = fixed_dict @@ -1012,14 +1011,15 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): # annotation table metadata_dict_combined = {} for acc_GSE, gsm_metadata in 
metadata_dict.items(): + gsm_metadata1 = self._standardize_colnames(gsm_metadata) file_annotation = os.path.join( self.metadata_expanded, acc_GSE + "_annotation.csv" ) if self.acc_anno: self._write_gsm_annotation( - gsm_metadata, file_annotation, use_key_subset=self.use_key_subset + gsm_metadata1, file_annotation, use_key_subset=self.use_key_subset ) - metadata_dict_combined.update(gsm_metadata) + metadata_dict_combined.update(gsm_metadata1) # subatnotation table subannotation_dict_combined = {} @@ -1111,12 +1111,17 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): else: meta_df = pd.DataFrame.from_dict(metadata_dict_combined, orient="index") - # TODO: correct error here: - sub_meta_df = pd.DataFrame.from_dict( - subannotation_dict_combined, orient="index" - ) + # open list: + new_sub_list = [] + for sub_key in subannotation_dict_combined.keys(): + new_sub_list.extend([col_item for col_item in subannotation_dict_combined[sub_key]]) + + sub_meta_df = pd.DataFrame(new_sub_list, columns=["sample_name", "SRX", "SRR"]) + if sub_meta_df.empty: sub_meta_df = None + else: + sub_meta_df = [sub_meta_df] conf = yaml.load(template, Loader=yaml.Loader) proj = peppy.Project().from_pandas(meta_df, sub_meta_df, conf) @@ -1133,7 +1138,7 @@ def _create_dot_yaml(file_path: str, yaml_path: str): file.writelines(f"config_file: {yaml_path}") def _separate_common_meta( - self, meta_list, max_len=50, del_limit=250, attr_limit_truncate=500 + self, meta_list: Union[List, Dict], max_len: int = 50, del_limit: int = 250, attr_limit_truncate: int = 500 ): """ This function is separating information for the experiment from a sample @@ -1145,16 +1150,11 @@ def _separate_common_meta( list of samples metadata dictionaries and 2: list of common samples metadata dictionaries that are linked to the project. 
""" + # check if meta_list is dict and converting it to list input_is_dict = False if isinstance(meta_list, dict): input_is_dict = True - new_meta_list = [] - for key in meta_list: - new_dict = meta_list[key] - new_dict["big_key"] = key - new_meta_list.append(new_dict) - - meta_list = new_meta_list + meta_list = self._dict_to_list_convector(proj_dict=meta_list) list_of_keys = self._get_list_of_keys(meta_list) list_keys_diff = [] @@ -1210,11 +1210,7 @@ def _separate_common_meta( meta_list = new_list if input_is_dict: - new_sample_dict = {} - for sample in meta_list: - new_sample_dict[sample["big_key"]] = sample - meta_list = new_sample_dict - + meta_list = self._dict_to_list_convector(proj_list=meta_list) return meta_list, new_meta_project def _standardize_colnames(self, meta_list): @@ -1223,6 +1219,12 @@ def _standardize_colnames(self, meta_list): :param list meta_list: list of dictionaries of samples :return : list of dictionaries of samples with standard colnames """ + # check if meta_list is dict and converting it to list + input_is_dict = False + if isinstance(meta_list, dict): + input_is_dict = True + meta_list = self._dict_to_list_convector(proj_dict=meta_list) + new_metalist = [] list_keys = self._get_list_of_keys(meta_list) for item_nb, values in enumerate(meta_list): @@ -1237,8 +1239,39 @@ def _standardize_colnames(self, meta_list): except KeyError: pass + if input_is_dict: + new_metalist = self._dict_to_list_convector(proj_list=new_metalist) + return new_metalist + @staticmethod + def _dict_to_list_convector(proj_dict: Dict = None, proj_list: List = None) -> Union[Dict, List]: + """ + Convector project dict to list and vice versa + :param proj_dict: project dictionary + :param proj_list: project list + :return: converted values + """ + if proj_dict is not None: + new_meta_list = [] + for key in proj_dict: + new_dict = proj_dict[key] + new_dict["big_key"] = key + new_meta_list.append(new_dict) + + meta_list = new_meta_list + + elif proj_list is not None: + 
new_sample_dict = {} + for sample in proj_list: + new_sample_dict[sample["big_key"]] = sample + meta_list = new_sample_dict + + else: + raise ValueError + + return meta_list + def _download_SRA_file(self, run_name): """ Downloading SRA file by ising 'prefetch' utility from the SRA Toolkit @@ -1633,14 +1666,14 @@ def _separate_list_of_files(meta_list, col_name="files"): into two different dicts """ separated_list = [] - if type(meta_list) == list: + if isinstance(meta_list, list): for meta_elem in meta_list: for file_elem in meta_elem[col_name]: new_dict = meta_elem.copy() new_dict.pop(col_name, None) new_dict["file"] = file_elem separated_list.append(new_dict) - elif type(meta_list) == dict: + elif isinstance(meta_list, dict): for file_elem in meta_list[col_name]: new_dict = meta_list.copy() new_dict.pop(col_name, None) From c8a3b8df34f084a85a8982201911ea9e5ad11fdc Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 26 Aug 2022 14:17:27 -0400 Subject: [PATCH 17/61] test fixed --- geofetch/geofetch.py | 54 +++++++++++++++++++++++++++++------------- tests/test_geofetch.py | 14 ++++++++--- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 52264ae..db6d415 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -178,7 +178,9 @@ def __init__( self.just_object = False - def get_project(self, input: str, just_metadata: bool = True, discard_soft: bool = True) -> Dict[peppy.Project, peppy.Project]: + def get_project( + self, input: str, just_metadata: bool = True, discard_soft: bool = True + ) -> Dict[peppy.Project, peppy.Project]: """ Function for fetching projects from GEO|SRA and receiving peppy project :param input: GSE number, or path to file of GSE numbers @@ -223,8 +225,8 @@ def get_project(self, input: str, just_metadata: bool = True, discard_soft: bool self._LOGGER.info( f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m" ) - project_dict = 
self.fetch_all(input=acc_GSE) - project_dict[acc_GSE + "_raw"] = project_dict + project = self.fetch_all(input=acc_GSE) + project_dict[acc_GSE + "_raw"] = project else: ser_dict = self.fetch_all(input=acc_GSE_list) @@ -999,7 +1001,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): or value_sample["sample_name"] is None ): fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] - + # TODO: should be corrected: # # sanitize names # fixed_dict[key_sample]["sample_name"] = self._sanitize_name( # fixed_dict[key_sample]["sample_name"] @@ -1007,7 +1009,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): metadata_dict[key] = fixed_dict - # TODO: should be checked: + # annotation table metadata_dict_combined = {} for acc_GSE, gsm_metadata in metadata_dict.items(): @@ -1015,6 +1017,7 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): file_annotation = os.path.join( self.metadata_expanded, acc_GSE + "_annotation.csv" ) + # for each sample if self.acc_anno: self._write_gsm_annotation( gsm_metadata1, file_annotation, use_key_subset=self.use_key_subset @@ -1027,10 +1030,13 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): file_subannotation = os.path.join( self.metadata_expanded, acc_GSE + "_subannotation.csv" ) + # for each sample: if self.acc_anno: self._write_subannotation(gsm_multi_table, file_subannotation) subannotation_dict_combined.update(gsm_multi_table) + # TODO: were is .yaml file for each acc_anno? + self._LOGGER.info( "Creating complete project annotation sheets and config file..." 
) @@ -1114,9 +1120,13 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): # open list: new_sub_list = [] for sub_key in subannotation_dict_combined.keys(): - new_sub_list.extend([col_item for col_item in subannotation_dict_combined[sub_key]]) + new_sub_list.extend( + [col_item for col_item in subannotation_dict_combined[sub_key]] + ) - sub_meta_df = pd.DataFrame(new_sub_list, columns=["sample_name", "SRX", "SRR"]) + sub_meta_df = pd.DataFrame( + new_sub_list, columns=["sample_name", "SRX", "SRR"] + ) if sub_meta_df.empty: sub_meta_df = None @@ -1138,7 +1148,11 @@ def _create_dot_yaml(file_path: str, yaml_path: str): file.writelines(f"config_file: {yaml_path}") def _separate_common_meta( - self, meta_list: Union[List, Dict], max_len: int = 50, del_limit: int = 250, attr_limit_truncate: int = 500 + self, + meta_list: Union[List, Dict], + max_len: int = 50, + del_limit: int = 250, + attr_limit_truncate: int = 500, ): """ This function is separating information for the experiment from a sample @@ -1186,11 +1200,11 @@ def _separate_common_meta( if this_key not in list_keys_diff: if first_key: if len(str(nb_sample[1][this_key])) <= del_limit: - new_str = nb_sample[1][this_key].replace('"', '') - new_str = re.sub('[^A-Za-z0-9]+', ' ', new_str) - new_meta_project.append( - {this_key: f'"{new_str}"'} - ) + new_str = nb_sample[1][this_key] + if isinstance(nb_sample[1][this_key], str): + new_str = nb_sample[1][this_key].replace('"', "") + new_str = re.sub("[^A-Za-z0-9]+", " ", new_str) + new_meta_project.append({this_key: new_str}) first_key = False del meta_list[nb_sample[0]][this_key] except KeyError: @@ -1245,7 +1259,9 @@ def _standardize_colnames(self, meta_list): return new_metalist @staticmethod - def _dict_to_list_convector(proj_dict: Dict = None, proj_list: List = None) -> Union[Dict, List]: + def _dict_to_list_convector( + proj_dict: Dict = None, proj_list: List = None + ) -> Union[Dict, List]: """ Convector project dict to list and vice versa 
:param proj_dict: project dictionary @@ -1522,13 +1538,18 @@ def _get_list_of_processed_files( self.metadata_expanded, gse_numb + "_file_list.txt" ) + # TODO: make new function of code below: if not os.path.isfile(filelist_path) or self.refresh_metadata: result = requests.get(tar_files_list_url) if result.ok: filelist_raw_text = result.text if not self.discard_soft: - with open(filelist_path, "w") as f: - f.write(filelist_raw_text) + try: + with open(filelist_path, "w") as f: + f.write(filelist_raw_text) + except OSError: + self._LOGGER.warning(f"{filelist_path} not found. File won't be saved..") + else: raise Exception(f"error in requesting tar_files_list") else: @@ -2308,7 +2329,6 @@ def main(): Geofetcher(**args_dict).fetch_all(args_dict["input"]) - if __name__ == "__main__": try: sys.exit(main()) diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index 4c8eeb8..d26cbb1 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -1,3 +1,4 @@ +import geofetch from geofetch import parse_accessions, Geofetcher, utils import os import pytest @@ -15,6 +16,7 @@ def get_soft_path(gse_numb, sample_len, series_len): run test test_file_list """ return ( + gse_numb, os.path.join(GSE_FILES, gse_numb, GSE_SOFT_NAME), os.path.join(GSE_FILES, gse_numb, GSM_SOFT_NAME), sample_len, @@ -75,15 +77,21 @@ def initiate_geofetcher(self, tmpdir): yield instance @pytest.mark.parametrize( - "soft_gse, soft_gsm, sample_len, series_len", processed_meta_file_test + "gse_numb,soft_gse, soft_gsm, sample_len, series_len", processed_meta_file_test ) def test_file_list( - self, soft_gse, soft_gsm, sample_len, series_len, initiate_geofetcher + self, gse_numb, soft_gse, soft_gsm, sample_len, series_len, initiate_geofetcher ): + file_gse_content = geofetch.Accession(gse_numb).fetch_metadata( + soft_gse, typename="GSE", clean=False + ) + file_gsm_content = geofetch.Accession(gse_numb).fetch_metadata( + soft_gsm, typename="GSM", clean=False + ) ( meta_processed_samples, 
meta_processed_series, - ) = initiate_geofetcher._get_list_of_processed_files(soft_gse, soft_gsm) + ) = initiate_geofetcher._get_list_of_processed_files(file_gse_content, file_gsm_content) assert len(meta_processed_samples) == sample_len assert len(meta_processed_series) == series_len From a63558926b36666ff1025fdad573a09f5e8a0eec Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 26 Aug 2022 14:26:33 -0400 Subject: [PATCH 18/61] lint --- geofetch/geofetch.py | 5 +++-- requirements/requirements-all.txt | 2 +- tests/test_geofetch.py | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index db6d415..860876c 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1009,7 +1009,6 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): metadata_dict[key] = fixed_dict - # annotation table metadata_dict_combined = {} for acc_GSE, gsm_metadata in metadata_dict.items(): @@ -1548,7 +1547,9 @@ def _get_list_of_processed_files( with open(filelist_path, "w") as f: f.write(filelist_raw_text) except OSError: - self._LOGGER.warning(f"{filelist_path} not found. File won't be saved..") + self._LOGGER.warning( + f"{filelist_path} not found. File won't be saved.." 
+ ) else: raise Exception(f"error in requesting tar_files_list") diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 06cc825..f8a0182 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -7,6 +7,6 @@ logmuse>=0.2.7 ubiquerg>=0.6.0 requests>=2.28.1 xmltodict>=0.13.0 -pandas>=1.4.3 +pandas>=1.3.5 peppy>=0.34.0 rich>=12.5.1 diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index d26cbb1..f9e268b 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -91,7 +91,9 @@ def test_file_list( ( meta_processed_samples, meta_processed_series, - ) = initiate_geofetcher._get_list_of_processed_files(file_gse_content, file_gsm_content) + ) = initiate_geofetcher._get_list_of_processed_files( + file_gse_content, file_gsm_content + ) assert len(meta_processed_samples) == sample_len assert len(meta_processed_series) == series_len From 0bedaf96f2ccce1ffd23c9a57f6349a62c7fb470 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 26 Aug 2022 14:42:19 -0400 Subject: [PATCH 19/61] fixed test --- geofetch/geofetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 860876c..ab1d817 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -643,7 +643,7 @@ def fetch_all(self, input: str, name: str = None): if self.supp_by == "all": supp_sample_path_meta = os.path.join( self.metadata_raw, - "s", + "PEP_samples", self.project_name + SAMPLE_SUPP_METADATA_FILE, ) peppy_obj_samples = self._write_processed_annotation( From f4d5adabc5e395afd2ea2e1eac34ade9e6dc2f0c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 26 Aug 2022 15:08:22 -0400 Subject: [PATCH 20/61] added changelog --- docs/changelog.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/changelog.md b/docs/changelog.md index 2adcd81..89bcb72 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,12 @@ # Changelog +## [0.11.0] -- 2022-XX-XX 
+- Added initialization of peppy Project without saving any files functionality +- Added progress bar +- Fixed None issue in config file +- Changed way of saving soft files to request library +- + ## [0.10.1] -- 2022-08-04 - Updated metadata fetching requests from SRA database From 3e690fd84def3b0c381c31b0855eda99cd168a61 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 26 Aug 2022 16:45:15 -0400 Subject: [PATCH 21/61] added corrections to PR --- geofetch/geofetch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index ab1d817..0655838 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -63,7 +63,7 @@ def __init__( bam_folder: str = "", fq_folder: str = "", sra_folder: str = "", - bam_conversion=False, + bam_conversion: bool = False, picard_path: str = "", input: str = None, const_limit_project: int = 50, @@ -180,7 +180,7 @@ def __init__( def get_project( self, input: str, just_metadata: bool = True, discard_soft: bool = True - ) -> Dict[peppy.Project, peppy.Project]: + ) -> Dict[str, peppy.Project]: """ Function for fetching projects from GEO|SRA and receiving peppy project :param input: GSE number, or path to file of GSE numbers From 655ade122f6fe13711ebc7669ed347784cb6b7ec Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 26 Aug 2022 16:56:29 -0400 Subject: [PATCH 22/61] added encoding-UTF-8 to requests --- geofetch/geofetch.py | 2 ++ geofetch/utils.py | 1 + 2 files changed, 3 insertions(+) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 0655838..b54eb08 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1541,6 +1541,7 @@ def _get_list_of_processed_files( if not os.path.isfile(filelist_path) or self.refresh_metadata: result = requests.get(tar_files_list_url) if result.ok: + result.encoding = "UTF-8" filelist_raw_text = result.text if not self.discard_soft: try: @@ -1941,6 +1942,7 @@ def _get_SRP_list(self, srp_number: str) -> list: x = 
requests.post(ncbi_esearch) if x.status_code != 200: + x.encoding = "UTF-8" self._LOGGER.error(f"Error in ncbi esearch response: {x.status_code}") raise x.raise_for_status() id_results = x.json()["esearchresult"]["idlist"] diff --git a/geofetch/utils.py b/geofetch/utils.py index f3f97ff..99735b7 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -202,6 +202,7 @@ def fetch_metadata( result = requests.get(full_url) if result.ok: + result.encoding = "UTF-8" result_text = result.text result_list = result_text.replace("\r", "").split("\n") result_list = [elem for elem in result_list if len(elem) > 0] From 93f84b048e6f5a76e29227a5acf34c130257611c Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 29 Aug 2022 13:35:42 -0400 Subject: [PATCH 23/61] Refactored processed functions --- geofetch/const.py | 2 + geofetch/geofetch.py | 557 +++++++++++++++++++++---------------------- 2 files changed, 279 insertions(+), 280 deletions(-) diff --git a/geofetch/const.py b/geofetch/const.py index 6bc0ff4..aa727d0 100644 --- a/geofetch/const.py +++ b/geofetch/const.py @@ -39,3 +39,5 @@ NCBI_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=sra&term={SRP_NUMBER}&retmax=999&rettype=uilist&retmode=json" NCBI_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id={ID}&rettype=runinfo&retmode=xml" + +NEW_GENOME_COL_NAME = "ref_genome" \ No newline at end of file diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index b54eb08..49b16ff 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -17,9 +17,9 @@ # import tarfile import time -from ._version import __version__ -from .const import * -from .utils import ( +from _version import __version__ +from const import * +from utils import ( Accession, parse_accessions, parse_SOFT_line, @@ -31,7 +31,7 @@ import logmuse from ubiquerg import expandpath, is_command_callable from io import StringIO -from typing import List, Union, Dict +from typing import List, Union, Dict, Tuple, 
NoReturn import peppy import pandas as pd @@ -42,38 +42,38 @@ class Geofetcher: """ def __init__( - self, - name: str = "", - metadata_root: str = "", - metadata_folder: str = "", - just_metadata: bool = False, - refresh_metadata: bool = False, - config_template: str = None, - pipeline_samples: str = None, - pipeline_project: str = None, - skip: int = 0, - acc_anno: bool = False, - use_key_subset: bool = False, - processed: bool = False, - data_source: str = "samples", - filter: str = None, - filter_size: str = None, - geo_folder: str = ".", - split_experiments: bool = False, - bam_folder: str = "", - fq_folder: str = "", - sra_folder: str = "", - bam_conversion: bool = False, - picard_path: str = "", - input: str = None, - const_limit_project: int = 50, - const_limit_discard: int = 250, - attr_limit_truncate: int = 500, - discard_soft: bool = False, - add_dotfile: bool = False, - disable_progressbar: bool = False, - opts=None, - **kwargs, + self, + name: str = "", + metadata_root: str = "", + metadata_folder: str = "", + just_metadata: bool = False, + refresh_metadata: bool = False, + config_template: str = None, + pipeline_samples: str = None, + pipeline_project: str = None, + skip: int = 0, + acc_anno: bool = False, + use_key_subset: bool = False, + processed: bool = False, + data_source: str = "samples", + filter: str = None, + filter_size: str = None, + geo_folder: str = ".", + split_experiments: bool = False, + bam_folder: str = "", + fq_folder: str = "", + sra_folder: str = "", + bam_conversion: bool = False, + picard_path: str = "", + input: str = None, + const_limit_project: int = 50, + const_limit_discard: int = 250, + attr_limit_truncate: int = 500, + discard_soft: bool = False, + add_dotfile: bool = False, + disable_progressbar: bool = False, + opts=None, + **kwargs, ): if opts is not None: @@ -179,7 +179,7 @@ def __init__( self.just_object = False def get_project( - self, input: str, just_metadata: bool = True, discard_soft: bool = True + self, input: 
str, just_metadata: bool = True, discard_soft: bool = True ) -> Dict[str, peppy.Project]: """ Function for fetching projects from GEO|SRA and receiving peppy project @@ -264,15 +264,15 @@ def fetch_all(self, input: str, name: str = None): subannotation_dict = {} failed_runs = [] processed_metadata_samples = [] - processed_metadata_exp = [] + processed_metadata_series = [] acc_GSE_keys = acc_GSE_list.keys() nkeys = len(acc_GSE_keys) ncount = 0 for acc_GSE in track( - acc_GSE_list.keys(), - description="Processing... ", - disable=self.disable_progressbar, + acc_GSE_list.keys(), + description="Processing... ", + disable=self.disable_progressbar, ): ncount += 1 @@ -298,9 +298,6 @@ def fetch_all(self, input: str, name: str = None): f"Limit to: {list(acc_GSE_list[acc_GSE])}" ) # a list of GSM#s - if self.refresh_metadata: - self._LOGGER.info("Refreshing metadata...") - # For each GSE acc, produce a series of metadata files file_gse = os.path.join(self.metadata_expanded, acc_GSE + "_GSE.soft") file_gsm = os.path.join(self.metadata_expanded, acc_GSE + "_GSM.soft") @@ -333,128 +330,26 @@ def fetch_all(self, input: str, name: str = None): # download processed data if self.processed: - # try: - ( - meta_processed_samples, - meta_processed_series, - ) = self._get_list_of_processed_files( - file_gse_content, file_gsm_content - ) - - # taking into account list of GSM that is specified in the input file gsm_list = acc_GSE_list[acc_GSE] - meta_processed_samples = self._filter_gsm( - meta_processed_samples, gsm_list - ) - # Unify keys: - meta_processed_samples = self._unify_list_keys(meta_processed_samples) - meta_processed_series = self._unify_list_keys(meta_processed_series) - - # samples - list_of_keys = self._get_list_of_keys(meta_processed_samples) - self._LOGGER.info("Expanding metadata list...") - for key_in_list in list_of_keys: - meta_processed_samples = self._expand_metadata_list( - meta_processed_samples, key_in_list - ) + meta_processed_samples, meta_processed_series = 
self.fetch_processed_one(acc_gse=acc_GSE, + gse_file_content=file_gse_content, + gsm_file_content=file_gsm_content, + gsm_filter_list=gsm_list) - # series - list_of_keys_series = self._get_list_of_keys(meta_processed_series) - self._LOGGER.info("Expanding metadata list...") - for key_in_list in list_of_keys_series: - meta_processed_series = self._expand_metadata_list( - meta_processed_series, key_in_list - ) + # download processed files: + if not self.just_metadata: + self._download_processed_data(acc_gse=acc_GSE, meta_processed_samples=meta_processed_samples, + meta_processed_series=meta_processed_series) - # convert column names to lowercase and underscore - meta_processed_samples = self._standardize_colnames( - meta_processed_samples - ) - meta_processed_series = self._standardize_colnames( - meta_processed_series - ) + # generating PEPs for processed files: + if self.acc_anno: + self._generate_processed_meta(acc_GSE, meta_processed_samples, meta_processed_series) - if not self.acc_anno: + else: # adding metadata from current experiment to the project processed_metadata_samples.extend(meta_processed_samples) - processed_metadata_exp.extend(meta_processed_series) - - # save PEP for each accession if acc-anno flag is true - if self.acc_anno and len(acc_GSE_list.keys()) > 1: - if self.supp_by == "all": - # samples - pep_acc_path_sample = os.path.join( - self.metadata_raw, - f"{acc_GSE}_samples", - acc_GSE + SAMPLE_SUPP_METADATA_FILE, - ) - self._write_processed_annotation( - meta_processed_samples, pep_acc_path_sample - ) - - # series - pep_acc_path_exp = os.path.join( - self.metadata_raw, - f"{acc_GSE}_series", - acc_GSE + EXP_SUPP_METADATA_FILE, - ) - self._write_processed_annotation( - meta_processed_series, pep_acc_path_exp - ) - elif self.supp_by == "samples": - pep_acc_path_sample = os.path.join( - self.metadata_raw, - f"{acc_GSE}_samples", - acc_GSE + SAMPLE_SUPP_METADATA_FILE, - ) - self._write_processed_annotation( - meta_processed_samples, 
pep_acc_path_sample - ) - elif self.supp_by == "series": - pep_acc_path_exp = os.path.join( - self.metadata_raw, - f"{acc_GSE}_series", - acc_GSE + EXP_SUPP_METADATA_FILE, - ) - self._write_processed_annotation( - meta_processed_series, pep_acc_path_exp - ) - - if not self.just_metadata: - data_geo_folder = os.path.join(self.geo_folder, acc_GSE) - self._LOGGER.debug("Data folder: " + data_geo_folder) - - if self.supp_by == "all": - processed_samples_files = [ - each_file["file_url"] - for each_file in meta_processed_samples - ] - for file_url in processed_samples_files: - self._download_processed_file(file_url, data_geo_folder) - - processed_series_files = [ - each_file["file_url"] for each_file in meta_processed_series - ] - for file_url in processed_series_files: - self._download_processed_file(file_url, data_geo_folder) + processed_metadata_series.extend(meta_processed_series) - elif self.supp_by == "samples": - processed_samples_files = [ - each_file["file_url"] - for each_file in meta_processed_samples - ] - for file_url in processed_samples_files: - self._download_processed_file(file_url, data_geo_folder) - - elif self.supp_by == "series": - processed_series_files = [ - each_file["file_url"] for each_file in meta_processed_series - ] - for file_url in processed_series_files: - self._download_processed_file(file_url, data_geo_folder) - # except Exception as processed_exception: - # failed_runs.append(acc_GSE) - # self._LOGGER.warning(f"Error occurred: {processed_exception}") else: # download gsm metadata @@ -622,7 +517,11 @@ def fetch_all(self, input: str, name: str = None): # accumulate subannotations subannotation_dict[acc_GSE] = gsm_multi_table - # Logging additional information about processing + if len(failed_runs) > 0: + self._LOGGER.warn( + f"The following samples could not be downloaded: {failed_runs}" + ) + self._LOGGER.info(f"Finished processing {len(acc_GSE_list)} accession(s)") # Logging cleaning process: @@ -630,67 +529,16 @@ def fetch_all(self, 
input: str, name: str = None): self._LOGGER.info(f"Cleaning soft files ...") clean_soft_files(self.metadata_raw) - if len(failed_runs) > 0: - self._LOGGER.warn( - f"The following samples could not be downloaded: {failed_runs}" - ) - ####################################################################################### # saving PEPs for processed data if self.processed: if not self.acc_anno: - if self.supp_by == "all": - supp_sample_path_meta = os.path.join( - self.metadata_raw, - "PEP_samples", - self.project_name + SAMPLE_SUPP_METADATA_FILE, - ) - peppy_obj_samples = self._write_processed_annotation( - processed_metadata_samples, supp_sample_path_meta - ) - - supp_series_path_meta = os.path.join( - self.metadata_raw, - "PEP_series", - self.project_name + EXP_SUPP_METADATA_FILE, - ) - peppy_obj_series = self._write_processed_annotation( - processed_metadata_exp, supp_series_path_meta - ) - - if self.just_object: - return { - f"{name}_samples": peppy_obj_samples, - f"{name}_series": peppy_obj_series, - } - - elif self.supp_by == "samples": - supp_sample_path_meta = os.path.join( - self.metadata_raw, - "PEP_samples", - self.project_name + SAMPLE_SUPP_METADATA_FILE, - ) - peppy_obj = self._write_processed_annotation( - processed_metadata_samples, supp_sample_path_meta - ) - if self.just_object: - return {f"{name}_samples": peppy_obj} - - elif self.supp_by == "series": - supp_series_path_meta = os.path.join( - self.metadata_raw, - "PEP_series", - self.project_name + EXP_SUPP_METADATA_FILE, - ) - peppy_obj = self._write_processed_annotation( - processed_metadata_exp, supp_series_path_meta - ) - if self.just_object: - return {f"{name}_PEP_series": peppy_obj} - - else: - return None + return_value = self._generate_processed_meta(name="PEP_processed", + meta_processed_samples=processed_metadata_samples, + meta_processed_series=processed_metadata_series) + if self.just_object: + return return_value # saving PEPs for raw data else: @@ -698,14 +546,159 @@ def fetch_all(self, 
input: str, name: str = None): if self.just_object: return return_value - def _expand_metadata_list(self, metadata_list, dict_key): + def fetch_processed_one(self, acc_gse: str, gse_file_content: list, gsm_file_content: list, + gsm_filter_list: dict) -> Tuple: + """ + Fetching just one processed GSE project + :param acc_gse: GSE number + :param gsm_file_content: gse soft file content + :param gse_file_content: gsm soft file content + :param gsm_filter_list: list of gsm that have to be downloaded + :return: Tuple of project list of gsm samples and gse samples + """ + ( + meta_processed_samples, + meta_processed_series, + ) = self._get_list_of_processed_files( + gse_file_content, gsm_file_content + ) + + # taking into account list of GSM that is specified in the input file + meta_processed_samples = self._filter_gsm( + meta_processed_samples, gsm_filter_list + ) + # # Unify keys: -- probably we don't need this + # meta_processed_samples = self._unify_list_keys(meta_processed_samples) + # meta_processed_series = self._unify_list_keys(meta_processed_series) + + # samples + meta_processed_samples = self._expand_metadata_list(meta_processed_samples) + + # series + meta_processed_series = self._expand_metadata_list(meta_processed_series) + + # convert column names to lowercase and underscore + meta_processed_samples = self._standardize_colnames( + meta_processed_samples + ) + meta_processed_series = self._standardize_colnames( + meta_processed_series + ) + + return meta_processed_samples, meta_processed_series + + + def _generate_processed_meta(self, name: str, meta_processed_samples: list, meta_processed_series: list) -> dict: + """ + Generate and save PEPs for processed accessions. GEO has data in GSE and GSM, + conditions are used to decide which PEPs have to be saved. 
+ :param name: name of the folder/file where PEP will be saved + :param meta_processed_samples: + :param meta_processed_series: + :return: dict of objects if just_object is set, otherwise dicts of None """ - Expanding list items in the list by creating new items or joining them + return_objects = {f"{name}_samples": None, + f"{name}_series": None} + + if self.supp_by == "all": + # samples + pep_acc_path_sample = os.path.join( + self.metadata_raw, + f"{name}_samples", + name + SAMPLE_SUPP_METADATA_FILE, + ) + return_objects[f"{name}_samples"] = self._write_processed_annotation( + meta_processed_samples, pep_acc_path_sample, just_object=self.just_object, + ) + + # series + pep_acc_path_exp = os.path.join( + self.metadata_raw, + f"{name}_series", + name + EXP_SUPP_METADATA_FILE, + ) + return_objects[f"{name}_series"] = self._write_processed_annotation( + meta_processed_series, pep_acc_path_exp, just_object=self.just_object, + ) + + elif self.supp_by == "samples": + pep_acc_path_sample = os.path.join( + self.metadata_raw, + f"{name}_samples", + name + SAMPLE_SUPP_METADATA_FILE, + ) + return_objects[f"{name}_samples"] = self._write_processed_annotation( + meta_processed_samples, pep_acc_path_sample, just_object=self.just_object, + ) + elif self.supp_by == "series": + return_objects[f"{name}_series"] = pep_acc_path_exp = os.path.join( + self.metadata_raw, + f"{name}_series", + name + EXP_SUPP_METADATA_FILE, + ) + self._write_processed_annotation( + meta_processed_series, pep_acc_path_exp, just_object=self.just_object, + ) + + return return_objects + + def _download_processed_data(self, acc_gse: str, meta_processed_samples: list, + meta_processed_series: list) -> NoReturn: + data_geo_folder = os.path.join(self.geo_folder, acc_gse) + self._LOGGER.debug("Data folder: " + data_geo_folder) + + if self.supp_by == "all": + processed_samples_files = [ + each_file["file_url"] + for each_file in meta_processed_samples + ] + for file_url in processed_samples_files: + 
self._download_processed_file(file_url, data_geo_folder) + + processed_series_files = [ + each_file["file_url"] for each_file in meta_processed_series + ] + for file_url in processed_series_files: + self._download_processed_file(file_url, data_geo_folder) + + elif self.supp_by == "samples": + processed_samples_files = [ + each_file["file_url"] + for each_file in meta_processed_samples + ] + for file_url in processed_samples_files: + self._download_processed_file(file_url, data_geo_folder) + + elif self.supp_by == "series": + processed_series_files = [ + each_file["file_url"] for each_file in meta_processed_series + ] + for file_url in processed_series_files: + self._download_processed_file(file_url, data_geo_folder) + + def _expand_metadata_list(self, metadata_list: list) -> list: + """ + Expanding all lists of all items in the list by creating new items or joining them :param list metadata_list: list of dicts that store metadata - :param str dict_key: key in the dictionaries that have to be expanded + :return list: expanded metadata list + """ + self._LOGGER.info("Expanding metadata list...") + list_of_keys = self._get_list_of_keys(metadata_list) + for key_in_list in list_of_keys: + metadata_list = self._expand_metadata_list_item( + metadata_list, key_in_list + ) + return metadata_list - :return str: path to file written + def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): + """ + Expanding list of one element (item) in the list by creating new items or joining them + ["first1: fff", ...] 
-> separate columns + + :param list metadata_list: list of dicts that store metadata + :param str dict_key: key in the dictionaries that have to be expanded + :return list: expanded metadata list """ try: element_is_list = any( @@ -760,14 +753,14 @@ def _expand_metadata_list(self, metadata_list, dict_key): return metadata_list else: self._LOGGER.debug( - "metadata with %s was not expanded, as item is not list" % dict_key + f"Metadata with {dict_key} was not expanded, as item is not list" ) return metadata_list except KeyError as err: - self._LOGGER.warning("Key Error: %s" % err) + self._LOGGER.warning(f"expand_metadata_list: Key Error: {err}") return metadata_list - except ValueError as err1: - self._LOGGER.warning("Value Error: %s" % err1) + except ValueError as err: + self._LOGGER.warning("expand_metadata_list: Value Error: {err}") return metadata_list def _filter_gsm(self, meta_processed_samples: list, gsm_list: dict) -> list: @@ -800,7 +793,6 @@ def _get_list_of_keys(list_of_dict): Getting list of all keys that are in the dictionaries in the list :param list list_of_dict: list of dicts with metadata - :return list: list of dictionary keys """ @@ -815,7 +807,6 @@ def _unify_list_keys(self, processed_meta_list): same keys :param list processed_meta_list: list of dicts with metadata - :return str: list of unified dicts with metadata """ list_of_keys = self._get_list_of_keys(processed_meta_list) @@ -840,11 +831,11 @@ def _find_genome(self, metadata_list): sample_genome = "" for key in proj_gen_keys: sample_genome = " ".join([sample_genome, sample[1][key]]) - metadata_list[sample[0]]["sample_genome"] = sample_genome + metadata_list[sample[0]][NEW_GENOME_COL_NAME] = sample_genome return metadata_list def _write_gsm_annotation( - self, gsm_metadata, file_annotation, use_key_subset=False + self, gsm_metadata, file_annotation, use_key_subset=False ): """ Write metadata sheet out as an annotation file. 
@@ -873,11 +864,13 @@ def _write_gsm_annotation( self._LOGGER.info("\033[92mFile has been saved successfully\033[0m") return fp - def _write_processed_annotation(self, processed_metadata, file_annotation_path): + def _write_processed_annotation(self, processed_metadata: list, file_annotation_path: str, just_object: bool = False) -> Union[NoReturn, peppy.Project]: """ Saving annotation file by providing list of dictionaries with files metadata :param list processed_metadata: list of dictionaries with files metadata :param str file_annotation_path: the path to the metadata file that has to be saved + :type just_object: True, if you want to get peppy object without saving file + :return: """ if len(processed_metadata) == 0: self._LOGGER.info( @@ -928,7 +921,7 @@ def _write_processed_annotation(self, processed_metadata, file_annotation_path): placeholder = "{" + str(k) + "}" template = template.replace(placeholder, str(v)) - if not self.just_object: + if not just_object: with open(file_annotation_path, "w") as m_file: dict_writer = csv.DictWriter(m_file, processed_metadata[0].keys()) dict_writer.writeheader() @@ -997,8 +990,8 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): for key_sample, value_sample in value.items(): fixed_dict[key_sample] = value_sample if ( - value_sample["sample_name"] == "" - or value_sample["sample_name"] is None + value_sample["sample_name"] == "" + or value_sample["sample_name"] is None ): fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] # TODO: should be corrected: @@ -1147,11 +1140,11 @@ def _create_dot_yaml(file_path: str, yaml_path: str): file.writelines(f"config_file: {yaml_path}") def _separate_common_meta( - self, - meta_list: Union[List, Dict], - max_len: int = 50, - del_limit: int = 250, - attr_limit_truncate: int = 500, + self, + meta_list: Union[List, Dict], + max_len: int = 50, + del_limit: int = 250, + attr_limit_truncate: int = 500, ): """ This function is separating information for the 
experiment from a sample @@ -1259,7 +1252,7 @@ def _standardize_colnames(self, meta_list): @staticmethod def _dict_to_list_convector( - proj_dict: Dict = None, proj_list: List = None + proj_dict: Dict = None, proj_list: List = None ) -> Union[Dict, List]: """ Convector project dict to list and vice versa @@ -1350,10 +1343,10 @@ def _sra_bam_conversion(self, bam_file, run_name): # The -u here allows unaligned reads, and seems to be # required for some sra files regardless of aligned state cmd = ( - "sam-dump -u " - + os.path.join(self.sra_folder, run_name + ".sra") - + " | samtools view -bS - > " - + bam_file + "sam-dump -u " + + os.path.join(self.sra_folder, run_name + ".sra") + + " | samtools view -bS - > " + + bam_file ) # sam-dump -u SRR020515.sra | samtools view -bS - > test.bam @@ -1416,10 +1409,10 @@ def _sra_bam_conversion2(self, bam_file, run_name, picard_path=None): # check to make sure it worked cmd = ( - "fastq-dump --split-3 -O " - + os.path.realpath(self.sra_folder) - + " " - + os.path.join(self.sra_folder, run_name + ".sra") + "fastq-dump --split-3 -O " + + os.path.realpath(self.sra_folder) + + " " + + os.path.join(self.sra_folder, run_name + ".sra") ) self._LOGGER.info(f"Command: {cmd}") run_subprocess(cmd, shell=True) @@ -1500,7 +1493,7 @@ def _download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): self._LOGGER.info(f"\033[38;5;242mFile {full_filepath} exists.\033[0m") def _get_list_of_processed_files( - self, file_gse_content: list, file_gsm_content: list + self, file_gse_content: list, file_gsm_content: list ): """ Given a paths to GSE and GSM metafile create a list of dicts of metadata of processed files @@ -1530,7 +1523,7 @@ def _get_list_of_processed_files( # find and download filelist - file with information about files in tar index = file_url.rfind("/") tar_files_list_url = ( - "https" + file_url[3 : index + 1] + "filelist.txt" + "https" + file_url[3: index + 1] + "filelist.txt" ) # file_list_name filelist_path = 
os.path.join( @@ -1575,14 +1568,14 @@ def _get_list_of_processed_files( element_values = list(pl.values())[0] if not re.findall(SUPP_FILE_PATTERN, line_gsm): if ( - element_keys - not in meta_processed_samples[nb].keys() + element_keys + not in meta_processed_samples[nb].keys() ): meta_processed_samples[nb].update(pl) else: if ( - type(meta_processed_samples[nb][element_keys]) - is not list + type(meta_processed_samples[nb][element_keys]) + is not list ): meta_processed_samples[nb][element_keys] = [ meta_processed_samples[nb][element_keys] @@ -1792,7 +1785,11 @@ def _read_tar_filelist(raw_text: str): return files_info @staticmethod - def _get_value(all_line): + def _get_value(all_line: str): + """ + :param all_line: string with key value. (e.g. '!Series_geo_accession = GSE188720') + :return: value (e.g. GSE188720) + """ line_value = all_line.split("= ")[-1] return line_value.split(": ")[-1].rstrip("\n") @@ -1948,7 +1945,7 @@ def _get_SRP_list(self, srp_number: str) -> list: id_results = x.json()["esearchresult"]["idlist"] if len(id_results) > 500: id_results = [ - id_results[x : x + 100] for x in range(0, len(id_results), 100) + id_results[x: x + 100] for x in range(0, len(id_results), 100) ] else: id_results = [id_results] @@ -1994,8 +1991,8 @@ def _get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): if line[0] == "^": pl = parse_SOFT_line(line) if ( - len(acc_GSE_list[acc_GSE]) > 0 - and pl["SAMPLE"] not in GSM_limit_list + len(acc_GSE_list[acc_GSE]) > 0 + and pl["SAMPLE"] not in GSM_limit_list ): # sys.stdout.write(" Skipping " + a['SAMPLE'] + ".") current_sample_id = None @@ -2082,16 +2079,16 @@ def _parse_cmdl(cmdl): dest="metadata_root", default=safe_echo("SRAMETA"), help="Specify a parent folder location to store metadata. 
" - "The project name will be added as a subfolder " - "[Default: $SRAMETA:" + safe_echo("SRAMETA") + "]", + "The project name will be added as a subfolder " + "[Default: $SRAMETA:" + safe_echo("SRAMETA") + "]", ) parser.add_argument( "-u", "--metadata-folder", help="Specify an absolute folder location to store metadata. " - "No subfolder will be added. Overrides value of --metadata-root " - "[Default: Not used (--metadata-root is used by default)]", + "No subfolder will be added. Overrides value of --metadata-root " + "[Default: Not used (--metadata-root is used by default)]", ) parser.add_argument( @@ -2116,8 +2113,8 @@ def _parse_cmdl(cmdl): "--pipeline-samples", default=None, help="Optional: Specify one or more filepaths to SAMPLES pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. [Default: null]", + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", ) # Optional @@ -2125,8 +2122,8 @@ def _parse_cmdl(cmdl): "--pipeline-project", default=None, help="Optional: Specify one or more filepaths to PROJECT pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. [Default: null]", + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", ) # Optional parser.add_argument( @@ -2148,7 +2145,7 @@ def _parse_cmdl(cmdl): "--acc-anno", action="store_true", help="Optional: Produce annotation sheets for each accession." - " Project combined PEP for the whole project won't be produced.", + " Project combined PEP for the whole project won't be produced.", ) parser.add_argument( @@ -2162,7 +2159,7 @@ def _parse_cmdl(cmdl): type=int, default=50, help="Optional: Limit of the number of the constant sample characters " - "that should not be in project yaml. 
[Default: 50]", + "that should not be in project yaml. [Default: 50]", ) parser.add_argument( @@ -2170,7 +2167,7 @@ def _parse_cmdl(cmdl): type=int, default=250, help="Optional: Limit of the number of the constant sample characters " - "that should not be discarded [Default: 250]", + "that should not be discarded [Default: 250]", ) parser.add_argument( @@ -2178,8 +2175,8 @@ def _parse_cmdl(cmdl): type=int, default=500, help="Optional: Limit of the number of sample characters." - "Any attribute with more than X characters will truncate to the first X," - " where X is a number of characters [Default: 500]", + "Any attribute with more than X characters will truncate to the first X," + " where X is a number of characters [Default: 500]", ) parser.add_argument( @@ -2202,17 +2199,17 @@ def _parse_cmdl(cmdl): choices=["all", "samples", "series"], default="samples", help="Optional: Specifies the source of data on the GEO record" - " to retrieve processed data, which may be attached to the" - " collective series entity, or to individual samples. " - "Allowable values are: samples, series or both (all). " - "Ignored unless 'processed' flag is set. [Default: samples]", + " to retrieve processed data, which may be attached to the" + " collective series entity, or to individual samples. " + "Allowable values are: samples, series or both (all). " + "Ignored unless 'processed' flag is set. [Default: samples]", ) processed_group.add_argument( "--filter", default=None, help="Optional: Filter regex for processed filenames [Default: None]." - "Ignored unless 'processed' flag is set.", + "Ignored unless 'processed' flag is set.", ) processed_group.add_argument( @@ -2231,8 +2228,8 @@ def _parse_cmdl(cmdl): "--geo-folder", default=safe_echo("GEODATA"), help="Optional: Specify a location to store processed GEO files." - " Ignored unless 'processed' flag is set." - "[Default: $GEODATA:" + safe_echo("GEODATA") + "]", + " Ignored unless 'processed' flag is set." 
+ "[Default: $GEODATA:" + safe_echo("GEODATA") + "]", ) raw_group.add_argument( @@ -2254,8 +2251,8 @@ def _parse_cmdl(cmdl): help="""Optional: Specify folder of bam files. Geofetch will not download sra files when corresponding bam files already exist. [Default: $SRABAM:""" - + safe_echo("SRABAM") - + "]", + + safe_echo("SRABAM") + + "]", ) raw_group.add_argument( @@ -2266,8 +2263,8 @@ def _parse_cmdl(cmdl): help="""Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding fastq files already exist. [Default: $SRAFQ:""" - + safe_echo("SRAFQ") - + "]", + + safe_echo("SRAFQ") + + "]", ) # Deprecated; these are for bam conversion which now happens in sra_convert From d940a99d3dbb2278006f737b923a6d24f5a13b41 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 29 Aug 2022 21:57:11 -0400 Subject: [PATCH 24/61] Refactored raw functions 1 --- geofetch/const.py | 2 +- geofetch/geofetch.py | 687 +++++++++++++++++++++-------------------- tests/test_geofetch.py | 12 +- 3 files changed, 354 insertions(+), 347 deletions(-) diff --git a/geofetch/const.py b/geofetch/const.py index aa727d0..01d1e05 100644 --- a/geofetch/const.py +++ b/geofetch/const.py @@ -40,4 +40,4 @@ NCBI_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=sra&term={SRP_NUMBER}&retmax=999&rettype=uilist&retmode=json" NCBI_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id={ID}&rettype=runinfo&retmode=xml" -NEW_GENOME_COL_NAME = "ref_genome" \ No newline at end of file +NEW_GENOME_COL_NAME = "ref_genome" diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 49b16ff..5fee104 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -17,9 +17,9 @@ # import tarfile import time -from _version import __version__ -from const import * -from utils import ( +from ._version import __version__ +from .const import * +from .utils import ( Accession, parse_accessions, parse_SOFT_line, @@ -42,38 +42,38 @@ class Geofetcher: 
""" def __init__( - self, - name: str = "", - metadata_root: str = "", - metadata_folder: str = "", - just_metadata: bool = False, - refresh_metadata: bool = False, - config_template: str = None, - pipeline_samples: str = None, - pipeline_project: str = None, - skip: int = 0, - acc_anno: bool = False, - use_key_subset: bool = False, - processed: bool = False, - data_source: str = "samples", - filter: str = None, - filter_size: str = None, - geo_folder: str = ".", - split_experiments: bool = False, - bam_folder: str = "", - fq_folder: str = "", - sra_folder: str = "", - bam_conversion: bool = False, - picard_path: str = "", - input: str = None, - const_limit_project: int = 50, - const_limit_discard: int = 250, - attr_limit_truncate: int = 500, - discard_soft: bool = False, - add_dotfile: bool = False, - disable_progressbar: bool = False, - opts=None, - **kwargs, + self, + name: str = "", + metadata_root: str = "", + metadata_folder: str = "", + just_metadata: bool = False, + refresh_metadata: bool = False, + config_template: str = None, + pipeline_samples: str = None, + pipeline_project: str = None, + skip: int = 0, + acc_anno: bool = False, + use_key_subset: bool = False, + processed: bool = False, + data_source: str = "samples", + filter: str = None, + filter_size: str = None, + geo_folder: str = ".", + split_experiments: bool = False, + bam_folder: str = "", + fq_folder: str = "", + sra_folder: str = "", + bam_conversion: bool = False, + picard_path: str = "", + input: str = None, + const_limit_project: int = 50, + const_limit_discard: int = 250, + attr_limit_truncate: int = 500, + discard_soft: bool = False, + add_dotfile: bool = False, + disable_progressbar: bool = False, + opts=None, + **kwargs, ): if opts is not None: @@ -179,8 +179,8 @@ def __init__( self.just_object = False def get_project( - self, input: str, just_metadata: bool = True, discard_soft: bool = True - ) -> Dict[str, peppy.Project]: + self, input: str, just_metadata: bool = True, discard_soft: 
bool = True + ) -> dict: """ Function for fetching projects from GEO|SRA and receiving peppy project :param input: GSE number, or path to file of GSE numbers @@ -254,15 +254,9 @@ def fetch_all(self, input: str, name: str = None): input, self.metadata_expanded, self.just_metadata ) - # Loop through each accession. - # This will process that accession, produce metadata and download files for - # the GSM #s included in the list for each GSE#. - # acc_GSE = "GSE61150" # example - - # This loop populates a list of metadata. metadata_dict = {} subannotation_dict = {} - failed_runs = [] + processed_metadata_samples = [] processed_metadata_series = [] @@ -270,9 +264,9 @@ def fetch_all(self, input: str, name: str = None): nkeys = len(acc_GSE_keys) ncount = 0 for acc_GSE in track( - acc_GSE_list.keys(), - description="Processing... ", - disable=self.disable_progressbar, + acc_GSE_list.keys(), + description="Processing... ", + disable=self.disable_progressbar, ): ncount += 1 @@ -303,11 +297,6 @@ def fetch_all(self, input: str, name: str = None): file_gsm = os.path.join(self.metadata_expanded, acc_GSE + "_GSM.soft") file_sra = os.path.join(self.metadata_expanded, acc_GSE + "_SRA.csv") - # Grab the GSE and GSM SOFT files from GEO. - # The GSE file has metadata describing the experiment, which includes - # The SRA number we need to download the raw data from SRA - # The GSM file has metadata describing each sample, which we will use to - # produce a sample annotation sheet. 
if not os.path.isfile(file_gse) or self.refresh_metadata: file_gse_content = Accession(acc_GSE).fetch_metadata( file_gse, clean=self.discard_soft @@ -328,200 +317,75 @@ def fetch_all(self, input: str, name: str = None): file_gsm_content = gsm_file_obj.read().split("\n") file_gsm_content = [elem for elem in file_gsm_content if len(elem) > 0] + gsm_enter_dict = acc_GSE_list[acc_GSE] + # download processed data if self.processed: - gsm_list = acc_GSE_list[acc_GSE] - meta_processed_samples, meta_processed_series = self.fetch_processed_one(acc_gse=acc_GSE, - gse_file_content=file_gse_content, - gsm_file_content=file_gsm_content, - gsm_filter_list=gsm_list) + ( + meta_processed_samples, + meta_processed_series, + ) = self.fetch_processed_one( + gse_file_content=file_gse_content, + gsm_file_content=file_gsm_content, + gsm_filter_list=gsm_enter_dict, + ) # download processed files: if not self.just_metadata: - self._download_processed_data(acc_gse=acc_GSE, meta_processed_samples=meta_processed_samples, - meta_processed_series=meta_processed_series) + self._download_processed_data( + acc_gse=acc_GSE, + meta_processed_samples=meta_processed_samples, + meta_processed_series=meta_processed_series, + ) # generating PEPs for processed files: if self.acc_anno: - self._generate_processed_meta(acc_GSE, meta_processed_samples, meta_processed_series) + self._generate_processed_meta( + acc_GSE, meta_processed_samples, meta_processed_series + ) else: # adding metadata from current experiment to the project processed_metadata_samples.extend(meta_processed_samples) processed_metadata_series.extend(meta_processed_series) - else: - # download gsm metadata - gsm_metadata = self._get_gsm_metadata( + # read gsm metadata + gsm_metadata = self._read_gsm_metadata( acc_GSE, acc_GSE_list, file_gsm_content ) - metadata_dict[acc_GSE] = gsm_metadata - # download gsm metadata - SRP_list_result = self._get_SRA_meta( + # download sra metadata + srp_list_result = self._get_SRA_meta( file_gse_content, 
gsm_metadata, file_sra ) - if not SRP_list_result: + if not srp_list_result: + self._LOGGER.info(f"No SRP data, continuing ....") # delete current acc if no raw data was found # del metadata_dict[acc_GSE] continue - # Parse metadata from SRA - # Produce an annotated output from the GSM and SRARunInfo files. - # This will merge the GSM and SRA sample metadata into a dict of dicts, - # with one entry per sample. - # NB: There may be multiple SRA Runs (and thus lines in the RunInfo file) - # Corresponding to each sample. - # For multi samples (samples with multiple runs), we keep track of these - # relations in a separate table, which is called the subannotation table. - - gsm_multi_table = {} - self._LOGGER.info("Parsing SRA file to download SRR records") - - for line in SRP_list_result: - - # Only download if it's in the include list: - experiment = line["Experiment"] - run_name = line["Run"] - if experiment not in gsm_metadata: - # print(f"Skipping: {experiment}") - continue - - # local convenience variable - # possibly set in the input tsv file - sample_name = None # initialize to empty - try: - sample_name = acc_GSE_list[acc_GSE][ - gsm_metadata[experiment]["gsm_id"] - ] - except KeyError: - self._LOGGER.info( - f"sample_name does not exist, creating new..." - ) - if not sample_name or sample_name == "": - temp = gsm_metadata[experiment]["Sample_title"] - sample_name = self._sanitize_name(temp) - - # Otherwise, record that there's SRA data for this run. - # And set a few columns that are used as input to the Looper - # print("Updating columns for looper") - self._update_columns( - gsm_metadata, - experiment, - sample_name=sample_name, - read_type=line["LibraryLayout"], - ) - - # Some experiments are flagged in SRA as having multiple runs. - if gsm_metadata[experiment].get("SRR") is not None: - # This SRX number already has an entry in the table. 
- self._LOGGER.info( - f"Found additional run: {run_name} ({experiment})" - ) - - if ( - isinstance(gsm_metadata[experiment]["SRR"], str) - and experiment not in gsm_multi_table - ): - # Only one has been stuck in so far, make a list - gsm_multi_table[experiment] = [] - # Add first the original one, which was stored as a string - # previously - gsm_multi_table[experiment].append( - [ - sample_name, - experiment, - gsm_metadata[experiment]["SRR"], - ] - ) - # Now append the current SRR number in a list as [SRX, SRR] - gsm_multi_table[experiment].append( - [sample_name, experiment, run_name] - ) - else: - # this is the 3rd or later sample; the first two are done, - # so just add it. - gsm_multi_table[experiment].append( - [sample_name, experiment, run_name] - ) - - if self.split_experiments: - # Duplicate the gsm metadata for this experiment (copy to make sure - # it's not just an alias). - rep_number = len(gsm_multi_table[experiment]) - new_SRX = experiment + "_" + str(rep_number) - gsm_metadata[new_SRX] = copy.copy(gsm_metadata[experiment]) - # gsm_metadata[new_SRX]["SRX"] = new_SRX - gsm_metadata[new_SRX]["sample_name"] += "_" + str( - rep_number - ) - gsm_metadata[new_SRX]["SRR"] = run_name - else: - # Either way, set the srr code to multi in the main table. - gsm_metadata[experiment]["SRR"] = "multi" - else: - # The first SRR for this SRX is added to GSM metadata - gsm_metadata[experiment]["SRR"] = run_name - - self._LOGGER.info(f"Getting SRR: {run_name} ({experiment})") - bam_file = ( - "" - if self.bam_folder == "" - else os.path.join(self.bam_folder, run_name + ".bam") - ) - fq_file = ( - "" - if self.fq_folder == "" - else os.path.join(self.fq_folder, run_name + "_1.fq") + else: + self._LOGGER.info("Parsing SRA file to download SRR records") + gsm_multi_table = self._process_sra_meta( + srp_list_result, gsm_enter_dict, gsm_metadata ) - # TODO: sam-dump has a built-in prefetch. I don't have to do - # any of this stuff... 
This also solves the bad sam-dump issues. - - if os.path.exists(bam_file): - self._LOGGER.info(f"BAM found: {bam_file} . Skipping...") - elif os.path.exists(fq_file): - self._LOGGER.info(f"FQ found: {fq_file} .Skipping...") - else: - if not self.just_metadata: - try: - self._download_SRA_file(run_name) - except Exception as err: - failed_runs.append(run_name) - self._LOGGER.warning( - f"Error occurred while downloading SRA file: {err}" - ) - else: - self._LOGGER.info("Dry run (no raw data will be download)") - - if self.bam_conversion and self.bam_folder != "": - try: - # converting sra to bam using - self._sra_bam_conversion(bam_file, run_name) - - # checking if bam_file converted correctly, if not --> use fastq-dump - st = os.stat(bam_file) - if st.st_size < 100: - self._LOGGER.warning( - "Bam conversion failed with sam-dump. Trying fastq-dump..." - ) - self._sra_bam_conversion2( - bam_file, run_name, self.picard_path - ) - - except FileNotFoundError as err: + # download raw data: + if not self.just_metadata: + for file_key in gsm_multi_table.keys(): + for run in gsm_multi_table[file_key]: + # download raw data self._LOGGER.info( - f"SRA file doesn't exist, please download it first: {err}" + f"Getting SRR: {run[2]} in ({acc_GSE})" ) + self._download_raw_data(run[2]) + else: + self._LOGGER.info(f"Dry run, no data will be downloaded") # accumulate subannotations + metadata_dict[acc_GSE] = gsm_metadata subannotation_dict[acc_GSE] = gsm_multi_table - if len(failed_runs) > 0: - self._LOGGER.warn( - f"The following samples could not be downloaded: {failed_runs}" - ) - self._LOGGER.info(f"Finished processing {len(acc_GSE_list)} accession(s)") # Logging cleaning process: @@ -534,9 +398,11 @@ def fetch_all(self, input: str, name: str = None): # saving PEPs for processed data if self.processed: if not self.acc_anno: - return_value = self._generate_processed_meta(name="PEP_processed", - meta_processed_samples=processed_metadata_samples, - 
meta_processed_series=processed_metadata_series) + return_value = self._generate_processed_meta( + name="PEP_processed", + meta_processed_samples=processed_metadata_samples, + meta_processed_series=processed_metadata_series, + ) if self.just_object: return return_value @@ -546,11 +412,130 @@ def fetch_all(self, input: str, name: str = None): if self.just_object: return return_value - def fetch_processed_one(self, acc_gse: str, gse_file_content: list, gsm_file_content: list, - gsm_filter_list: dict) -> Tuple: + def _process_sra_meta(self, srp_list_result, gsm_enter_dict, gsm_metadata): + gsm_multi_table = {} + for line in srp_list_result: + + # Only download if it's in the include list: + experiment = line["Experiment"] + run_name = line["Run"] + if experiment not in gsm_metadata: + # print(f"Skipping: {experiment}") + continue + + sample_name = None + try: + sample_name = gsm_enter_dict[gsm_metadata[experiment]["gsm_id"]] + except KeyError: + # No name in input file + pass + + if not sample_name or sample_name == "": + temp = gsm_metadata[experiment]["Sample_title"] + sample_name = self._sanitize_name(temp) + + # Otherwise, record that there's SRA data for this run. + # And set a few columns that are used as input to the Looper + # print("Updating columns for looper") + self._update_columns( + gsm_metadata, + experiment, + sample_name=sample_name, + read_type=line["LibraryLayout"], + ) + + # Some experiments are flagged in SRA as having multiple runs. + if gsm_metadata[experiment].get("SRR") is not None: + # This SRX number already has an entry in the table. 
+ self._LOGGER.debug(f"Found additional run: {run_name} ({experiment})") + if ( + isinstance(gsm_metadata[experiment]["SRR"], str) + and experiment not in gsm_multi_table + ): + gsm_multi_table[experiment] = [] + + gsm_multi_table[experiment].append( + [ + sample_name, + experiment, + gsm_metadata[experiment]["SRR"], + ] + ) + gsm_multi_table[experiment].append( + [sample_name, experiment, run_name] + ) + else: + gsm_multi_table[experiment].append( + [sample_name, experiment, run_name] + ) + + if self.split_experiments: + rep_number = len(gsm_multi_table[experiment]) + new_SRX = experiment + "_" + str(rep_number) + gsm_metadata[new_SRX] = copy.copy(gsm_metadata[experiment]) + # gsm_metadata[new_SRX]["SRX"] = new_SRX + gsm_metadata[new_SRX]["sample_name"] += "_" + str(rep_number) + gsm_metadata[new_SRX]["SRR"] = run_name + else: + # Either way, set the srr code to multi in the main table. + gsm_metadata[experiment]["SRR"] = "multi" + else: + # The first SRR for this SRX is added to GSM metadata + gsm_metadata[experiment]["SRR"] = run_name + return gsm_multi_table + + def _download_raw_data(self, run_name): + bam_file = ( + "" + if self.bam_folder == "" + else os.path.join(self.bam_folder, run_name + ".bam") + ) + fq_file = ( + "" + if self.fq_folder == "" + else os.path.join(self.fq_folder, run_name + "_1.fq") + ) + + if os.path.exists(bam_file): + self._LOGGER.info(f"BAM found: {bam_file} . Skipping...") + elif os.path.exists(fq_file): + self._LOGGER.info(f"FQ found: {fq_file} .Skipping...") + else: + try: + self._download_SRA_file(run_name) + except Exception as err: + self._LOGGER.warning( + f"Error occurred while downloading SRA file: {err}" + ) + + if self.bam_conversion and self.bam_folder != "": + try: + # converting sra to bam using + # TODO: sam-dump has a built-in prefetch. I don't have to do + # any of this stuff... This also solves the bad sam-dump issues. 
+ self._sra_bam_conversion(bam_file, run_name) + + # checking if bam_file converted correctly, if not --> use fastq-dump + st = os.stat(bam_file) + if st.st_size < 100: + self._LOGGER.warning( + "Bam conversion failed with sam-dump. Trying fastq-dump..." + ) + self._sra_bam_conversion2(bam_file, run_name, self.picard_path) + + except FileNotFoundError as err: + self._LOGGER.info( + f"SRA file doesn't exist, please download it first: {err}" + ) + + def fetch_processed_one( + self, + gse_file_content: list, + gsm_file_content: list, + gsm_filter_list: dict, + ) -> Tuple: """ Fetching just one processed GSE project - :param acc_gse: GSE number :param gsm_file_content: gse soft file content :param gse_file_content: gsm soft file content :param gsm_filter_list: list of gsm that have to be downloaded @@ -559,17 +544,12 @@ def fetch_processed_one(self, acc_gse: str, gse_file_content: list, gsm_file_con ( meta_processed_samples, meta_processed_series, - ) = self._get_list_of_processed_files( - gse_file_content, gsm_file_content - ) + ) = self._get_list_of_processed_files(gse_file_content, gsm_file_content) # taking into account list of GSM that is specified in the input file meta_processed_samples = self._filter_gsm( meta_processed_samples, gsm_filter_list ) - # # Unify keys: -- probably we don't need this - # meta_processed_samples = self._unify_list_keys(meta_processed_samples) - # meta_processed_series = self._unify_list_keys(meta_processed_series) # samples meta_processed_samples = self._expand_metadata_list(meta_processed_samples) @@ -578,17 +558,14 @@ def fetch_processed_one(self, acc_gse: str, gse_file_content: list, gsm_file_con meta_processed_series = self._expand_metadata_list(meta_processed_series) # convert column names to lowercase and underscore - meta_processed_samples = self._standardize_colnames( - meta_processed_samples - ) - meta_processed_series = self._standardize_colnames( - meta_processed_series - ) + meta_processed_samples = 
self._standardize_colnames(meta_processed_samples) + meta_processed_series = self._standardize_colnames(meta_processed_series) return meta_processed_samples, meta_processed_series - - def _generate_processed_meta(self, name: str, meta_processed_samples: list, meta_processed_series: list) -> dict: + def _generate_processed_meta( + self, name: str, meta_processed_samples: list, meta_processed_series: list + ) -> dict: """ Generate and save PEPs for processed accessions. GEO has data in GSE and GSM, conditions are used to decide which PEPs have to be saved. @@ -597,8 +574,7 @@ def _generate_processed_meta(self, name: str, meta_processed_samples: list, meta :param meta_processed_series: :return: dict of objects if just_object is set, otherwise dicts of None """ - return_objects = {f"{name}_samples": None, - f"{name}_series": None} + return_objects = {f"{name}_samples": None, f"{name}_series": None} if self.supp_by == "all": # samples @@ -608,7 +584,9 @@ def _generate_processed_meta(self, name: str, meta_processed_samples: list, meta name + SAMPLE_SUPP_METADATA_FILE, ) return_objects[f"{name}_samples"] = self._write_processed_annotation( - meta_processed_samples, pep_acc_path_sample, just_object=self.just_object, + meta_processed_samples, + pep_acc_path_sample, + just_object=self.just_object, ) # series @@ -618,7 +596,9 @@ def _generate_processed_meta(self, name: str, meta_processed_samples: list, meta name + EXP_SUPP_METADATA_FILE, ) return_objects[f"{name}_series"] = self._write_processed_annotation( - meta_processed_series, pep_acc_path_exp, just_object=self.just_object, + meta_processed_series, + pep_acc_path_exp, + just_object=self.just_object, ) elif self.supp_by == "samples": @@ -628,7 +608,9 @@ def _generate_processed_meta(self, name: str, meta_processed_samples: list, meta name + SAMPLE_SUPP_METADATA_FILE, ) return_objects[f"{name}_samples"] = self._write_processed_annotation( - meta_processed_samples, pep_acc_path_sample, just_object=self.just_object, + 
meta_processed_samples, + pep_acc_path_sample, + just_object=self.just_object, ) elif self.supp_by == "series": return_objects[f"{name}_series"] = pep_acc_path_exp = os.path.join( @@ -637,20 +619,22 @@ def _generate_processed_meta(self, name: str, meta_processed_samples: list, meta name + EXP_SUPP_METADATA_FILE, ) self._write_processed_annotation( - meta_processed_series, pep_acc_path_exp, just_object=self.just_object, + meta_processed_series, + pep_acc_path_exp, + just_object=self.just_object, ) return return_objects - def _download_processed_data(self, acc_gse: str, meta_processed_samples: list, - meta_processed_series: list) -> NoReturn: + def _download_processed_data( + self, acc_gse: str, meta_processed_samples: list, meta_processed_series: list + ) -> NoReturn: data_geo_folder = os.path.join(self.geo_folder, acc_gse) self._LOGGER.debug("Data folder: " + data_geo_folder) if self.supp_by == "all": processed_samples_files = [ - each_file["file_url"] - for each_file in meta_processed_samples + each_file["file_url"] for each_file in meta_processed_samples ] for file_url in processed_samples_files: self._download_processed_file(file_url, data_geo_folder) @@ -663,8 +647,7 @@ def _download_processed_data(self, acc_gse: str, meta_processed_samples: list, elif self.supp_by == "samples": processed_samples_files = [ - each_file["file_url"] - for each_file in meta_processed_samples + each_file["file_url"] for each_file in meta_processed_samples ] for file_url in processed_samples_files: self._download_processed_file(file_url, data_geo_folder) @@ -676,6 +659,11 @@ def _download_processed_data(self, acc_gse: str, meta_processed_samples: list, for file_url in processed_series_files: self._download_processed_file(file_url, data_geo_folder) + def _expand_metadata_list_in_dict(self, metadata_dict: dict) -> dict: + prj_list = self._dict_to_list_convector(proj_dict=metadata_dict) + prj_list = self._expand_metadata_list(prj_list) + return 
self._dict_to_list_convector(proj_list=prj_list) + def _expand_metadata_list(self, metadata_list: list) -> list: """ Expanding all lists of all items in the list by creating new items or joining them @@ -686,9 +674,7 @@ def _expand_metadata_list(self, metadata_list: list) -> list: self._LOGGER.info("Expanding metadata list...") list_of_keys = self._get_list_of_keys(metadata_list) for key_in_list in list_of_keys: - metadata_list = self._expand_metadata_list_item( - metadata_list, key_in_list - ) + metadata_list = self._expand_metadata_list_item(metadata_list, key_in_list) return metadata_list def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): @@ -706,49 +692,52 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): ) if element_is_list: for n_elem in range(len(metadata_list)): - if type(metadata_list[n_elem][dict_key]) is not list: - metadata_list[n_elem][dict_key] = [ - metadata_list[n_elem][dict_key] - ] - - just_string = False - this_string = "" - for elem in metadata_list[n_elem][dict_key]: - separated_elements = elem.split(": ") - if len(separated_elements) >= 2: - - # if first element is larger than 40 then treat it like simple string - if len(separated_elements[0]) > 40: - just_string = True - if this_string != "": - this_string = ", ".join([this_string, elem]) + try: + if type(metadata_list[n_elem][dict_key]) is not list: + metadata_list[n_elem][dict_key] = [ + metadata_list[n_elem][dict_key] + ] + + just_string = False + this_string = "" + for elem in metadata_list[n_elem][dict_key]: + separated_elements = elem.split(": ") + if len(separated_elements) >= 2: + + # if first element is larger than 40 then treat it like simple string + if len(separated_elements[0]) > 40: + just_string = True + if this_string != "": + this_string = ", ".join([this_string, elem]) + else: + this_string = elem + # additional elem for all bed files + elif len(separated_elements[0].split("(")) > 1: + just_string = True + if this_string != 
"": + this_string = "(".join([this_string, elem]) + else: + this_string = elem else: - this_string = elem - # additional elem for all bed files - elif len(separated_elements[0].split("(")) > 1: + list_of_elem = [ + separated_elements[0], + ": ".join(separated_elements[1:]), + ] + sample_char = dict([list_of_elem]) + metadata_list[n_elem].update(sample_char) + else: just_string = True if this_string != "": - this_string = "(".join([this_string, elem]) + this_string = ", ".join([this_string, elem]) else: this_string = elem - else: - list_of_elem = [ - separated_elements[0], - ": ".join(separated_elements[1:]), - ] - sample_char = dict([list_of_elem]) - metadata_list[n_elem].update(sample_char) - else: - just_string = True - if this_string != "": - this_string = ", ".join([this_string, elem]) - else: - this_string = elem - if just_string: - metadata_list[n_elem][dict_key] = this_string - else: - del metadata_list[n_elem][dict_key] + if just_string: + metadata_list[n_elem][dict_key] = this_string + else: + del metadata_list[n_elem][dict_key] + except KeyError as err: + self._LOGGER.warning(f"expand_metadata_list: Key Error: {err}") return metadata_list else: @@ -835,7 +824,7 @@ def _find_genome(self, metadata_list): return metadata_list def _write_gsm_annotation( - self, gsm_metadata, file_annotation, use_key_subset=False + self, gsm_metadata, file_annotation, use_key_subset=False ): """ Write metadata sheet out as an annotation file. 
@@ -864,7 +853,12 @@ def _write_gsm_annotation( self._LOGGER.info("\033[92mFile has been saved successfully\033[0m") return fp - def _write_processed_annotation(self, processed_metadata: list, file_annotation_path: str, just_object: bool = False) -> Union[NoReturn, peppy.Project]: + def _write_processed_annotation( + self, + processed_metadata: list, + file_annotation_path: str, + just_object: bool = False, + ) -> Union[NoReturn, peppy.Project]: """ Saving annotation file by providing list of dictionaries with files metadata :param list processed_metadata: list of dictionaries with files metadata @@ -990,8 +984,8 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): for key_sample, value_sample in value.items(): fixed_dict[key_sample] = value_sample if ( - value_sample["sample_name"] == "" - or value_sample["sample_name"] is None + value_sample["sample_name"] == "" + or value_sample["sample_name"] is None ): fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] # TODO: should be corrected: @@ -1140,11 +1134,11 @@ def _create_dot_yaml(file_path: str, yaml_path: str): file.writelines(f"config_file: {yaml_path}") def _separate_common_meta( - self, - meta_list: Union[List, Dict], - max_len: int = 50, - del_limit: int = 250, - attr_limit_truncate: int = 500, + self, + meta_list: Union[List, Dict], + max_len: int = 50, + del_limit: int = 250, + attr_limit_truncate: int = 500, ): """ This function is separating information for the experiment from a sample @@ -1252,7 +1246,7 @@ def _standardize_colnames(self, meta_list): @staticmethod def _dict_to_list_convector( - proj_dict: Dict = None, proj_list: List = None + proj_dict: Dict = None, proj_list: List = None ) -> Union[Dict, List]: """ Convector project dict to list and vice versa @@ -1343,10 +1337,10 @@ def _sra_bam_conversion(self, bam_file, run_name): # The -u here allows unaligned reads, and seems to be # required for some sra files regardless of aligned state cmd = ( - "sam-dump -u " - 
+ os.path.join(self.sra_folder, run_name + ".sra") - + " | samtools view -bS - > " - + bam_file + "sam-dump -u " + + os.path.join(self.sra_folder, run_name + ".sra") + + " | samtools view -bS - > " + + bam_file ) # sam-dump -u SRR020515.sra | samtools view -bS - > test.bam @@ -1409,10 +1403,10 @@ def _sra_bam_conversion2(self, bam_file, run_name, picard_path=None): # check to make sure it worked cmd = ( - "fastq-dump --split-3 -O " - + os.path.realpath(self.sra_folder) - + " " - + os.path.join(self.sra_folder, run_name + ".sra") + "fastq-dump --split-3 -O " + + os.path.realpath(self.sra_folder) + + " " + + os.path.join(self.sra_folder, run_name + ".sra") ) self._LOGGER.info(f"Command: {cmd}") run_subprocess(cmd, shell=True) @@ -1493,7 +1487,7 @@ def _download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): self._LOGGER.info(f"\033[38;5;242mFile {full_filepath} exists.\033[0m") def _get_list_of_processed_files( - self, file_gse_content: list, file_gsm_content: list + self, file_gse_content: list, file_gsm_content: list ): """ Given a paths to GSE and GSM metafile create a list of dicts of metadata of processed files @@ -1523,7 +1517,7 @@ def _get_list_of_processed_files( # find and download filelist - file with information about files in tar index = file_url.rfind("/") tar_files_list_url = ( - "https" + file_url[3: index + 1] + "filelist.txt" + "https" + file_url[3 : index + 1] + "filelist.txt" ) # file_list_name filelist_path = os.path.join( @@ -1568,14 +1562,14 @@ def _get_list_of_processed_files( element_values = list(pl.values())[0] if not re.findall(SUPP_FILE_PATTERN, line_gsm): if ( - element_keys - not in meta_processed_samples[nb].keys() + element_keys + not in meta_processed_samples[nb].keys() ): meta_processed_samples[nb].update(pl) else: if ( - type(meta_processed_samples[nb][element_keys]) - is not list + type(meta_processed_samples[nb][element_keys]) + is not list ): meta_processed_samples[nb][element_keys] = [ 
meta_processed_samples[nb][element_keys] @@ -1945,7 +1939,7 @@ def _get_SRP_list(self, srp_number: str) -> list: id_results = x.json()["esearchresult"]["idlist"] if len(id_results) > 500: id_results = [ - id_results[x: x + 100] for x in range(0, len(id_results), 100) + id_results[x : x + 100] for x in range(0, len(id_results), 100) ] else: id_results = [id_results] @@ -1966,7 +1960,9 @@ def _get_SRP_list(self, srp_number: str) -> list: return SRP_list - def _get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): + def _read_gsm_metadata( + self, acc_GSE: str, acc_GSE_list: dict, file_gsm_content: list + ) -> dict: """ A simple state machine to parse SOFT formatted files (Here, the GSM file) @@ -1991,8 +1987,8 @@ def _get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): if line[0] == "^": pl = parse_SOFT_line(line) if ( - len(acc_GSE_list[acc_GSE]) > 0 - and pl["SAMPLE"] not in GSM_limit_list + len(acc_GSE_list[acc_GSE]) > 0 + and pl["SAMPLE"] not in GSM_limit_list ): # sys.stdout.write(" Skipping " + a['SAMPLE'] + ".") current_sample_id = None @@ -2020,7 +2016,17 @@ def _get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): f"line: {line}" ) continue - gsm_metadata[current_sample_id].update(pl) + new_key = list(pl.keys())[0] + if new_key in gsm_metadata[current_sample_id]: + if isinstance(gsm_metadata[current_sample_id][new_key], list): + gsm_metadata[current_sample_id][new_key].append(pl[new_key]) + else: + gsm_metadata[current_sample_id][new_key] = [ + gsm_metadata[current_sample_id][new_key] + ] + gsm_metadata[current_sample_id][new_key].append(pl[new_key]) + else: + gsm_metadata[current_sample_id].update(pl) # Now convert the ids GEO accessions into SRX accessions if not current_sample_srx: @@ -2036,6 +2042,7 @@ def _get_gsm_metadata(self, acc_GSE, acc_GSE_list, file_gsm_content: list): current_sample_srx = True # GSM SOFT file parsed, save it in a list self._LOGGER.info(f"Processed {len(samples_list)} 
samples.") + gsm_metadata = self._expand_metadata_list_in_dict(gsm_metadata) return gsm_metadata def _write(self, f_var_value, content, msg_pre=None, omit_newline=False): @@ -2079,16 +2086,16 @@ def _parse_cmdl(cmdl): dest="metadata_root", default=safe_echo("SRAMETA"), help="Specify a parent folder location to store metadata. " - "The project name will be added as a subfolder " - "[Default: $SRAMETA:" + safe_echo("SRAMETA") + "]", + "The project name will be added as a subfolder " + "[Default: $SRAMETA:" + safe_echo("SRAMETA") + "]", ) parser.add_argument( "-u", "--metadata-folder", help="Specify an absolute folder location to store metadata. " - "No subfolder will be added. Overrides value of --metadata-root " - "[Default: Not used (--metadata-root is used by default)]", + "No subfolder will be added. Overrides value of --metadata-root " + "[Default: Not used (--metadata-root is used by default)]", ) parser.add_argument( @@ -2113,8 +2120,8 @@ def _parse_cmdl(cmdl): "--pipeline-samples", default=None, help="Optional: Specify one or more filepaths to SAMPLES pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. [Default: null]", + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", ) # Optional @@ -2122,8 +2129,8 @@ def _parse_cmdl(cmdl): "--pipeline-project", default=None, help="Optional: Specify one or more filepaths to PROJECT pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. [Default: null]", + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", ) # Optional parser.add_argument( @@ -2145,7 +2152,7 @@ def _parse_cmdl(cmdl): "--acc-anno", action="store_true", help="Optional: Produce annotation sheets for each accession." 
- " Project combined PEP for the whole project won't be produced.", + " Project combined PEP for the whole project won't be produced.", ) parser.add_argument( @@ -2159,7 +2166,7 @@ def _parse_cmdl(cmdl): type=int, default=50, help="Optional: Limit of the number of the constant sample characters " - "that should not be in project yaml. [Default: 50]", + "that should not be in project yaml. [Default: 50]", ) parser.add_argument( @@ -2167,7 +2174,7 @@ def _parse_cmdl(cmdl): type=int, default=250, help="Optional: Limit of the number of the constant sample characters " - "that should not be discarded [Default: 250]", + "that should not be discarded [Default: 250]", ) parser.add_argument( @@ -2175,8 +2182,8 @@ def _parse_cmdl(cmdl): type=int, default=500, help="Optional: Limit of the number of sample characters." - "Any attribute with more than X characters will truncate to the first X," - " where X is a number of characters [Default: 500]", + "Any attribute with more than X characters will truncate to the first X," + " where X is a number of characters [Default: 500]", ) parser.add_argument( @@ -2199,17 +2206,17 @@ def _parse_cmdl(cmdl): choices=["all", "samples", "series"], default="samples", help="Optional: Specifies the source of data on the GEO record" - " to retrieve processed data, which may be attached to the" - " collective series entity, or to individual samples. " - "Allowable values are: samples, series or both (all). " - "Ignored unless 'processed' flag is set. [Default: samples]", + " to retrieve processed data, which may be attached to the" + " collective series entity, or to individual samples. " + "Allowable values are: samples, series or both (all). " + "Ignored unless 'processed' flag is set. [Default: samples]", ) processed_group.add_argument( "--filter", default=None, help="Optional: Filter regex for processed filenames [Default: None]." 
- "Ignored unless 'processed' flag is set.", + "Ignored unless 'processed' flag is set.", ) processed_group.add_argument( @@ -2228,8 +2235,8 @@ def _parse_cmdl(cmdl): "--geo-folder", default=safe_echo("GEODATA"), help="Optional: Specify a location to store processed GEO files." - " Ignored unless 'processed' flag is set." - "[Default: $GEODATA:" + safe_echo("GEODATA") + "]", + " Ignored unless 'processed' flag is set." + "[Default: $GEODATA:" + safe_echo("GEODATA") + "]", ) raw_group.add_argument( @@ -2251,8 +2258,8 @@ def _parse_cmdl(cmdl): help="""Optional: Specify folder of bam files. Geofetch will not download sra files when corresponding bam files already exist. [Default: $SRABAM:""" - + safe_echo("SRABAM") - + "]", + + safe_echo("SRABAM") + + "]", ) raw_group.add_argument( @@ -2263,8 +2270,8 @@ def _parse_cmdl(cmdl): help="""Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding fastq files already exist. [Default: $SRAFQ:""" - + safe_echo("SRAFQ") - + "]", + + safe_echo("SRAFQ") + + "]", ) # Deprecated; these are for bam conversion which now happens in sra_convert diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index f9e268b..d3fa7de 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -109,20 +109,20 @@ def test_downloading_soft_files(self, initiate_geofetcher): def test_creating_sample_pep_files(self, initiate_geofetcher): initiate_geofetcher.fetch_all("GSE138657") downloaded_meta_files = list( - os.walk(initiate_geofetcher.metadata_expanded + "/PEP_samples") + os.walk(initiate_geofetcher.metadata_expanded + "/PEP_processed_samples") )[0][2] - assert "GSE138657_samples.csv" in downloaded_meta_files - assert "GSE138657_samples.yaml" in downloaded_meta_files + assert "PEP_processed_samples.csv" in downloaded_meta_files + assert "PEP_processed_samples.yaml" in downloaded_meta_files def test_creating_series_pep_files(self, initiate_geofetcher): initiate_geofetcher.fetch_all("GSE199313") 
downloaded_meta_files = list( - os.walk(initiate_geofetcher.metadata_expanded + "/PEP_series") + os.walk(initiate_geofetcher.metadata_expanded + "/PEP_processed_series") )[0][2] - assert "GSE199313_series.csv" in downloaded_meta_files - assert "GSE199313_series.yaml" in downloaded_meta_files + assert "PEP_processed_series.csv" in downloaded_meta_files + assert "PEP_processed_series.yaml" in downloaded_meta_files class TestDownloadingProcFiles: From 259bcff3c4c96bd5900c156787672fab0fd3e946 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 31 Aug 2022 17:11:41 -0400 Subject: [PATCH 25/61] Refactored raw functions 2 + stability --- geofetch/const.py | 5 + geofetch/geofetch.py | 357 ++++++++++++++++++++----------------------- geofetch/utils.py | 23 +-- 3 files changed, 180 insertions(+), 205 deletions(-) diff --git a/geofetch/const.py b/geofetch/const.py index 01d1e05..cc3e898 100644 --- a/geofetch/const.py +++ b/geofetch/const.py @@ -32,6 +32,8 @@ SAMPLE_SUPP_METADATA_FILE = "_samples.csv" EXP_SUPP_METADATA_FILE = "_series.csv" +FILE_RAW_NAME_SAMPLE_PATTERN = "_raw.csv" +FILE_RAW_NAME_SUBSAMPLE_PATTERN = "_raw_subtable.csv" # How many times should we retry failing prefetch call? 
NUM_RETRIES = 3 @@ -41,3 +43,6 @@ NCBI_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=sra&id={ID}&rettype=runinfo&retmode=xml" NEW_GENOME_COL_NAME = "ref_genome" + +CONFIG_PROCESSED_TEMPLATE_NAME = "config_processed_template.yaml" +CONFIG_RAW_TEMPLATE_NAME = "config_template.yaml" \ No newline at end of file diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 5fee104..ba4ef26 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -94,18 +94,18 @@ def __init__( if metadata_folder: self.metadata_expanded = expandpath(metadata_folder) if os.path.isabs(self.metadata_expanded): - self.metadata_raw = metadata_folder + self.metadata_root_full = metadata_folder else: self.metadata_expanded = os.path.abspath(self.metadata_expanded) - self.metadata_raw = os.path.abspath(metadata_root) - self.metadata_raw = metadata_folder + self.metadata_root_full = os.path.abspath(metadata_root) + self.metadata_root_full = metadata_folder else: self.metadata_expanded = expandpath(metadata_root) if os.path.isabs(self.metadata_expanded): - self.metadata_raw = metadata_root + self.metadata_root_full = metadata_root else: self.metadata_expanded = os.path.abspath(self.metadata_expanded) - self.metadata_raw = os.path.abspath(metadata_root) + self.metadata_root_full = os.path.abspath(metadata_root) self.just_metadata = just_metadata self.refresh_metadata = refresh_metadata @@ -143,7 +143,7 @@ def __init__( self.metadata_expanded = os.path.join( self.metadata_expanded, self.project_name ) - self.metadata_raw = os.path.join(self.metadata_raw, self.project_name) + self.metadata_root_full = os.path.join(self.metadata_root_full, self.project_name) if filter_size is not None: try: @@ -229,7 +229,7 @@ def get_project( project_dict[acc_GSE + "_raw"] = project else: - ser_dict = self.fetch_all(input=acc_GSE_list) + ser_dict = self.fetch_all(input=input) project_dict["raw_samples"] = ser_dict return project_dict @@ -240,7 +240,10 @@ def fetch_all(self, input: str, 
name: str = None): if name: self.project_name = name else: - self.project_name = os.path.splitext(os.path.basename(input))[0] + try: + self.project_name = os.path.splitext(os.path.basename(input))[0] + except TypeError: + self.project_name = input # check to make sure prefetch is callable if not self.just_metadata and not self.processed: @@ -254,8 +257,8 @@ def fetch_all(self, input: str, name: str = None): input, self.metadata_expanded, self.just_metadata ) - metadata_dict = {} - subannotation_dict = {} + metadata_dict_combined = {} + subannotation_dict_combined = {} processed_metadata_samples = [] processed_metadata_series = [] @@ -361,37 +364,42 @@ def fetch_all(self, input: str, name: str = None): ) if not srp_list_result: self._LOGGER.info(f"No SRP data, continuing ....") + self._LOGGER.warning(f"No raw pep will be created! ....") # delete current acc if no raw data was found # del metadata_dict[acc_GSE] - continue + pass else: self._LOGGER.info("Parsing SRA file to download SRR records") - gsm_multi_table = self._process_sra_meta( - srp_list_result, gsm_enter_dict, gsm_metadata - ) + gsm_multi_table = self._process_sra_meta( + srp_list_result, gsm_enter_dict, gsm_metadata + ) - # download raw data: - if not self.just_metadata: - for file_key in gsm_multi_table.keys(): - for run in gsm_multi_table[file_key]: - # download raw data - self._LOGGER.info( - f"Getting SRR: {run[2]} in ({acc_GSE})" - ) - self._download_raw_data(run[2]) - else: - self._LOGGER.info(f"Dry run, no data will be downloaded") + # download raw data: + if not self.just_metadata: + for file_key in gsm_multi_table.keys(): + for run in gsm_multi_table[file_key]: + # download raw data + self._LOGGER.info( + f"Getting SRR: {run[2]} in ({acc_GSE})" + ) + self._download_raw_data(run[2]) + else: + self._LOGGER.info(f"Dry run, no data will be downloaded") - # accumulate subannotations - metadata_dict[acc_GSE] = gsm_metadata - subannotation_dict[acc_GSE] = gsm_multi_table + # save one project + if 
self.acc_anno and nkeys > 1: + self._write_raw_annotation_new(name=acc_GSE, metadata_dict=gsm_metadata, subannot_dict=gsm_multi_table) + + else: + metadata_dict_combined.update(gsm_metadata) + subannotation_dict_combined.update(gsm_multi_table) self._LOGGER.info(f"Finished processing {len(acc_GSE_list)} accession(s)") # Logging cleaning process: if self.discard_soft: self._LOGGER.info(f"Cleaning soft files ...") - clean_soft_files(self.metadata_raw) + clean_soft_files(self.metadata_root_full) ####################################################################################### @@ -408,11 +416,11 @@ def fetch_all(self, input: str, name: str = None): # saving PEPs for raw data else: - return_value = self._write_raw_annotation(metadata_dict, subannotation_dict) + return_value = self._write_raw_annotation_new("PEP", metadata_dict_combined, subannotation_dict_combined) if self.just_object: return return_value - def _process_sra_meta(self, srp_list_result, gsm_enter_dict, gsm_metadata): + def _process_sra_meta(self, srp_list_result=None, gsm_enter_dict=None, gsm_metadata=None): gsm_multi_table = {} for line in srp_list_result: @@ -482,6 +490,7 @@ def _process_sra_meta(self, srp_list_result, gsm_enter_dict, gsm_metadata): else: # The first SRR for this SRX is added to GSM metadata gsm_metadata[experiment]["SRR"] = run_name + return gsm_multi_table def _download_raw_data(self, run_name): @@ -579,7 +588,7 @@ def _generate_processed_meta( if self.supp_by == "all": # samples pep_acc_path_sample = os.path.join( - self.metadata_raw, + self.metadata_root_full, f"{name}_samples", name + SAMPLE_SUPP_METADATA_FILE, ) @@ -591,7 +600,7 @@ def _generate_processed_meta( # series pep_acc_path_exp = os.path.join( - self.metadata_raw, + self.metadata_root_full, f"{name}_series", name + EXP_SUPP_METADATA_FILE, ) @@ -603,7 +612,7 @@ def _generate_processed_meta( elif self.supp_by == "samples": pep_acc_path_sample = os.path.join( - self.metadata_raw, + self.metadata_root_full, 
f"{name}_samples", name + SAMPLE_SUPP_METADATA_FILE, ) @@ -614,7 +623,7 @@ def _generate_processed_meta( ) elif self.supp_by == "series": return_objects[f"{name}_series"] = pep_acc_path_exp = os.path.join( - self.metadata_raw, + self.metadata_root_full, f"{name}_series", name + EXP_SUPP_METADATA_FILE, ) @@ -688,7 +697,7 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): """ try: element_is_list = any( - type(list_item[dict_key]) is list for list_item in metadata_list + type(list_item.get(dict_key)) is list for list_item in metadata_list ) if element_is_list: for n_elem in range(len(metadata_list)): @@ -737,7 +746,7 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): else: del metadata_list[n_elem][dict_key] except KeyError as err: - self._LOGGER.warning(f"expand_metadata_list: Key Error: {err}") + self._LOGGER.warning(f"expand_metadata_list: Key Error: {err}, continuing ...") return metadata_list else: @@ -824,7 +833,7 @@ def _find_genome(self, metadata_list): return metadata_list def _write_gsm_annotation( - self, gsm_metadata, file_annotation, use_key_subset=False + self, gsm_metadata, file_annotation ): """ Write metadata sheet out as an annotation file. @@ -832,16 +841,9 @@ def _write_gsm_annotation( :param Mapping gsm_metadata: the data to write, parsed from a file with metadata/annotation information :param str file_annotation: the path to the file to write - :param bool use_key_subset: whether to use the keys present in the - metadata object given (False), or instead use a fixed set of keys - defined within this module (True) :return str: path to file written """ - if use_key_subset: - keys = ANNOTATION_SHEET_KEYS - else: - # keys = gsm_metadata[gsm_metadata.keys().next()].keys() - keys = list(list(gsm_metadata.values())[0].keys()) + keys = list(list(gsm_metadata.values())[0].keys()) self._LOGGER.info(f"Sample annotation sheet: {file_annotation} . 
Saving....") fp = expandpath(file_annotation) @@ -891,29 +893,8 @@ def _write_processed_annotation( self.const_limit_discard, self.attr_limit_truncate, ) - meta_list_str = [ - f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta - ] - modifiers_str = "\n ".join(d for d in meta_list_str) - geofetchdir = os.path.dirname(__file__) - config_template = os.path.join(geofetchdir, "config_processed_template.yaml") - - with open(config_template, "r") as template_file: - template = template_file.read() - - template_values = { - "project_name": self.project_name, - "sample_table": os.path.basename(file_annotation_path), - "geo_folder": self.geo_folder, - "pipeline_samples": self.file_pipeline_samples, - "pipeline_project": self.file_pipeline_project, - "additional_columns": modifiers_str, - } - - for k, v in template_values.items(): - placeholder = "{" + str(k) + "}" - template = template.replace(placeholder, str(v)) + template = self._create_config_processed(file_annotation_path, proj_meta) if not just_object: with open(file_annotation_path, "w") as m_file: @@ -944,170 +925,78 @@ def _write_processed_annotation( proj = peppy.Project().from_pandas(pd_value, config=conf) return proj - @staticmethod - def _sanitize_name(name_str: str): + def _write_raw_annotation_new(self, name, metadata_dict: dict, subannot_dict: dict = None) -> Union[None, peppy.Project]: """ - Function that sanitizing strings. (Replace all odd characters) - :param str name_str: Any string value that has to be sanitized. 
- :return: sanitized strings - """ - new_str = name_str - punctuation1 = r"""!"#$%&'()*,./:;<=>?@[\]^_`{|}~""" - for odd_char in list(punctuation1): - new_str = new_str.replace(odd_char, "_") - new_str = new_str.replace(" ", "_").replace("__", "_") - return new_str - - def _write_raw_annotation(self, metadata_dict, subannotation_dict): - """ - Combining individual accessions into project-level annotations, and writeing + Combining individual accessions into project-level annotations, and writing individual accession files (if requested) - :param dict metadata_dict: dictionary of metadata - :param dict sub-annotation_dict: dictionary of sub-annotation metadata + :param name: + :param metadata_dict: + :param subannot_dict: + :return: none or peppy object """ - - if self.discard_soft: - clean_soft_files(os.path.join(self.metadata_raw)) - try: assert len(metadata_dict) > 0 except AssertionError: self._LOGGER.warning( "\033[33mNo PEP created, as no raw data was found!!!\033[0m" ) - return False + return None - # checking sample_name value if it's not empty, - # otherwise pulling from title - for key, value in metadata_dict.items(): - fixed_dict = {} - for key_sample, value_sample in value.items(): - fixed_dict[key_sample] = value_sample - if ( - value_sample["sample_name"] == "" - or value_sample["sample_name"] is None - ): - fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] - # TODO: should be corrected: - # # sanitize names - # fixed_dict[key_sample]["sample_name"] = self._sanitize_name( - # fixed_dict[key_sample]["sample_name"] - # ) + if self.discard_soft: + clean_soft_files(os.path.join(self.metadata_root_full)) - metadata_dict[key] = fixed_dict + self._LOGGER.info( + "Creating complete project annotation sheets and config file..." 
+ ) - # annotation table - metadata_dict_combined = {} - for acc_GSE, gsm_metadata in metadata_dict.items(): - gsm_metadata1 = self._standardize_colnames(gsm_metadata) - file_annotation = os.path.join( - self.metadata_expanded, acc_GSE + "_annotation.csv" - ) - # for each sample - if self.acc_anno: - self._write_gsm_annotation( - gsm_metadata1, file_annotation, use_key_subset=self.use_key_subset - ) - metadata_dict_combined.update(gsm_metadata1) + proj_root = os.path.join(self.metadata_root_full, name) + if not os.path.exists(proj_root): + os.makedirs(proj_root) - # subatnotation table - subannotation_dict_combined = {} - for acc_GSE, gsm_multi_table in subannotation_dict.items(): - file_subannotation = os.path.join( - self.metadata_expanded, acc_GSE + "_subannotation.csv" - ) - # for each sample: - if self.acc_anno: - self._write_subannotation(gsm_multi_table, file_subannotation) - subannotation_dict_combined.update(gsm_multi_table) + proj_root_sample = os.path.join(proj_root, f"{name}{FILE_RAW_NAME_SAMPLE_PATTERN}") + proj_root_subsample = os.path.join(proj_root, f"{name}{FILE_RAW_NAME_SUBSAMPLE_PATTERN}") + yaml_name = f"{name}.yaml" + proj_root_yaml = os.path.join(proj_root, yaml_name) + dot_yaml_path = os.path.join(proj_root, ".pep.yaml") - # TODO: were is .yaml file for each acc_anno? + metadata_dict = self._check_sample_name_standard(metadata_dict) - self._LOGGER.info( - "Creating complete project annotation sheets and config file..." 
- ) - # filtering huge annotation strings that are repeating for each sample - metadata_dict_combined, proj_meta = self._separate_common_meta( - metadata_dict_combined, + metadata_dict, proj_meta = self._separate_common_meta( + metadata_dict, self.const_limit_project, self.const_limit_discard, self.attr_limit_truncate, ) - meta_list_str = [ - f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta - ] - modifiers_str = "\n ".join(d for d in meta_list_str) - - # If the project included more than one GSE, we can now output combined - # annotation tables for the entire project. - # Write combined annotation sheet - file_annotation = os.path.join( - self.metadata_raw, self.project_name + "_annotation.csv" - ) # Write combined subannotation table - if len(subannotation_dict_combined) > 0: - file_subannotation = os.path.join( - self.metadata_raw, self.project_name + "_subannotation.csv" - ) - self._write_subannotation(subannotation_dict_combined, file_subannotation) + if len(subannot_dict) > 0: subanot_path_yaml = ( - f"subsample_table: {os.path.basename(file_subannotation)}" + f"subsample_table: {os.path.basename(proj_root_subsample)}" ) else: - file_subannotation = "null" subanot_path_yaml = f"" - # Write project config file - if not self.config_template: - geofetchdir = os.path.dirname(__file__) - self.config_template = os.path.join(geofetchdir, "config_template.yaml") - with open(self.config_template, "r") as template_file: - template = template_file.read() + template = self._create_config_raw(proj_meta, proj_root_sample, subanot_path_yaml) - template_values = { - "project_name": self.project_name, - "annotation": os.path.basename(file_annotation), - "subannotation": subanot_path_yaml, - "pipeline_samples": self.file_pipeline_samples, - "pipeline_project": self.file_pipeline_project, - "additional_columns": modifiers_str, - } - for k, v in template_values.items(): - placeholder = "{" + str(k) + "}" - template = template.replace(placeholder, str(v)) + if not 
self.just_object: + self._write_gsm_annotation(metadata_dict, proj_root_sample) - if not self.just_object and not self.acc_anno: - # write annotation - self._write_gsm_annotation( - metadata_dict_combined, - file_annotation, - use_key_subset=self.use_key_subset, - ) - # write subannotation - if len(subannotation_dict_combined) > 0: - self._write_subannotation( - subannotation_dict_combined, file_subannotation - ) + if len(subannot_dict) > 0: + self._write_subannotation(subannot_dict, proj_root_subsample) - # save .yaml file - yaml_name = self.project_name + "_config.yaml" - config = os.path.join(self.metadata_raw, yaml_name) - self._write(config, template, msg_pre=" Config file: ") + self._write(proj_root_yaml, template, msg_pre=" Config file: ") - # save .pep.yaml file if self.add_dotfile: - dot_yaml_path = os.path.join(self.metadata_raw, ".pep.yaml") self._create_dot_yaml(dot_yaml_path, yaml_name) else: - meta_df = pd.DataFrame.from_dict(metadata_dict_combined, orient="index") + meta_df = pd.DataFrame.from_dict(metadata_dict, orient="index") # open list: new_sub_list = [] - for sub_key in subannotation_dict_combined.keys(): + for sub_key in subannot_dict.keys(): new_sub_list.extend( - [col_item for col_item in subannotation_dict_combined[sub_key]] + [col_item for col_item in subannot_dict[sub_key]] ) sub_meta_df = pd.DataFrame( @@ -1123,6 +1012,83 @@ def _write_raw_annotation(self, metadata_dict, subannotation_dict): proj = peppy.Project().from_pandas(meta_df, sub_meta_df, conf) return proj + def _create_config_processed(self, file_annotation_path, proj_meta): + geofetchdir = os.path.dirname(__file__) + config_template = os.path.join(geofetchdir, CONFIG_PROCESSED_TEMPLATE_NAME) + with open(config_template, "r") as template_file: + template = template_file.read() + meta_list_str = [ + f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta + ] + modifiers_str = "\n ".join(d for d in meta_list_str) + template_values = { + "project_name": self.project_name, 
+ "sample_table": os.path.basename(file_annotation_path), + "geo_folder": self.geo_folder, + "pipeline_samples": self.file_pipeline_samples, + "pipeline_project": self.file_pipeline_project, + "additional_columns": modifiers_str, + } + for k, v in template_values.items(): + placeholder = "{" + str(k) + "}" + template = template.replace(placeholder, str(v)) + return template + + def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): + meta_list_str = [ + f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta + ] + modifiers_str = "\n ".join(d for d in meta_list_str) + # Write project config file + if not self.config_template: + geofetchdir = os.path.dirname(__file__) + self.config_template = os.path.join(geofetchdir, CONFIG_RAW_TEMPLATE_NAME) + with open(self.config_template, "r") as template_file: + template = template_file.read() + template_values = { + "project_name": self.project_name, + "annotation": os.path.basename(proj_root_sample), + "subannotation": subanot_path_yaml, + "pipeline_samples": self.file_pipeline_samples, + "pipeline_project": self.file_pipeline_project, + "additional_columns": modifiers_str, + } + for k, v in template_values.items(): + placeholder = "{" + str(k) + "}" + template = template.replace(placeholder, str(v)) + return template + + def _check_sample_name_standard(self, metadata_dict): + fixed_dict = {} + for key_sample, value_sample in metadata_dict.items(): + fixed_dict[key_sample] = value_sample + if ( + value_sample["sample_name"] == "" + or value_sample["sample_name"] is None + ): + fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] + # sanitize names + fixed_dict[key_sample]["sample_name"] = self._sanitize_name( + fixed_dict[key_sample]["sample_name"] + ) + metadata_dict = fixed_dict + metadata_dict = self._standardize_colnames(metadata_dict) + return metadata_dict + + @staticmethod + def _sanitize_name(name_str: str): + """ + Function that sanitizing strings. 
(Replace all odd characters) + :param str name_str: Any string value that has to be sanitized. + :return: sanitized strings + """ + new_str = name_str + punctuation1 = r"""!"#$%&'()*,./:;<=>?@[\]^_`{|}~""" + for odd_char in list(punctuation1): + new_str = new_str.replace(odd_char, "_") + new_str = new_str.replace(" ", "_").replace("__", "_") + return new_str + @staticmethod def _create_dot_yaml(file_path: str, yaml_path: str): """ @@ -1213,7 +1179,7 @@ def _separate_common_meta( meta_list = self._dict_to_list_convector(proj_list=meta_list) return meta_list, new_meta_project - def _standardize_colnames(self, meta_list): + def _standardize_colnames(self, meta_list: Union[list, dict]): """ Standardize column names by lower-casing and underscore :param list meta_list: list of dictionaries of samples @@ -2336,6 +2302,7 @@ def main(): Geofetcher(**args_dict).fetch_all(args_dict["input"]) + if __name__ == "__main__": try: sys.exit(main()) diff --git a/geofetch/utils.py b/geofetch/utils.py index 99735b7..7917c67 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -311,16 +311,19 @@ def clean_soft_files(meta_dir: str): and creating PEPs :param str meta_dir: Path to the metadata files """ - dir_files = os.listdir(meta_dir) - - for item in dir_files: - if ( - item.endswith(".soft") - or item.endswith("_file_list.txt") - or item.endswith("SRA.csv") - or item.endswith("SRA_filt.csv") - ): - os.remove(os.path.join(meta_dir, item)) + try: + dir_files = os.listdir(meta_dir) + + for item in dir_files: + if ( + item.endswith(".soft") + or item.endswith("_file_list.txt") + or item.endswith("SRA.csv") + or item.endswith("SRA_filt.csv") + ): + os.remove(os.path.join(meta_dir, item)) + except FileNotFoundError: + _LOGGER.debug("Can't clean soft files...folder doesn't exist") def run_subprocess(*args, **kwargs): From bf37d2e8fc437d41e6e1d3a783d1b4e57e52ab07 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 1 Sep 2022 11:04:45 -0400 Subject: [PATCH 26/61] Bug fix --- 
geofetch/geofetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index ba4ef26..2011627 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1881,7 +1881,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): f"\033[91mError occurred, while downloading SRA Info Metadata of {acc_SRP}. " f"Error: {err} \033[0m" ) - return False + return [] def _get_SRP_list(self, srp_number: str) -> list: """ From f034235173a1827178026c60f20629f959abfc57 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 1 Sep 2022 11:27:47 -0400 Subject: [PATCH 27/61] Bug fix 2 --- geofetch/geofetch.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 2011627..f2d200e 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -845,13 +845,14 @@ def _write_gsm_annotation( """ keys = list(list(gsm_metadata.values())[0].keys()) - self._LOGGER.info(f"Sample annotation sheet: {file_annotation} . Saving....") + self._LOGGER.info(f"\033[92mSample annotation sheet: {file_annotation} . Saved!\033[0m") fp = expandpath(file_annotation) with open(fp, "w") as of: w = csv.DictWriter(of, keys, extrasaction="ignore") w.writeheader() for item in gsm_metadata: w.writerow(gsm_metadata[item]) + self._LOGGER.info(f"\033[92mSample annotation sheet: {file_annotation} . Saved!\033[0m") self._LOGGER.info("\033[92mFile has been saved successfully\033[0m") return fp @@ -1830,7 +1831,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): ) except TypeError: self._LOGGER.warning("Error in gsm_metadata") - return False + return [] # else: # # More than one sample? not sure what to do here. Does this even happen? @@ -1857,7 +1858,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): f"\033[91mError occurred, while downloading SRA Info Metadata of {acc_SRP}. 
" f"Error: {err} \033[0m" ) - return False + return [] else: # open existing annotation self._LOGGER.info(f"Found SRA metadata, opening..") From 156213ddcf7dd0e590620134b739739fd08aad08 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 1 Sep 2022 12:36:19 -0400 Subject: [PATCH 28/61] requirements + lint --- geofetch/const.py | 2 +- geofetch/geofetch.py | 70 ++++++++++++++++++------------- requirements/requirements-all.txt | 2 +- 3 files changed, 44 insertions(+), 30 deletions(-) diff --git a/geofetch/const.py b/geofetch/const.py index cc3e898..566aeac 100644 --- a/geofetch/const.py +++ b/geofetch/const.py @@ -45,4 +45,4 @@ NEW_GENOME_COL_NAME = "ref_genome" CONFIG_PROCESSED_TEMPLATE_NAME = "config_processed_template.yaml" -CONFIG_RAW_TEMPLATE_NAME = "config_template.yaml" \ No newline at end of file +CONFIG_RAW_TEMPLATE_NAME = "config_template.yaml" diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index f2d200e..3dbb9c0 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -6,9 +6,9 @@ import copy import csv import os -import re +# import re import sys -from string import punctuation +# from string import punctuation import requests import xmltodict from rich.progress import track @@ -143,7 +143,9 @@ def __init__( self.metadata_expanded = os.path.join( self.metadata_expanded, self.project_name ) - self.metadata_root_full = os.path.join(self.metadata_root_full, self.project_name) + self.metadata_root_full = os.path.join( + self.metadata_root_full, self.project_name + ) if filter_size is not None: try: @@ -379,16 +381,18 @@ def fetch_all(self, input: str, name: str = None): for file_key in gsm_multi_table.keys(): for run in gsm_multi_table[file_key]: # download raw data - self._LOGGER.info( - f"Getting SRR: {run[2]} in ({acc_GSE})" - ) + self._LOGGER.info(f"Getting SRR: {run[2]} in ({acc_GSE})") self._download_raw_data(run[2]) else: self._LOGGER.info(f"Dry run, no data will be downloaded") # save one project if self.acc_anno and nkeys > 1: - 
self._write_raw_annotation_new(name=acc_GSE, metadata_dict=gsm_metadata, subannot_dict=gsm_multi_table) + self._write_raw_annotation_new( + name=acc_GSE, + metadata_dict=gsm_metadata, + subannot_dict=gsm_multi_table, + ) else: metadata_dict_combined.update(gsm_metadata) @@ -416,11 +420,15 @@ def fetch_all(self, input: str, name: str = None): # saving PEPs for raw data else: - return_value = self._write_raw_annotation_new("PEP", metadata_dict_combined, subannotation_dict_combined) + return_value = self._write_raw_annotation_new( + "PEP", metadata_dict_combined, subannotation_dict_combined + ) if self.just_object: return return_value - def _process_sra_meta(self, srp_list_result=None, gsm_enter_dict=None, gsm_metadata=None): + def _process_sra_meta( + self, srp_list_result=None, gsm_enter_dict=None, gsm_metadata=None + ): gsm_multi_table = {} for line in srp_list_result: @@ -746,7 +754,9 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): else: del metadata_list[n_elem][dict_key] except KeyError as err: - self._LOGGER.warning(f"expand_metadata_list: Key Error: {err}, continuing ...") + self._LOGGER.warning( + f"expand_metadata_list: Key Error: {err}, continuing ..." + ) return metadata_list else: @@ -832,9 +842,7 @@ def _find_genome(self, metadata_list): metadata_list[sample[0]][NEW_GENOME_COL_NAME] = sample_genome return metadata_list - def _write_gsm_annotation( - self, gsm_metadata, file_annotation - ): + def _write_gsm_annotation(self, gsm_metadata, file_annotation): """ Write metadata sheet out as an annotation file. @@ -845,14 +853,18 @@ def _write_gsm_annotation( """ keys = list(list(gsm_metadata.values())[0].keys()) - self._LOGGER.info(f"\033[92mSample annotation sheet: {file_annotation} . Saved!\033[0m") + self._LOGGER.info( + f"\033[92mSample annotation sheet: {file_annotation} . 
Saved!\033[0m" + ) fp = expandpath(file_annotation) with open(fp, "w") as of: w = csv.DictWriter(of, keys, extrasaction="ignore") w.writeheader() for item in gsm_metadata: w.writerow(gsm_metadata[item]) - self._LOGGER.info(f"\033[92mSample annotation sheet: {file_annotation} . Saved!\033[0m") + self._LOGGER.info( + f"\033[92mSample annotation sheet: {file_annotation} . Saved!\033[0m" + ) self._LOGGER.info("\033[92mFile has been saved successfully\033[0m") return fp @@ -926,7 +938,9 @@ def _write_processed_annotation( proj = peppy.Project().from_pandas(pd_value, config=conf) return proj - def _write_raw_annotation_new(self, name, metadata_dict: dict, subannot_dict: dict = None) -> Union[None, peppy.Project]: + def _write_raw_annotation_new( + self, name, metadata_dict: dict, subannot_dict: dict = None + ) -> Union[None, peppy.Project]: """ Combining individual accessions into project-level annotations, and writing individual accession files (if requested) @@ -954,8 +968,12 @@ def _write_raw_annotation_new(self, name, metadata_dict: dict, subannot_dict: di if not os.path.exists(proj_root): os.makedirs(proj_root) - proj_root_sample = os.path.join(proj_root, f"{name}{FILE_RAW_NAME_SAMPLE_PATTERN}") - proj_root_subsample = os.path.join(proj_root, f"{name}{FILE_RAW_NAME_SUBSAMPLE_PATTERN}") + proj_root_sample = os.path.join( + proj_root, f"{name}{FILE_RAW_NAME_SAMPLE_PATTERN}" + ) + proj_root_subsample = os.path.join( + proj_root, f"{name}{FILE_RAW_NAME_SUBSAMPLE_PATTERN}" + ) yaml_name = f"{name}.yaml" proj_root_yaml = os.path.join(proj_root, yaml_name) dot_yaml_path = os.path.join(proj_root, ".pep.yaml") @@ -977,7 +995,9 @@ def _write_raw_annotation_new(self, name, metadata_dict: dict, subannot_dict: di else: subanot_path_yaml = f"" - template = self._create_config_raw(proj_meta, proj_root_sample, subanot_path_yaml) + template = self._create_config_raw( + proj_meta, proj_root_sample, subanot_path_yaml + ) if not self.just_object: 
self._write_gsm_annotation(metadata_dict, proj_root_sample) @@ -996,9 +1016,7 @@ def _write_raw_annotation_new(self, name, metadata_dict: dict, subannot_dict: di # open list: new_sub_list = [] for sub_key in subannot_dict.keys(): - new_sub_list.extend( - [col_item for col_item in subannot_dict[sub_key]] - ) + new_sub_list.extend([col_item for col_item in subannot_dict[sub_key]]) sub_meta_df = pd.DataFrame( new_sub_list, columns=["sample_name", "SRX", "SRR"] @@ -1063,10 +1081,7 @@ def _check_sample_name_standard(self, metadata_dict): fixed_dict = {} for key_sample, value_sample in metadata_dict.items(): fixed_dict[key_sample] = value_sample - if ( - value_sample["sample_name"] == "" - or value_sample["sample_name"] is None - ): + if value_sample["sample_name"] == "" or value_sample["sample_name"] is None: fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] # sanitize names fixed_dict[key_sample]["sample_name"] = self._sanitize_name( @@ -1824,7 +1839,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): # could still be an SRX linked to the (each) GSM. 
if len(gsm_metadata) == 1: try: - acc_SRP = gsm_metadata.keys()[0] + acc_SRP = list(gsm_metadata.keys())[0] self._LOGGER.warning( "But the GSM has an SRX number; instead of an " "SRP, using SRX identifier for this sample: " + acc_SRP @@ -2303,7 +2318,6 @@ def main(): Geofetcher(**args_dict).fetch_all(args_dict["input"]) - if __name__ == "__main__": try: sys.exit(main()) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index f8a0182..a916a7b 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -8,5 +8,5 @@ ubiquerg>=0.6.0 requests>=2.28.1 xmltodict>=0.13.0 pandas>=1.3.5 -peppy>=0.34.0 +peppy>=0.35.0 rich>=12.5.1 From d71dd780e9702cda5fb5bc985883d66745b6d973 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 1 Sep 2022 13:34:30 -0400 Subject: [PATCH 29/61] added docString to the functions --- geofetch/geofetch.py | 135 ++++++++++++++++++++++++++++++++----------- 1 file changed, 100 insertions(+), 35 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 3dbb9c0..bb0bff2 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -6,8 +6,10 @@ import copy import csv import os + # import re import sys + # from string import punctuation import requests import xmltodict @@ -427,8 +429,18 @@ def fetch_all(self, input: str, name: str = None): return return_value def _process_sra_meta( - self, srp_list_result=None, gsm_enter_dict=None, gsm_metadata=None + self, + srp_list_result: list = None, + gsm_enter_dict: dict = None, + gsm_metadata: dict = None, ): + """ + Creating srp multitable and updating gsm_metadata based on srp + :param srp_list_result: list of srp got from sra file + :param gsm_enter_dict: gsm enter content + :param gsm_metadata: dict of samples of gsm + :return: srp multitable + """ gsm_multi_table = {} for line in srp_list_result: @@ -646,6 +658,13 @@ def _generate_processed_meta( def _download_processed_data( self, acc_gse: str, meta_processed_samples: list, 
meta_processed_series: list ) -> NoReturn: + """ + Function that downloads processed data + :param acc_gse: accession number of the project + :param meta_processed_samples: list of annotation of samples + :param meta_processed_series: list of annotation of series + :return: Noreturn + """ data_geo_folder = os.path.join(self.geo_folder, acc_gse) self._LOGGER.debug("Data folder: " + data_geo_folder) @@ -677,6 +696,12 @@ def _download_processed_data( self._download_processed_file(file_url, data_geo_folder) def _expand_metadata_list_in_dict(self, metadata_dict: dict) -> dict: + """ + Expanding all lists of all items in the dict by creating new items or joining them + + :param metadata_dict: metadata dict + :return: expanded metadata dict + """ prj_list = self._dict_to_list_convector(proj_dict=metadata_dict) prj_list = self._expand_metadata_list(prj_list) return self._dict_to_list_convector(proj_list=prj_list) @@ -796,7 +821,7 @@ def _filter_gsm(self, meta_processed_samples: list, gsm_list: dict) -> list: return meta_processed_samples @staticmethod - def _get_list_of_keys(list_of_dict): + def _get_list_of_keys(list_of_dict: list): """ Getting list of all keys that are in the dictionaries in the list @@ -809,13 +834,13 @@ def _get_list_of_keys(list_of_dict): list_of_keys.extend(list(element.keys())) return list(set(list_of_keys)) - def _unify_list_keys(self, processed_meta_list): + def _unify_list_keys(self, processed_meta_list: list) -> list: """ Unifying list of dicts with metadata, so every dict will have same keys :param list processed_meta_list: list of dicts with metadata - :return str: list of unified dicts with metadata + :return list: list of unified dicts with metadata """ list_of_keys = self._get_list_of_keys(processed_meta_list) for k in list_of_keys: @@ -826,7 +851,7 @@ def _unify_list_keys(self, processed_meta_list): def _find_genome(self, metadata_list): """ - Create new genome table by joining few columns + Create new genome column by searching joining 
few columns """ list_keys = self._get_list_of_keys(metadata_list) genome_keys = [ @@ -842,14 +867,14 @@ def _find_genome(self, metadata_list): metadata_list[sample[0]][NEW_GENOME_COL_NAME] = sample_genome return metadata_list - def _write_gsm_annotation(self, gsm_metadata, file_annotation): + def _write_gsm_annotation(self, gsm_metadata: dict, file_annotation: str) -> str: """ Write metadata sheet out as an annotation file. :param Mapping gsm_metadata: the data to write, parsed from a file with metadata/annotation information :param str file_annotation: the path to the file to write - :return str: path to file written + :return str: path to the file """ keys = list(list(gsm_metadata.values())[0].keys()) @@ -879,7 +904,7 @@ def _write_processed_annotation( :param list processed_metadata: list of dictionaries with files metadata :param str file_annotation_path: the path to the metadata file that has to be saved :type just_object: True, if you want to get peppy object without saving file - :return: + :return: none, or peppy project """ if len(processed_metadata) == 0: self._LOGGER.info( @@ -944,9 +969,9 @@ def _write_raw_annotation_new( """ Combining individual accessions into project-level annotations, and writing individual accession files (if requested) - :param name: - :param metadata_dict: - :param subannot_dict: + :param name: Name of the run, project, or acc --> will influence name of the folder where project will be created + :param metadata_dict: dictionary of sample annotations + :param subannot_dict: dictionary of subsample annotations :return: none or peppy object """ try: @@ -1031,7 +1056,15 @@ def _write_raw_annotation_new( proj = peppy.Project().from_pandas(meta_df, sub_meta_df, conf) return proj - def _create_config_processed(self, file_annotation_path, proj_meta): + def _create_config_processed( + self, file_annotation_path: str, proj_meta: list + ) -> str: + """ + completing and generating config file content + :param file_annotation_path: root to 
the annotation file + :param proj_meta: common metadata that has to added to config file + :return: generated, complete config file content + """ geofetchdir = os.path.dirname(__file__) config_template = os.path.join(geofetchdir, CONFIG_PROCESSED_TEMPLATE_NAME) with open(config_template, "r") as template_file: @@ -1054,6 +1087,13 @@ def _create_config_processed(self, file_annotation_path, proj_meta): return template def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): + """ + completing and generating config file content for raw data + :param proj_meta: root to the annotation file + :param proj_root_sample: path to sampletable file + :param subanot_path_yaml: path to subannotation file + :return: generated, complete config file content + """ meta_list_str = [ f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta ] @@ -1077,7 +1117,13 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): template = template.replace(placeholder, str(v)) return template - def _check_sample_name_standard(self, metadata_dict): + def _check_sample_name_standard(self, metadata_dict: dict) -> dict: + """ + Standardizing sample name and checking if it exists + (This function is used for raw data) + :param metadata_dict: metadata dict + :return: metadata dict with standardize sample names + """ fixed_dict = {} for key_sample, value_sample in metadata_dict.items(): fixed_dict[key_sample] = value_sample @@ -1092,7 +1138,7 @@ def _check_sample_name_standard(self, metadata_dict): return metadata_dict @staticmethod - def _sanitize_name(name_str: str): + def _sanitize_name(name_str: str) -> str: """ Function that sanitizing strings. (Replace all odd characters) :param str name_str: Any string value that has to be sanitized. 
@@ -1106,7 +1152,7 @@ def _sanitize_name(name_str: str): return new_str @staticmethod - def _create_dot_yaml(file_path: str, yaml_path: str): + def _create_dot_yaml(file_path: str, yaml_path: str) -> NoReturn: """ Function that creates .pep.yaml file that points to actual yaml file :param str file_path: Path to the .pep.yaml file that we want to create @@ -1121,7 +1167,7 @@ def _separate_common_meta( max_len: int = 50, del_limit: int = 250, attr_limit_truncate: int = 500, - ): + ) -> tuple: """ This function is separating information for the experiment from a sample :param list or dict meta_list: list of dictionaries of samples @@ -1195,7 +1241,7 @@ def _separate_common_meta( meta_list = self._dict_to_list_convector(proj_list=meta_list) return meta_list, new_meta_project - def _standardize_colnames(self, meta_list: Union[list, dict]): + def _standardize_colnames(self, meta_list: Union[list, dict]) -> Union[list, dict]: """ Standardize column names by lower-casing and underscore :param list meta_list: list of dictionaries of samples @@ -1256,7 +1302,7 @@ def _dict_to_list_convector( return meta_list - def _download_SRA_file(self, run_name): + def _download_SRA_file(self, run_name: str): """ Downloading SRA file by ising 'prefetch' utility from the SRA Toolkit more info: (http://www.ncbi.nlm.nih.gov/books/NBK242621/) @@ -1285,7 +1331,7 @@ def _download_SRA_file(self, run_name): time.sleep(t * 2) @staticmethod - def _which(program): + def _which(program: str): """ return str: the path to a program to make sure it exists """ @@ -1305,7 +1351,7 @@ def is_exe(fp): if is_exe(exe_file): return exe_file - def _sra_bam_conversion(self, bam_file, run_name): + def _sra_bam_conversion(self, bam_file: str, run_name: str) -> NoReturn: """ Converting of SRA file to BAM file by using samtools function "sam-dump" :param str bam_file: path to BAM file that has to be created @@ -1330,7 +1376,9 @@ def _sra_bam_conversion(self, bam_file, run_name): run_subprocess(cmd, shell=True) 
@staticmethod - def _update_columns(metadata, experiment_name, sample_name, read_type): + def _update_columns( + metadata: dict, experiment_name: str, sample_name: str, read_type: str + ) -> dict: """ Update the metadata associated with a particular experiment. @@ -1346,7 +1394,7 @@ def _update_columns(metadata, experiment_name, sample_name, read_type): associated :param str read_type: usually "single" or "paired," an indication of the type of sequencing reads for this experiment - :return Mapping: + :return: updated metadata """ exp = metadata[experiment_name] @@ -1374,7 +1422,9 @@ def _update_columns(metadata, experiment_name, sample_name, read_type): return exp - def _sra_bam_conversion2(self, bam_file, run_name, picard_path=None): + def _sra_bam_conversion2( + self, bam_file: str, run_name: str, picard_path: str = None + ) -> NoReturn: """ Converting of SRA file to BAM file by using fastq-dump (is used when sam-dump fails, yielding an empty bam file. Here fastq -> bam conversion is used) @@ -1413,7 +1463,9 @@ def _sra_bam_conversion2(self, bam_file, run_name, picard_path=None): self._LOGGER.info(f"Conversion command: {cmd}") run_subprocess(cmd, shell=True) - def _write_subannotation(self, tabular_data, filepath, column_names=None): + def _write_subannotation( + self, tabular_data: dict, filepath: str, column_names: list = None + ): """ Writes one or more tables to a given CSV filepath. 
@@ -1440,7 +1492,9 @@ def _write_subannotation(self, tabular_data, filepath, column_names=None): writer.writerows(values) return fp - def _download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): + def _download_file( + self, file_url: str, data_folder: str, new_name: str = None, sleep_after=0.5 + ) -> NoReturn: """ Given an url for a file, downloading to specified folder :param str file_url: the URL of the file to download @@ -1470,12 +1524,12 @@ def _download_file(self, file_url, data_folder, new_name=None, sleep_after=0.5): def _get_list_of_processed_files( self, file_gse_content: list, file_gsm_content: list - ): + ) -> tuple: """ Given a paths to GSE and GSM metafile create a list of dicts of metadata of processed files :param list file_gse_content: list of lines of gse metafile :param list file_gsm_content: list of lines of gse metafile - :return list: list of metadata of processed files + :return: tuple[list of metadata of processed sample files and series files] """ tar_re = re.compile(r".*\.tar$") gse_numb = None @@ -1640,7 +1694,7 @@ def _get_list_of_processed_files( return meta_processed_samples, meta_processed_series @staticmethod - def _check_file_existance(meta_processed_sample): + def _check_file_existance(meta_processed_sample: list): """ Checking if last element of the list has files. If list of files is empty deleting it """ @@ -1769,18 +1823,13 @@ def _get_value(all_line: str): line_value = all_line.split("= ")[-1] return line_value.split(": ")[-1].rstrip("\n") - def _download_processed_file(self, file_url, data_folder): + def _download_processed_file(self, file_url: str, data_folder: str) -> bool: """ Given a url for a file, download it, and extract anything passing the filter. :param str file_url: the URL of the file to download :param str data_folder: the local folder where the file should be saved :return bool: True if the file is downloaded successfully; false if it does not pass filters and is not downloaded. 
- - # :param re.Pattern tar_re: a regulator expression (produced from re.compile) - # that pulls out filenames with .tar in them --- deleted - # :param re.Pattern filter_re: a regular expression (produced from - # re.compile) to filter filenames of interest. """ if not self.geo_folder: @@ -2027,7 +2076,20 @@ def _read_gsm_metadata( gsm_metadata = self._expand_metadata_list_in_dict(gsm_metadata) return gsm_metadata - def _write(self, f_var_value, content, msg_pre=None, omit_newline=False): + def _write( + self, + f_var_value: str, + content: str, + msg_pre: str = None, + omit_newline: bool = False, + ): + """ + Save new file (used for config file) + :param f_var_value: path to the file + :param content: content of the file + :param msg_pre: msg that have to be printed + :param omit_newline: omit new line + """ fp = expandpath(f_var_value) self._LOGGER.info((msg_pre or "") + fp) with open(fp, "w") as f: @@ -2037,6 +2099,9 @@ def _write(self, f_var_value, content, msg_pre=None, omit_newline=False): def _parse_cmdl(cmdl): + """ + parser + """ parser = argparse.ArgumentParser( description="Automatic GEO and SRA data downloader" ) From d7b2b702a55c05c4a7a00d14c068dc08d473753a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 1 Sep 2022 15:46:45 -0400 Subject: [PATCH 30/61] naming correction --- geofetch/geofetch.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index bb0bff2..c422acc 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -215,7 +215,7 @@ def get_project( ) project_dict.update(self.fetch_all(input=acc_GSE, name=acc_GSE)) else: - project_dict.update(self.fetch_all(input=input, name="project")) + project_dict.update(self.fetch_all(input=input, name="")) # raw data: else: @@ -234,15 +234,17 @@ def get_project( else: ser_dict = self.fetch_all(input=input) - project_dict["raw_samples"] = ser_dict + project_dict["raw"] = ser_dict return project_dict def fetch_all(self, 
input: str, name: str = None): """Main script driver/workflow""" - if name: + if name is not None: self.project_name = name + print("------------------------") + print(self.project_name) else: try: self.project_name = os.path.splitext(os.path.basename(input))[0] @@ -413,7 +415,7 @@ def fetch_all(self, input: str, name: str = None): if self.processed: if not self.acc_anno: return_value = self._generate_processed_meta( - name="PEP_processed", + name=self.project_name, meta_processed_samples=processed_metadata_samples, meta_processed_series=processed_metadata_series, ) From a88a02b29961e3e71a3df82e2e022f31bf9bd501 Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Fri, 2 Sep 2022 14:16:38 -0400 Subject: [PATCH 31/61] use https --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ed5c01e..98e381c 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,11 @@ # geofetch logo -[![PEP compatible](http://pepkit.github.io/img/PEP-compatible-green.svg)](http://pepkit.github.io) +[![PEP compatible](https://pepkit.github.io/img/PEP-compatible-green.svg)](https://pepkit.github.io) ![Run pytests](https://github.com/pepkit/geofetch/workflows/Run%20pytests/badge.svg) -[![docs-badge](https://readthedocs.org/projects/geofetch/badge/?version=latest)](http://geofetch.databio.org/en/latest/) +[![docs-badge](https://readthedocs.org/projects/geofetch/badge/?version=latest)](https://geofetch.databio.org/en/latest/) [![pypi-badge](https://img.shields.io/pypi/v/geofetch)](https://pypi.org/project/geofetch) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -`geofetch` is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates [standard PEPs](http://pep.databio.org/). 
`geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/) and documentation is hosted at [geofetch.databio.org](http://geofetch.databio.org) (source in the [/docs](/docs) folder). +`geofetch` is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates [standard PEPs](https://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/) and documentation is hosted at [geofetch.databio.org](https://geofetch.databio.org) (source in the [/docs](/docs) folder). You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command. From cb464afa55a8cc4637480ba0ba651cef9cb84654 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 2 Sep 2022 17:42:19 -0400 Subject: [PATCH 32/61] Added tests --- tests/test_geofetch.py | 101 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 91 insertions(+), 10 deletions(-) diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index d3fa7de..ae0029c 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -1,3 +1,5 @@ +import peppy + import geofetch from geofetch import parse_accessions, Geofetcher, utils import os @@ -62,7 +64,7 @@ def test_accessions_file(self): class TestListProcessedMetaFiles: """ - Testing + Testing downloading and saving process soft files """ @pytest.fixture(scope="function") @@ -107,22 +109,50 @@ def test_downloading_soft_files(self, initiate_geofetcher): assert "GSE138657_GSE.soft" in downloaded_meta_files def test_creating_sample_pep_files(self, initiate_geofetcher): - initiate_geofetcher.fetch_all("GSE138657") + gse_numb = "GSE138657" + initiate_geofetcher.fetch_all(gse_numb) + downloaded_meta_files = list( + os.walk(initiate_geofetcher.metadata_expanded + f"/{gse_numb}_samples") + )[0][2] + + assert f"{gse_numb}_samples.csv" in downloaded_meta_files + assert f"{gse_numb}_samples.yaml" in downloaded_meta_files + + def test_creating_series_pep_files(self, 
initiate_geofetcher): + gse_numb = "GSE199313" + initiate_geofetcher.fetch_all(gse_numb) downloaded_meta_files = list( - os.walk(initiate_geofetcher.metadata_expanded + "/PEP_processed_samples") + os.walk(initiate_geofetcher.metadata_expanded + f"/{gse_numb}_series") )[0][2] - assert "PEP_processed_samples.csv" in downloaded_meta_files - assert "PEP_processed_samples.yaml" in downloaded_meta_files + assert f"{gse_numb}_series.csv" in downloaded_meta_files + assert f"{gse_numb}_series.yaml" in downloaded_meta_files + + +class TestListRawMetaFiles: + """ + Testing downloading and saving raw files and metadata + """ + @pytest.fixture(scope="function") + def initiate_geofetcher(self, tmpdir): + instance = Geofetcher( + just_metadata=True, + processed=False, + name="test", + metadata_folder=tmpdir, + discard_soft=True, + ) + yield instance def test_creating_series_pep_files(self, initiate_geofetcher): - initiate_geofetcher.fetch_all("GSE199313") + initiate_geofetcher.fetch_all("GSE138656") downloaded_meta_files = list( - os.walk(initiate_geofetcher.metadata_expanded + "/PEP_processed_series") + os.walk(initiate_geofetcher.metadata_expanded + f"/PEP") )[0][2] - assert "PEP_processed_series.csv" in downloaded_meta_files - assert "PEP_processed_series.yaml" in downloaded_meta_files + assert "PEP_raw.csv" in downloaded_meta_files + assert "PEP.yaml" in downloaded_meta_files + assert "PEP_raw_subtable.csv" in downloaded_meta_files class TestDownloadingProcFiles: @@ -165,7 +195,7 @@ def initiate_geofetcher(self, tmpdir): processed=True, name="test", metadata_folder=tmpdir, - filter="\.Bed.gz$", + filter=r"\.Bed.gz$", filter_size="2MB", ) yield instance @@ -264,6 +294,57 @@ def test_large_meta_separation( assert samp == result_sample assert proj == result_proj +class TestPeppyInitProcessed: + """ + Testing downloading and saving raw files and metadata + """ + @pytest.fixture(scope="function") + def initiate_geofetcher(self, tmpdir): + instance = Geofetcher( + 
just_metadata=True, + processed=True, + name="test", + metadata_folder=tmpdir, + discard_soft=True, + data_source="all" + ) + yield instance + + def test_creating_processed_peppy(self, initiate_geofetcher): + p_prop = initiate_geofetcher.get_project("GSE190287") + assert isinstance(p_prop['_samples'], peppy.Project) + assert isinstance(p_prop['_series'], peppy.Project) + + def test_number_of_samples(self, initiate_geofetcher): + p_prop = initiate_geofetcher.get_project("GSE190287") + assert len(p_prop['_samples'].samples) == 8 #it has 11 files but 8 samples + assert len(p_prop['_series'].samples) == 2 + +class TestPeppyInitRaw: + """ + Testing downloading and saving raw files and metadata + """ + + @pytest.fixture(scope="function") + def initiate_geofetcher(self, tmpdir): + instance = Geofetcher( + just_metadata=True, + processed=False, + name="test", + metadata_folder=tmpdir, + discard_soft=True, + ) + yield instance + + def test_creating_processed_peppy(self, initiate_geofetcher): + p_prop = initiate_geofetcher.get_project("GSE189141") + assert isinstance(p_prop['raw'], peppy.Project) + + def test_number_of_samples(self, initiate_geofetcher): + p_prop = initiate_geofetcher.get_project("GSE189141") + a = ([d['sample_name'] for d in p_prop['raw'].samples]) + assert len(p_prop['raw'].samples) == 16 #it has 16 samples + def test_clean_func(tmpdir): """ From 07f6cadbf3c1e60d7fabed733430236d6773082d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 6 Sep 2022 11:37:06 -0400 Subject: [PATCH 33/61] added docstring --- geofetch/geofetch.py | 76 +++++++++++++++++++++++++++++++++++++++--- tests/test_geofetch.py | 20 ++++++----- 2 files changed, 84 insertions(+), 12 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index c422acc..d7a3679 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -41,6 +41,64 @@ class Geofetcher: """ Class to download or get projects, metadata, data from GEO and SRA + + :param input: + :param name: Specify a 
project name. Defaults to GSE number or name of accessions file name + :param metadata_root: Specify a parent folder location to store metadata. + The project name will be added as a subfolder [Default: $SRAMETA:] + :param metadata_folder: Specify an absolute folder location to store metadata. No subfolder will be added. + Overrides value of --metadata-root [Default: Not used (--metadata-root is used by default)] + :param just_metadata: If set, don't actually run downloads, just create metadata + :param refresh_metadata: If set, re-download metadata even if it exists. + :param config_template: Project config yaml file template. + :param pipeline_samples: Specify one or more filepaths to SAMPLES pipeline interface yaml files. + These will be added to the project config file to make it immediately compatible with looper. + [Default: null] + :param pipeline_project: Specify one or more filepaths to PROJECT pipeline interface yaml files. + These will be added to the project config file to make it immediately compatible with looper. + [Default: null] + :param acc_anno: Produce annotation sheets for each accession. + Project combined PEP for the whole project won't be produced. + :param discard_soft: Create project without downloading soft files on the disc + :param add_dotfile: Add .pep.yaml file that points .yaml PEP file + :param disable_progressbar: Set true to disable progressbar + + :param const_limit_project: Optional: Limit of the number of the constant sample characters + that should not be in project yaml. [Default: 50] + :param const_limit_discard: Optional: Limit of the number of the constant sample characters + that should not be discarded [Default: 250] + :param attr_limit_truncate: Optional: Limit of the number of sample characters. + Any attribute with more than X characters will truncate to the first X, where X is a number of characters + [Default: 500] + + :param processed: Download processed data [Default: download raw data]. 
+ :param data_source: Specifies the source of data on the GEO record to retrieve processed data, + which may be attached to the collective series entity, or to individual samples. Allowable values are: + samples, series or both (all). Ignored unless 'processed' flag is set. [Default: samples] + :param filter: Filter regex for processed filenames [Default: None]. Ignored unless 'processed' flag is set. + :param filter_size: Filter size for processed files that are stored as sample repository [Default: None]. + Works only for sample data. Supported input formats: 12B, 12KB, 12MB, 12GB. + Ignored unless 'processed' flag is set. + :param geo_folder: Specify a location to store processed GEO files. + Ignored unless 'processed' flag is set. [Default: $GEODATA:] + + :param split_experiments: Split SRR runs into individual samples. By default, SRX experiments with multiple SRR + Runs will have a single entry in the annotation table, with each run as a separate row in the + subannotation table. This setting instead treats each run as a separate sample [Works with raw data] + :param bam_folder: Optional: Specify folder of bam files. Geofetch will not download sra files when + corresponding bam files already exist. [Default: $SRABAM:] [Works with raw data] + :param fq_folder: Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding + fastq files already exist. [Default: $SRAFQ:] [Works with raw data] + :param use_key_subset: Use just the keys defined in this module when writing out metadata. [Works with raw data] + :param sra_folder: Optional (currently not functional): Specify a location to store sra files + [Default: $SRARAW:] + :param bam_conversion: Optional: set True to convert bam files [Works with raw data] + :param picard_path: Specify a path to the picard jar, if you want to convert fastq to bam + [Default: $PICARD:] [Works with raw data] + + :param skip: Skip some accessions.
[Default: no skip]. + :param opts: opts object [Optional] + :param kwargs: other values """ def __init__( @@ -194,7 +252,6 @@ def get_project( """ self.just_metadata = just_metadata self.just_object = True - self.disable_progressbar = True self.discard_soft = discard_soft acc_GSE_list = parse_accessions( input, self.metadata_expanded, self.just_metadata @@ -205,6 +262,7 @@ def get_project( # processed data: if self.processed: if self.acc_anno: + self.disable_progressbar = True nkeys = len(acc_GSE_list.keys()) ncount = 0 self.acc_anno = False @@ -221,6 +279,7 @@ def get_project( else: # Not sure about below code... if self.acc_anno: + self.disable_progressbar = True self.acc_anno = False nkeys = len(acc_GSE_list.keys()) ncount = 0 @@ -236,14 +295,18 @@ def get_project( ser_dict = self.fetch_all(input=input) project_dict["raw"] = ser_dict - return project_dict + new_pr_dict = {} + for pr_key in project_dict.keys(): + if project_dict[pr_key]: + new_pr_dict[pr_key] = project_dict[pr_key] + + return new_pr_dict def fetch_all(self, input: str, name: str = None): """Main script driver/workflow""" if name is not None: self.project_name = name - print("------------------------") print(self.project_name) else: try: @@ -284,7 +347,7 @@ def fetch_all(self, input: str, name: str = None): elif ncount == self.skip + 1: self._LOGGER.info(f"Skipped {self.skip} accessions. 
Starting now.") - if not self.just_object: + if not self.just_object or not self.acc_anno: self._LOGGER.info( f"\033[38;5;200mProcessing accession {ncount} of {nkeys}: '{acc_GSE}'\033[0m" ) @@ -2355,6 +2418,11 @@ def _parse_cmdl(cmdl): action="store_true", help="Use just the keys defined in this module when writing out metadata.", ) + raw_group.add_argument( + "--bam-conversion", + action="store_true", + help="specify this argument to convert bam files", + ) logmuse.add_logging_options(parser) return parser.parse_args(cmdl) diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index ae0029c..da9f788 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -133,6 +133,7 @@ class TestListRawMetaFiles: """ Testing downloading and saving raw files and metadata """ + @pytest.fixture(scope="function") def initiate_geofetcher(self, tmpdir): instance = Geofetcher( @@ -294,10 +295,12 @@ def test_large_meta_separation( assert samp == result_sample assert proj == result_proj + class TestPeppyInitProcessed: """ Testing downloading and saving raw files and metadata """ + @pytest.fixture(scope="function") def initiate_geofetcher(self, tmpdir): instance = Geofetcher( @@ -306,19 +309,20 @@ def initiate_geofetcher(self, tmpdir): name="test", metadata_folder=tmpdir, discard_soft=True, - data_source="all" + data_source="all", ) yield instance def test_creating_processed_peppy(self, initiate_geofetcher): p_prop = initiate_geofetcher.get_project("GSE190287") - assert isinstance(p_prop['_samples'], peppy.Project) - assert isinstance(p_prop['_series'], peppy.Project) + assert isinstance(p_prop["_samples"], peppy.Project) + assert isinstance(p_prop["_series"], peppy.Project) def test_number_of_samples(self, initiate_geofetcher): p_prop = initiate_geofetcher.get_project("GSE190287") - assert len(p_prop['_samples'].samples) == 8 #it has 11 files but 8 samples - assert len(p_prop['_series'].samples) == 2 + assert len(p_prop["_samples"].samples) == 8 # it has 11 files but 8 
samples + assert len(p_prop["_series"].samples) == 2 + class TestPeppyInitRaw: """ @@ -338,12 +342,12 @@ def initiate_geofetcher(self, tmpdir): def test_creating_processed_peppy(self, initiate_geofetcher): p_prop = initiate_geofetcher.get_project("GSE189141") - assert isinstance(p_prop['raw'], peppy.Project) + assert isinstance(p_prop["raw"], peppy.Project) def test_number_of_samples(self, initiate_geofetcher): p_prop = initiate_geofetcher.get_project("GSE189141") - a = ([d['sample_name'] for d in p_prop['raw'].samples]) - assert len(p_prop['raw'].samples) == 16 #it has 16 samples + a = [d["sample_name"] for d in p_prop["raw"].samples] + assert len(p_prop["raw"].samples) == 16 # it has 16 samples def test_clean_func(tmpdir): From 528a7fc6afc3a900abe5bb29cff0c4e969a414d7 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 6 Sep 2022 14:47:20 -0400 Subject: [PATCH 34/61] Added documentation --- .github/workflows/run-codecov.yml | 2 +- docs/README.md | 18 + docs/changelog.md | 7 +- docs/img/arguments_outputs.svg | 8186 +++++++++++++++++ docs/img/pipeline.svg | 8072 ++++++++++++++++ docs/install.md | 32 +- docs/metadata_output.md | 9 +- docs/usage.md | 112 +- .../build/processed-data-downloading.md | 125 +- docs_jupyter/processed-data-downloading.ipynb | 152 +- docs_jupyter/python-usage.ipynb | 725 ++ geofetch/geofetch.py | 5 - mkdocs.yml | 2 + 13 files changed, 17128 insertions(+), 319 deletions(-) create mode 100644 docs/img/arguments_outputs.svg create mode 100644 docs/img/pipeline.svg create mode 100644 docs_jupyter/python-usage.ipynb diff --git a/.github/workflows/run-codecov.yml b/.github/workflows/run-codecov.yml index a41a1fd..1db19ff 100644 --- a/.github/workflows/run-codecov.yml +++ b/.github/workflows/run-codecov.yml @@ -2,7 +2,7 @@ name: Run codecov on: pull_request: - branches: [master] + branches: [master, dev] jobs: pytest: diff --git a/docs/README.md b/docs/README.md index 1cc4626..2f21910 100644 --- a/docs/README.md +++ b/docs/README.md @@ -12,6 
+12,8 @@ - Produce a standardized [PEP](http://pepkit.github.io) sample table. This makes it really easy to run [looper](https://pepkit.github.io/docs/looper/)-compatible pipelines on public datasets by handling data acquisition and metadata formatting and standardization for you. - Prepare a project to run with [sraconvert](sra_convert.md) to convert SRA files into FASTQ files. +![](./img/pipeline.svg) + ## Quick example `geofetch` runs on the command line. This command will download the raw data and metadata for the given GSE number. @@ -38,5 +40,21 @@ geofetch -i GSE95654 --just-metadata geofetch -i GSE95654 --processed --just-metadata ``` +### Check which arguments to use to download the data you need: + +![](./img/arguments_outputs.svg) + +### New geofetch 0.11.0 feature: +- geofetch is now available as a Python package that can directly initialize [peppy](http://peppy.databio.org/) projects without downloading any SOFT files. +```python +from geofetch import Geofetcher + +# initialize Geofetcher with all necessary arguments: +geof = Geofetcher(processed=True, acc_anno=True, discard_soft=True) + +# get projects by providing a GSE accession or a file with GSE accessions as input +geof.get_project("GSE160204") +``` + For more details, check out the [usage](usage.md) reference, [installation instructions](install.md), or head on over to the [tutorial for raw data](raw-data-downloading.md) and [tutorial for processed data](processed-data-downloading.md) for a detailed walkthrough.
diff --git a/docs/changelog.md b/docs/changelog.md index 89bcb72..23c3729 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,11 +1,12 @@ # Changelog -## [0.11.0] -- 2022-XX-XX +## [0.11.0] -- 2022-09-06 - Added initialization of peppy Project without saving any files functionality - Added progress bar - Fixed None issue in config file -- Changed way of saving soft files to request library -- +- Switched way of saving soft files to request library +- Fixed saving raw peps bug +- Refactored code ## [0.10.1] -- 2022-08-04 - Updated metadata fetching requests from SRA database diff --git a/docs/img/arguments_outputs.svg b/docs/img/arguments_outputs.svg new file mode 100644 index 0000000..89cb3c5 --- /dev/null +++ b/docs/img/arguments_outputs.svg @@ -0,0 +1,8186 @@ + + + +--processed --just-metadata --data-source samples--processed--data-source samples--processed --just-metadata--data-source series--processed--data-source series--processed--data-source all--processed --just-metadata --data-source allArguments#12345678--just-metadataOutputData SourceSamplesSamplesSeriesSeriesallallSamplesSamplesMetadataProcessedProcessedProcessedProcessedProcessedProcessedRawRawDataProcessedProcessedProcessedNoneNoneNoneRawNone diff --git a/docs/img/pipeline.svg b/docs/img/pipeline.svg new file mode 100644 index 0000000..ef4d501 --- /dev/null +++ b/docs/img/pipeline.svg @@ -0,0 +1,8072 @@ + + + +geofetchgeofetchPortableEncapsulatedProjectpeppypeprGEO/SRAData diff --git a/docs/install.md b/docs/install.md index 355cd70..01c9c12 100644 --- a/docs/install.md +++ b/docs/install.md @@ -1,21 +1,5 @@ # Installing geofetch -## Prerequisites - -You must have the [sratoolkit from NCBI](https://www.ncbi.nlm.nih.gov/books/NBK158900/) installed, with the tools in your PATH. Once it's installed, you should check to make sure you can run `prefetch`. Also, make sure it's configured to store SRA files where you want them. For more information, see how to change sratools download location. 
- -## Setting data download location for `sratools` - -`geofetch` is using the [sratoolkit](https://trace.ncbi.nlm.nih.gov/Traces/sra/?view=toolkit_doc&f=std) to download raw data from SRA -- which means it's stuck with the [default path for downloading SRA data](http://databio.org/posts/downloading_sra_data.html), which is in your home directory. So before you run `geofetch`, make sure you have set up your download location to the correct place. In our group, we use a shared group environment variable called `${SRARAW}`, which points to a shared folder (`${DATA}/sra`) where the whole group has access to downloaded SRA data. You can point the `sratoolkit` (and therefore `geofetch`) to use that location with this one-time configuration code: - -``` -echo "/repository/user/main/public/root = \"$DATA\"" > ${HOME}/.ncbi/user-settings.mkfg -``` - -Now `sratoolkit` will download data into an `/sra` folder in `${DATA}`, which is what `${SRARAW}` points to. - -If you are getting an error that the `.ncbi` folder does not exist in your home directory, you can just make a folder `.ncbi` with an empty file `user-settings.mkfg` and follow the same command above. - ## Installing geofetch Releases are posted as [GitHub releases](https://github.com/pepkit/geofetch/releases), or you can install from PyPI using `pip`: @@ -35,3 +19,19 @@ If the executable in not in your $PATH, append this to your `.bashrc` or `.profi ``` export PATH=~/.local/bin:$PATH ``` + +## Prerequisites for SRA data downloading + +To download **raw data**, you must have the [sratoolkit from NCBI](https://www.ncbi.nlm.nih.gov/books/NBK158900/) installed, with the tools in your PATH. Once it's installed, you should check to make sure you can run `prefetch`. Also, make sure it's configured to store SRA files where you want them. For more information, see the section below on setting the data download location for `sratools`.
+ +## Setting data download location for `sratools` + +`geofetch` is using the [sratoolkit](https://trace.ncbi.nlm.nih.gov/Traces/sra/?view=toolkit_doc&f=std) to download raw data from SRA -- which means it's stuck with the [default path for downloading SRA data](http://databio.org/posts/downloading_sra_data.html), which is in your home directory. So before you run `geofetch`, make sure you have set up your download location to the correct place. In our group, we use a shared group environment variable called `${SRARAW}`, which points to a shared folder (`${DATA}/sra`) where the whole group has access to downloaded SRA data. You can point the `sratoolkit` (and therefore `geofetch`) to use that location with this one-time configuration code: + +``` +echo "/repository/user/main/public/root = \"$DATA\"" > ${HOME}/.ncbi/user-settings.mkfg +``` + +Now `sratoolkit` will download data into an `/sra` folder in `${DATA}`, which is what `${SRARAW}` points to. + +If you are getting an error that the `.ncbi` folder does not exist in your home directory, you can just make a folder `.ncbi` with an empty file `user-settings.mkfg` and follow the same command above. \ No newline at end of file diff --git a/docs/metadata_output.md b/docs/metadata_output.md index 3c2eace..e9f5fde 100644 --- a/docs/metadata_output.md +++ b/docs/metadata_output.md @@ -1,12 +1,17 @@ # Metadata output -For each GSE input accession (ACC), `geofetch` produces: +Geofetch produces [PEPs](http://pep.databio.org/) for either processed or raw data (including metadata from SRA). 
+ +# Outdated: + +For each GSE input accession (ACC), `geofetch` produces (if discard-soft is not set): - GSE_ACC####.soft a SOFT file (annotating the experiment itself) - GSM_ACC####.soft a SOFT file (annotating the samples within the experiment) - SRA_ACC####.soft a CSV file (annotating each SRA Run, retrieved from GSE->GSM->SRA) -In addition, a single combined metadata file (.csv) for the whole input, +For raw data: +a single combined metadata file (.csv) will be created for the whole input, including SRA and GSM annotations for each sample. Here, "combined" means that it will have rows for every sample in every GSE included in your input. So if you just gave a single GSE, then the combined file is the same as the GSE file. If any "merged" samples exist diff --git a/docs/usage.md b/docs/usage.md index 89b3fad..29ba6b1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,19 +1,24 @@ -# - -Usage reference - -`geofetch` command-line usage instructions: +# Usage reference +geofetch command-line usage instructions: +`geofetch -V` +```console +geofetch 0.11.0 +``` `geofetch --help` -```{console} -usage: geofetch [-h] [-V] -i INPUT [-n NAME] [-m METADATA_ROOT] [-u METADATA_FOLDER] [--just-metadata] [-r] [--config-template CONFIG_TEMPLATE] - [--pipeline_samples PIPELINE_SAMPLES] [--pipeline_project PIPELINE_PROJECT] [-k SKIP] [--acc-anno] [--discard-soft] - [--const-limit-project CONST_LIMIT_PROJECT] [--const-limit-discard CONST_LIMIT_DISCARD] - [--attr-limit-truncate ATTR_LIMIT_TRUNCATE] [-p] [--data-source {all,samples,series}] [--filter FILTER] - [--filter-size FILTER_SIZE] [-g GEO_FOLDER] [-x] [-b BAM_FOLDER] [-f FQ_FOLDER] [--use-key-subset] [--silent] [--verbosity V] - [--logdev] +```console +usage: geofetch [-h] [-V] -i INPUT [-n NAME] [-m METADATA_ROOT] [-u METADATA_FOLDER] + [--just-metadata] [-r] [--config-template CONFIG_TEMPLATE] + [--pipeline-samples PIPELINE_SAMPLES] [--pipeline-project PIPELINE_PROJECT] + [--disable-progressbar] [-k SKIP] [--acc-anno] 
[--discard-soft] + [--const-limit-project CONST_LIMIT_PROJECT] + [--const-limit-discard CONST_LIMIT_DISCARD] + [--attr-limit-truncate ATTR_LIMIT_TRUNCATE] [--add-dotfile] [-p] + [--data-source {all,samples,series}] [--filter FILTER] + [--filter-size FILTER_SIZE] [-g GEO_FOLDER] [-x] [-b BAM_FOLDER] + [-f FQ_FOLDER] [--use-key-subset] [--silent] [--verbosity V] [--logdev] Automatic GEO and SRA data downloader @@ -21,34 +26,47 @@ optional arguments: -h, --help show this help message and exit -V, --version show program's version number and exit -i INPUT, --input INPUT - required: a GEO (GSE) accession, or a file with a list of GSE numbers + required: a GEO (GSE) accession, or a file with a list of GSE + numbers -n NAME, --name NAME Specify a project name. Defaults to GSE number -m METADATA_ROOT, --metadata-root METADATA_ROOT - Specify a parent folder location to store metadata. The project name will be added as a subfolder [Default: $SRAMETA:] + Specify a parent folder location to store metadata. The project name + will be added as a subfolder [Default: $SRAMETA:] -u METADATA_FOLDER, --metadata-folder METADATA_FOLDER - Specify an absolute folder location to store metadata. No subfolder will be added. Overrides value of --metadata-root - [Default: Not used (--metadata-root is used by default)] + Specify an absolute folder location to store metadata. No subfolder + will be added. Overrides value of --metadata-root [Default: Not used + (--metadata-root is used by default)] --just-metadata If set, don't actually run downloads, just create metadata -r, --refresh-metadata If set, re-download metadata even if it exists. --config-template CONFIG_TEMPLATE Project config yaml file template. - --pipeline_samples PIPELINE_SAMPLES - Optional: Specify one or more filepaths to SAMPLES pipeline interface yaml files. These will be added to the project - config file to make it immediately compatible with looper. 
[Default: null] - --pipeline_project PIPELINE_PROJECT - Optional: Specify one or more filepaths to PROJECT pipeline interface yaml files. These will be added to the project - config file to make it immediately compatible with looper. [Default: null] + --pipeline-samples PIPELINE_SAMPLES + Optional: Specify one or more filepaths to SAMPLES pipeline + interface yaml files. These will be added to the project config file + to make it immediately compatible with looper. [Default: null] + --pipeline-project PIPELINE_PROJECT + Optional: Specify one or more filepaths to PROJECT pipeline + interface yaml files. These will be added to the project config file + to make it immediately compatible with looper. [Default: null] + --disable-progressbar + Optional: Disable progressbar -k SKIP, --skip SKIP Skip some accessions. [Default: no skip]. - --acc-anno Optional: Produce annotation sheets for each accession. Project combined PEP for the whole project won't be produced. - --discard-soft Optional: After creation of PEP files, all soft and additional files will be deleted + --acc-anno Optional: Produce annotation sheets for each accession. Project + combined PEP for the whole project won't be produced. + --discard-soft Optional: After creation of PEP files, all soft and additional files + will be deleted --const-limit-project CONST_LIMIT_PROJECT - Optional: Limit of the number of the constant sample characters that should not be in project yaml. [Default: 50] + Optional: Limit of the number of the constant sample characters that + should not be in project yaml. 
[Default: 50] --const-limit-discard CONST_LIMIT_DISCARD - Optional: Limit of the number of the constant sample characters that should not be discarded [Default: 250] + Optional: Limit of the number of the constant sample characters that + should not be discarded [Default: 250] --attr-limit-truncate ATTR_LIMIT_TRUNCATE - Optional: Limit of the number of sample characters.Any attribute with more than X characters will truncate to the first - X, where X is a number of characters [Default: 500] + Optional: Limit of the number of sample characters.Any attribute + with more than X characters will truncate to the first X, where X is + a number of characters [Default: 500] + --add-dotfile Optional: Add .pep.yaml file that points .yaml PEP file --silent Silence logging. Overrides verbosity. --verbosity V Set logging level (1-5 or logging module level name) --logdev Expand content of logging message format. @@ -56,27 +74,35 @@ optional arguments: processed: -p, --processed Download processed data [Default: download raw data]. --data-source {all,samples,series} - Optional: Specifies the source of data on the GEO record to retrieve processed data, which may be attached to the - collective series entity, or to individual samples. Allowable values are: samples, series or both (all). Ignored unless - 'processed' flag is set. [Default: samples] - --filter FILTER Optional: Filter regex for processed filenames [Default: None].Ignored unless 'processed' flag is set. + Optional: Specifies the source of data on the GEO record to retrieve + processed data, which may be attached to the collective series + entity, or to individual samples. Allowable values are: samples, + series or both (all). Ignored unless 'processed' flag is set. + [Default: samples] + --filter FILTER Optional: Filter regex for processed filenames [Default: + None].Ignored unless 'processed' flag is set. 
--filter-size FILTER_SIZE - Optional: Filter size for processed files that are stored as sample repository [Default: None]. Works only for sample - data. Supported input formats : 12B, 12KB, 12MB, 12GB. Ignored unless 'processed' flag is set. + Optional: Filter size for processed files that are stored as sample + repository [Default: None]. Works only for sample data. Supported + input formats : 12B, 12KB, 12MB, 12GB. Ignored unless 'processed' + flag is set. -g GEO_FOLDER, --geo-folder GEO_FOLDER - Optional: Specify a location to store processed GEO files. Ignored unless 'processed' flag is set.[Default: $GEODATA:] + Optional: Specify a location to store processed GEO files. Ignored + unless 'processed' flag is set.[Default: $GEODATA:] raw: -x, --split-experiments - Split SRR runs into individual samples. By default, SRX experiments with multiple SRR Runs will have a single entry in - the annotation table, with each run as a separate row in the subannotation table. This setting instead treats each run as - a separate sample + Split SRR runs into individual samples. By default, SRX experiments + with multiple SRR Runs will have a single entry in the annotation + table, with each run as a separate row in the subannotation table. + This setting instead treats each run as a separate sample -b BAM_FOLDER, --bam-folder BAM_FOLDER - Optional: Specify folder of bam files. Geofetch will not download sra files when corresponding bam files already exist. - [Default: $SRABAM:] + Optional: Specify folder of bam files. Geofetch will not download + sra files when corresponding bam files already exist. [Default: + $SRABAM:] -f FQ_FOLDER, --fq-folder FQ_FOLDER - Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding fastq files already - exist. [Default: $SRAFQ:] + Optional: Specify folder of fastq files. Geofetch will not download + sra files when corresponding fastq files already exist. 
[Default: + $SRAFQ:] --use-key-subset Use just the keys defined in this module when writing out metadata. - ``` diff --git a/docs_jupyter/build/processed-data-downloading.md b/docs_jupyter/build/processed-data-downloading.md index 052d413..b851a61 100644 --- a/docs_jupyter/build/processed-data-downloading.md +++ b/docs_jupyter/build/processed-data-downloading.md @@ -15,126 +15,6 @@ geofetch 0.10.1 To see your CLI options, invoke `geofetch -h`: - -```bash -geofetch -h -``` - -```.output -usage: geofetch [-h] [-V] -i INPUT [-n NAME] [-m METADATA_ROOT] - [-u METADATA_FOLDER] [--just-metadata] [-r] - [--config-template CONFIG_TEMPLATE] - [--pipeline-samples PIPELINE_SAMPLES] - [--pipeline-project PIPELINE_PROJECT] [-k SKIP] [--acc-anno] - [--discard-soft] [--const-limit-project CONST_LIMIT_PROJECT] - [--const-limit-discard CONST_LIMIT_DISCARD] - [--attr-limit-truncate ATTR_LIMIT_TRUNCATE] [--add-dotfile] - [-p] [--data-source {all,samples,series}] [--filter FILTER] - [--filter-size FILTER_SIZE] [-g GEO_FOLDER] [-x] - [-b BAM_FOLDER] [-f FQ_FOLDER] [--use-key-subset] [--silent] - [--verbosity V] [--logdev] - -Automatic GEO and SRA data downloader - -optional arguments: - -h, --help show this help message and exit - -V, --version show program's version number and exit - -i INPUT, --input INPUT - required: a GEO (GSE) accession, or a file with a list - of GSE numbers - -n NAME, --name NAME Specify a project name. Defaults to GSE number - -m METADATA_ROOT, --metadata-root METADATA_ROOT - Specify a parent folder location to store metadata. - The project name will be added as a subfolder - [Default: $SRAMETA:] - -u METADATA_FOLDER, --metadata-folder METADATA_FOLDER - Specify an absolute folder location to store metadata. - No subfolder will be added. 
Overrides value of - --metadata-root [Default: Not used (--metadata-root is - used by default)] - --just-metadata If set, don't actually run downloads, just create - metadata - -r, --refresh-metadata - If set, re-download metadata even if it exists. - --config-template CONFIG_TEMPLATE - Project config yaml file template. - --pipeline-samples PIPELINE_SAMPLES - Optional: Specify one or more filepaths to SAMPLES - pipeline interface yaml files. These will be added to - the project config file to make it immediately - compatible with looper. [Default: null] - --pipeline-project PIPELINE_PROJECT - Optional: Specify one or more filepaths to PROJECT - pipeline interface yaml files. These will be added to - the project config file to make it immediately - compatible with looper. [Default: null] - -k SKIP, --skip SKIP Skip some accessions. [Default: no skip]. - --acc-anno Optional: Produce annotation sheets for each - accession. Project combined PEP for the whole project - won't be produced. - --discard-soft Optional: After creation of PEP files, all soft and - additional files will be deleted - --const-limit-project CONST_LIMIT_PROJECT - Optional: Limit of the number of the constant sample - characters that should not be in project yaml. - [Default: 50] - --const-limit-discard CONST_LIMIT_DISCARD - Optional: Limit of the number of the constant sample - characters that should not be discarded [Default: 250] - --attr-limit-truncate ATTR_LIMIT_TRUNCATE - Optional: Limit of the number of sample characters.Any - attribute with more than X characters will truncate to - the first X, where X is a number of characters - [Default: 500] - --add-dotfile Optional: Add .pep.yaml file that points .yaml PEP - file - --silent Silence logging. Overrides verbosity. - --verbosity V Set logging level (1-5 or logging module level name) - --logdev Expand content of logging message format. - -processed: - -p, --processed Download processed data [Default: download raw data]. 
- --data-source {all,samples,series} - Optional: Specifies the source of data on the GEO - record to retrieve processed data, which may be - attached to the collective series entity, or to - individual samples. Allowable values are: samples, - series or both (all). Ignored unless 'processed' flag - is set. [Default: samples] - --filter FILTER Optional: Filter regex for processed filenames - [Default: None].Ignored unless 'processed' flag is - set. - --filter-size FILTER_SIZE - Optional: Filter size for processed files that are - stored as sample repository [Default: None]. Works - only for sample data. Supported input formats : 12B, - 12KB, 12MB, 12GB. Ignored unless 'processed' flag is - set. - -g GEO_FOLDER, --geo-folder GEO_FOLDER - Optional: Specify a location to store processed GEO - files. Ignored unless 'processed' flag is - set.[Default: $GEODATA:] - -raw: - -x, --split-experiments - Split SRR runs into individual samples. By default, - SRX experiments with multiple SRR Runs will have a - single entry in the annotation table, with each run as - a separate row in the subannotation table. This - setting instead treats each run as a separate sample - -b BAM_FOLDER, --bam-folder BAM_FOLDER - Optional: Specify folder of bam files. Geofetch will - not download sra files when corresponding bam files - already exist. [Default: $SRABAM:] - -f FQ_FOLDER, --fq-folder FQ_FOLDER - Optional: Specify folder of fastq files. Geofetch will - not download sra files when corresponding fastq files - already exist. [Default: $SRAFQ:] - --use-key-subset Use just the keys defined in this module when writing - out metadata. - -``` - Calling geofetch will do 4 tasks: 1. download all or filtered processed files from `GSE#####` into your geo folder. @@ -144,6 +24,11 @@ Calling geofetch will do 4 tasks: Complete details about geofetch outputs is cataloged in the [metadata outputs reference](metadata_output.md). 
+from IPython.core.display import SVG +SVG(filename='logo.svg') + +![arguments_outputs.svg](attachment:arguments_outputs.svg) + ## Download the data First, create the metadata for processed data (by adding --processed and --just-metadata): diff --git a/docs_jupyter/processed-data-downloading.ipynb b/docs_jupyter/processed-data-downloading.ipynb index c691aea..b386a74 100644 --- a/docs_jupyter/processed-data-downloading.ipynb +++ b/docs_jupyter/processed-data-downloading.ipynb @@ -33,135 +33,6 @@ "To see your CLI options, invoke `geofetch -h`:" ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: geofetch [-h] [-V] -i INPUT [-n NAME] [-m METADATA_ROOT]\n", - " [-u METADATA_FOLDER] [--just-metadata] [-r]\n", - " [--config-template CONFIG_TEMPLATE]\n", - " [--pipeline-samples PIPELINE_SAMPLES]\n", - " [--pipeline-project PIPELINE_PROJECT] [-k SKIP] [--acc-anno]\n", - " [--discard-soft] [--const-limit-project CONST_LIMIT_PROJECT]\n", - " [--const-limit-discard CONST_LIMIT_DISCARD]\n", - " [--attr-limit-truncate ATTR_LIMIT_TRUNCATE] [--add-dotfile]\n", - " [-p] [--data-source {all,samples,series}] [--filter FILTER]\n", - " [--filter-size FILTER_SIZE] [-g GEO_FOLDER] [-x]\n", - " [-b BAM_FOLDER] [-f FQ_FOLDER] [--use-key-subset] [--silent]\n", - " [--verbosity V] [--logdev]\n", - "\n", - "Automatic GEO and SRA data downloader\n", - "\n", - "optional arguments:\n", - " -h, --help show this help message and exit\n", - " -V, --version show program's version number and exit\n", - " -i INPUT, --input INPUT\n", - " required: a GEO (GSE) accession, or a file with a list\n", - " of GSE numbers\n", - " -n NAME, --name NAME Specify a project name. 
Defaults to GSE number\n", - " -m METADATA_ROOT, --metadata-root METADATA_ROOT\n", - " Specify a parent folder location to store metadata.\n", - " The project name will be added as a subfolder\n", - " [Default: $SRAMETA:]\n", - " -u METADATA_FOLDER, --metadata-folder METADATA_FOLDER\n", - " Specify an absolute folder location to store metadata.\n", - " No subfolder will be added. Overrides value of\n", - " --metadata-root [Default: Not used (--metadata-root is\n", - " used by default)]\n", - " --just-metadata If set, don't actually run downloads, just create\n", - " metadata\n", - " -r, --refresh-metadata\n", - " If set, re-download metadata even if it exists.\n", - " --config-template CONFIG_TEMPLATE\n", - " Project config yaml file template.\n", - " --pipeline-samples PIPELINE_SAMPLES\n", - " Optional: Specify one or more filepaths to SAMPLES\n", - " pipeline interface yaml files. These will be added to\n", - " the project config file to make it immediately\n", - " compatible with looper. [Default: null]\n", - " --pipeline-project PIPELINE_PROJECT\n", - " Optional: Specify one or more filepaths to PROJECT\n", - " pipeline interface yaml files. These will be added to\n", - " the project config file to make it immediately\n", - " compatible with looper. [Default: null]\n", - " -k SKIP, --skip SKIP Skip some accessions. [Default: no skip].\n", - " --acc-anno Optional: Produce annotation sheets for each\n", - " accession. 
Project combined PEP for the whole project\n", - " won't be produced.\n", - " --discard-soft Optional: After creation of PEP files, all soft and\n", - " additional files will be deleted\n", - " --const-limit-project CONST_LIMIT_PROJECT\n", - " Optional: Limit of the number of the constant sample\n", - " characters that should not be in project yaml.\n", - " [Default: 50]\n", - " --const-limit-discard CONST_LIMIT_DISCARD\n", - " Optional: Limit of the number of the constant sample\n", - " characters that should not be discarded [Default: 250]\n", - " --attr-limit-truncate ATTR_LIMIT_TRUNCATE\n", - " Optional: Limit of the number of sample characters.Any\n", - " attribute with more than X characters will truncate to\n", - " the first X, where X is a number of characters\n", - " [Default: 500]\n", - " --add-dotfile Optional: Add .pep.yaml file that points .yaml PEP\n", - " file\n", - " --silent Silence logging. Overrides verbosity.\n", - " --verbosity V Set logging level (1-5 or logging module level name)\n", - " --logdev Expand content of logging message format.\n", - "\n", - "processed:\n", - " -p, --processed Download processed data [Default: download raw data].\n", - " --data-source {all,samples,series}\n", - " Optional: Specifies the source of data on the GEO\n", - " record to retrieve processed data, which may be\n", - " attached to the collective series entity, or to\n", - " individual samples. Allowable values are: samples,\n", - " series or both (all). Ignored unless 'processed' flag\n", - " is set. [Default: samples]\n", - " --filter FILTER Optional: Filter regex for processed filenames\n", - " [Default: None].Ignored unless 'processed' flag is\n", - " set.\n", - " --filter-size FILTER_SIZE\n", - " Optional: Filter size for processed files that are\n", - " stored as sample repository [Default: None]. Works\n", - " only for sample data. Supported input formats : 12B,\n", - " 12KB, 12MB, 12GB. 
Ignored unless 'processed' flag is\n", - " set.\n", - " -g GEO_FOLDER, --geo-folder GEO_FOLDER\n", - " Optional: Specify a location to store processed GEO\n", - " files. Ignored unless 'processed' flag is\n", - " set.[Default: $GEODATA:]\n", - "\n", - "raw:\n", - " -x, --split-experiments\n", - " Split SRR runs into individual samples. By default,\n", - " SRX experiments with multiple SRR Runs will have a\n", - " single entry in the annotation table, with each run as\n", - " a separate row in the subannotation table. This\n", - " setting instead treats each run as a separate sample\n", - " -b BAM_FOLDER, --bam-folder BAM_FOLDER\n", - " Optional: Specify folder of bam files. Geofetch will\n", - " not download sra files when corresponding bam files\n", - " already exist. [Default: $SRABAM:]\n", - " -f FQ_FOLDER, --fq-folder FQ_FOLDER\n", - " Optional: Specify folder of fastq files. Geofetch will\n", - " not download sra files when corresponding fastq files\n", - " already exist. [Default: $SRAFQ:]\n", - " --use-key-subset Use just the keys defined in this module when writing\n", - " out metadata.\n" - ] - } - ], - "source": [ - "geofetch -h" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -176,6 +47,29 @@ "Complete details about geofetch outputs is cataloged in the [metadata outputs reference](metadata_output.md)." ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "from IPython.core.display import SVG\n", + "SVG(filename='logo.svg')" + ] + }, + { + "attachments": { + "arguments_outputs.svg": { + "image/svg+xml": [ + "<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->

<svg
   width="82.850273mm"
   height="52.748283mm"
   viewBox="0 0 82.850273 52.748283"
   version="1.1"
   id="svg5"
   xml:space="preserve"
   sodipodi:docname="7a5380f5-bda7-468b-9a5b-9c77af4cac34.svg"
   inkscape:version="1.2 (1:1.2+202206011327+fc4e4096c5)"
   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
   xmlns:xlink="http://www.w3.org/1999/xlink"
   xmlns="http://www.w3.org/2000/svg"
   xmlns:svg="http://www.w3.org/2000/svg"><sodipodi:namedview
     id="namedview7"
     pagecolor="#ffffff"
     bordercolor="#000000"
     borderopacity="0.25"
     inkscape:showpageshadow="2"
     inkscape:pageopacity="0.0"
     inkscape:pagecheckerboard="0"
     inkscape:deskcolor="#d1d1d1"
     inkscape:document-units="mm"
     showgrid="false"
     inkscape:zoom="1.237125"
     inkscape:cx="248.96433"
     inkscape:cy="-29.50389"
     inkscape:window-width="1848"
     inkscape:window-height="1016"
     inkscape:window-x="72"
     inkscape:window-y="27"
     inkscape:window-maximized="1"
     inkscape:current-layer="layer1"
     showguides="true" /><defs
     id="defs2"><rect
       x="73.488663"
       y="174.46588"
       width="129.61227"
       height="25.640112"
       id="rect10485" /><rect
       x="89.490662"
       y="225.12338"
       width="622.98389"
       height="412.75418"
       id="rect747" /><rect
       x="509.93341"
       y="481.48856"
       width="208.25388"
       height="68.723083"
       id="rect741" /><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Drop Shadow"
       id="filter4720"
       x="-0.089591049"
       y="-0.22882433"
       width="1.1528318"
       height="1.3903474"><feFlood
         flood-opacity="0.498039"
         flood-color="rgb(0,0,0)"
         result="flood"
         id="feFlood4722" /><feComposite
         in="flood"
         in2="SourceGraphic"
         operator="out"
         result="composite1"
         id="feComposite4724" /><feGaussianBlur
         in="composite1"
         stdDeviation="2"
         result="blur"
         id="feGaussianBlur4726" /><feOffset
         dx="-2"
         dy="-2"
         result="offset"
         id="feOffset4728" /><feComposite
         in="offset"
         in2="SourceGraphic"
         operator="atop"
         result="composite2"
         id="feComposite4730" /></filter><linearGradient
       x1="26.648937"
       y1="20.603781"
       x2="135.66525"
       y2="114.39767"
       id="linearGradient1478"
       xlink:href="#linearGradient4689"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)" /><linearGradient
       id="linearGradient4689"><stop
         style="stop-color:#5a9fd4;stop-opacity:1"
         offset="0"
         id="stop4691" /><stop
         style="stop-color:#306998;stop-opacity:1"
         offset="1"
         id="stop4693" /></linearGradient><linearGradient
       x1="150.96111"
       y1="192.35176"
       x2="112.03144"
       y2="137.27299"
       id="linearGradient1475"
       xlink:href="#linearGradient4671"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)" /><linearGradient
       id="linearGradient4671"><stop
         style="stop-color:#ffd43b;stop-opacity:1"
         offset="0"
         id="stop4673" /><stop
         style="stop-color:#ffe873;stop-opacity:1"
         offset="1"
         id="stop4675" /></linearGradient><radialGradient
       cx="61.518883"
       cy="132.28575"
       r="29.036913"
       fx="61.518883"
       fy="132.28575"
       id="radialGradient1480"
       xlink:href="#linearGradient2795"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-16.490672,188.65894)" /><linearGradient
       id="linearGradient2795"><stop
         style="stop-color:#b8b8b8;stop-opacity:0.49803922"
         offset="0"
         id="stop2797" /><stop
         style="stop-color:#7f7f7f;stop-opacity:0"
         offset="1"
         id="stop2799" /></linearGradient><linearGradient
       id="gradientFill-1"
       x1="0.7414425"
       x2="590.86261"
       y1="3.6658268"
       y2="593.78699"
       gradientUnits="userSpaceOnUse"
       spreadMethod="pad"
       gradientTransform="matrix(0.11494196,0,0,0.07697987,293.05555,752.18406)"><stop
         offset="0"
         stop-color="rgb(203,206,208)"
         stop-opacity="1"
         id="stop7" /><stop
         offset="1"
         stop-color="rgb(132,131,139)"
         stop-opacity="1"
         id="stop9" /></linearGradient><linearGradient
       id="gradientFill-2"
       x1="301.02603"
       x2="703.0675"
       y1="151.4006"
       y2="553.44208"
       gradientUnits="userSpaceOnUse"
       spreadMethod="pad"
       gradientTransform="matrix(0.09311939,0,0,0.09502015,293.05555,752.18406)"><stop
         offset="0"
         stop-color="rgb(39,109,195)"
         stop-opacity="1"
         id="stop12" /><stop
         offset="1"
         stop-color="rgb(22,92,170)"
         stop-opacity="1"
         id="stop14" /></linearGradient><style
       type="text/css"
       id="style6">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><rect
       x="464.36908"
       y="335.98523"
       width="205.5535"
       height="130.52345"
       id="rect4431" /><rect
       x="285.05399"
       y="323.80231"
       width="42.166115"
       height="22.848269"
       id="rect3912" /><rect
       x="73.488663"
       y="174.46588"
       width="129.61227"
       height="25.640112"
       id="rect10485-3" /><rect
       x="89.490662"
       y="225.12338"
       width="622.98389"
       height="412.75418"
       id="rect747-6" /><rect
       x="509.93341"
       y="481.48856"
       width="208.25388"
       height="68.723083"
       id="rect741-7" /><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Drop Shadow"
       id="filter4720-5"
       x="-0.089591049"
       y="-0.22882433"
       width="1.1528318"
       height="1.3903474"><feFlood
         flood-opacity="0.498039"
         flood-color="rgb(0,0,0)"
         result="flood"
         id="feFlood4722-3" /><feComposite
         in="flood"
         in2="SourceGraphic"
         operator="out"
         result="composite1"
         id="feComposite4724-5" /><feGaussianBlur
         in="composite1"
         stdDeviation="2"
         result="blur"
         id="feGaussianBlur4726-6" /><feOffset
         dx="-2"
         dy="-2"
         result="offset"
         id="feOffset4728-2" /><feComposite
         in="offset"
         in2="SourceGraphic"
         operator="atop"
         result="composite2"
         id="feComposite4730-9" /></filter><radialGradient
       cx="61.518883"
       cy="132.28575"
       r="29.036913"
       fx="61.518883"
       fy="132.28575"
       id="radialGradient1480-9"
       xlink:href="#linearGradient2795"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-16.490672,188.65894)" /><style
       type="text/css"
       id="style6-18">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2-7">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-1">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-0">
   
    .fil0 {fill:#000000;fill-rule:nonzero}
   
  </style><rect
       x="73.488663"
       y="174.46588"
       width="129.61227"
       height="25.640112"
       id="rect10485-9" /><rect
       x="89.490662"
       y="225.12338"
       width="622.98389"
       height="412.75418"
       id="rect747-4" /><rect
       x="509.93341"
       y="481.48856"
       width="208.25388"
       height="68.723083"
       id="rect741-9" /><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Drop Shadow"
       id="filter4720-0"
       x="-0.089591049"
       y="-0.22882433"
       width="1.1528318"
       height="1.3903474"><feFlood
         flood-opacity="0.498039"
         flood-color="rgb(0,0,0)"
         result="flood"
         id="feFlood4722-9" /><feComposite
         in="flood"
         in2="SourceGraphic"
         operator="out"
         result="composite1"
         id="feComposite4724-1" /><feGaussianBlur
         in="composite1"
         stdDeviation="2"
         result="blur"
         id="feGaussianBlur4726-7" /><feOffset
         dx="-2"
         dy="-2"
         result="offset"
         id="feOffset4728-7" /><feComposite
         in="offset"
         in2="SourceGraphic"
         operator="atop"
         result="composite2"
         id="feComposite4730-1" /></filter><radialGradient
       cx="61.518883"
       cy="132.28575"
       r="29.036913"
       fx="61.518883"
       fy="132.28575"
       id="radialGradient1480-7"
       xlink:href="#linearGradient2795"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-16.490672,188.65894)" /><style
       type="text/css"
       id="style6-3">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2-9">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><linearGradient
       id="linearGradient2869"
       inkscape:swatch="solid"><stop
         style="stop-color:#000000;stop-opacity:1;"
         offset="0"
         id="stop2867" /></linearGradient><linearGradient
       id="linearGradient2811"
       inkscape:swatch="solid"><stop
         style="stop-color:#ffe443;stop-opacity:1;"
         offset="0"
         id="stop2809" /></linearGradient><style
       type="text/css"
       id="style6-35">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath1241"><path
         d="M -1.999995,-2.0000045 H 503 V 295 H -1.999995 Z"
         id="path1243" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath1245"><path
         d="M -1.999995,-2.0000045 H 503 V 295 H -1.999995 Z"
         id="path1247" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath1249"><path
         d="M -1.999995,-2.0000045 H 503 V 295 H -1.999995 Z"
         id="path1251" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath2405"><path
         d="M -2,-2 H 525 V 295 H -2 Z"
         id="path2403" /></clipPath><rect
       x="73.488663"
       y="174.46588"
       width="129.61227"
       height="25.640112"
       id="rect10485-2" /><rect
       x="89.490662"
       y="225.12338"
       width="622.98389"
       height="412.75418"
       id="rect747-2" /><rect
       x="509.93341"
       y="481.48856"
       width="208.25388"
       height="68.723083"
       id="rect741-8" /><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Drop Shadow"
       id="filter4720-9"
       x="-0.089591049"
       y="-0.22882433"
       width="1.1528318"
       height="1.3903474"><feFlood
         flood-opacity="0.498039"
         flood-color="rgb(0,0,0)"
         result="flood"
         id="feFlood4722-7" /><feComposite
         in="flood"
         in2="SourceGraphic"
         operator="out"
         result="composite1"
         id="feComposite4724-3" /><feGaussianBlur
         in="composite1"
         stdDeviation="2"
         result="blur"
         id="feGaussianBlur4726-61" /><feOffset
         dx="-2"
         dy="-2"
         result="offset"
         id="feOffset4728-29" /><feComposite
         in="offset"
         in2="SourceGraphic"
         operator="atop"
         result="composite2"
         id="feComposite4730-3" /></filter><linearGradient
       x1="26.648937"
       y1="20.603781"
       x2="135.66525"
       y2="114.39767"
       id="linearGradient1478-1"
       xlink:href="#linearGradient4689"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)" /><linearGradient
       x1="150.96111"
       y1="192.35176"
       x2="112.03144"
       y2="137.27299"
       id="linearGradient1475-7"
       xlink:href="#linearGradient4671"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)" /><radialGradient
       cx="61.518883"
       cy="132.28575"
       r="29.036913"
       fx="61.518883"
       fy="132.28575"
       id="radialGradient1480-5"
       xlink:href="#linearGradient2795"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-16.490672,188.65894)" /><linearGradient
       id="gradientFill-1-6"
       x1="0.7414425"
       x2="590.86261"
       y1="3.6658268"
       y2="593.78699"
       gradientUnits="userSpaceOnUse"
       spreadMethod="pad"
       gradientTransform="matrix(0.11494196,0,0,0.07697987,293.05555,752.18406)"><stop
         offset="0"
         stop-color="rgb(203,206,208)"
         stop-opacity="1"
         id="stop7-1" /><stop
         offset="1"
         stop-color="rgb(132,131,139)"
         stop-opacity="1"
         id="stop9-0" /></linearGradient><linearGradient
       id="gradientFill-2-6"
       x1="301.02603"
       x2="703.0675"
       y1="151.4006"
       y2="553.44208"
       gradientUnits="userSpaceOnUse"
       spreadMethod="pad"
       gradientTransform="matrix(0.09311939,0,0,0.09502015,293.05555,752.18406)"><stop
         offset="0"
         stop-color="rgb(39,109,195)"
         stop-opacity="1"
         id="stop12-3" /><stop
         offset="1"
         stop-color="rgb(22,92,170)"
         stop-opacity="1"
         id="stop14-2" /></linearGradient><style
       type="text/css"
       id="style6-06">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2-1">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><rect
       x="73.488663"
       y="174.46588"
       width="129.61227"
       height="25.640112"
       id="rect10485-38" /><rect
       x="89.490662"
       y="225.12338"
       width="622.98389"
       height="412.75418"
       id="rect747-60" /><rect
       x="509.93341"
       y="481.48856"
       width="208.25388"
       height="68.723083"
       id="rect741-4" /><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Drop Shadow"
       id="filter4720-8"
       x="-0.089591049"
       y="-0.22882433"
       width="1.1528318"
       height="1.3903474"><feFlood
         flood-opacity="0.498039"
         flood-color="rgb(0,0,0)"
         result="flood"
         id="feFlood4722-8" /><feComposite
         in="flood"
         in2="SourceGraphic"
         operator="out"
         result="composite1"
         id="feComposite4724-8" /><feGaussianBlur
         in="composite1"
         stdDeviation="2"
         result="blur"
         id="feGaussianBlur4726-9" /><feOffset
         dx="-2"
         dy="-2"
         result="offset"
         id="feOffset4728-77" /><feComposite
         in="offset"
         in2="SourceGraphic"
         operator="atop"
         result="composite2"
         id="feComposite4730-6" /></filter><radialGradient
       cx="61.518883"
       cy="132.28575"
       r="29.036913"
       fx="61.518883"
       fy="132.28575"
       id="radialGradient1480-2"
       xlink:href="#linearGradient2795"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-16.490672,188.65894)" /><style
       type="text/css"
       id="style6-22">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2-4">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><rect
       x="464.36908"
       y="335.98523"
       width="205.5535"
       height="130.52345"
       id="rect4431-7" /><rect
       x="285.05399"
       y="323.80231"
       width="42.166115"
       height="22.848269"
       id="rect3912-7" /><rect
       x="73.488663"
       y="174.46588"
       width="129.61227"
       height="25.640112"
       id="rect10485-3-5" /><rect
       x="89.490662"
       y="225.12338"
       width="622.98389"
       height="412.75418"
       id="rect747-6-4" /><rect
       x="509.93341"
       y="481.48856"
       width="208.25388"
       height="68.723083"
       id="rect741-7-8" /><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Drop Shadow"
       id="filter4720-5-1"
       x="-0.089591049"
       y="-0.22882433"
       width="1.1528318"
       height="1.3903474"><feFlood
         flood-opacity="0.498039"
         flood-color="rgb(0,0,0)"
         result="flood"
         id="feFlood4722-3-2" /><feComposite
         in="flood"
         in2="SourceGraphic"
         operator="out"
         result="composite1"
         id="feComposite4724-5-8" /><feGaussianBlur
         in="composite1"
         stdDeviation="2"
         result="blur"
         id="feGaussianBlur4726-6-9" /><feOffset
         dx="-2"
         dy="-2"
         result="offset"
         id="feOffset4728-2-3" /><feComposite
         in="offset"
         in2="SourceGraphic"
         operator="atop"
         result="composite2"
         id="feComposite4730-9-6" /></filter><radialGradient
       cx="61.518883"
       cy="132.28575"
       r="29.036913"
       fx="61.518883"
       fy="132.28575"
       id="radialGradient1480-9-8"
       xlink:href="#linearGradient2795"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-16.490672,188.65894)" /><style
       type="text/css"
       id="style6-18-0">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2-7-2">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-1-1">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-0-0">
   
    .fil0 {fill:#000000;fill-rule:nonzero}
   
  </style><rect
       x="73.488663"
       y="174.46588"
       width="129.61227"
       height="25.640112"
       id="rect10485-9-5" /><rect
       x="89.490662"
       y="225.12338"
       width="622.98389"
       height="412.75418"
       id="rect747-4-1" /><rect
       x="509.93341"
       y="481.48856"
       width="208.25388"
       height="68.723083"
       id="rect741-9-1" /><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Drop Shadow"
       id="filter4720-0-0"
       x="-0.089591049"
       y="-0.22882433"
       width="1.1528318"
       height="1.3903474"><feFlood
         flood-opacity="0.498039"
         flood-color="rgb(0,0,0)"
         result="flood"
         id="feFlood4722-9-8" /><feComposite
         in="flood"
         in2="SourceGraphic"
         operator="out"
         result="composite1"
         id="feComposite4724-1-5" /><feGaussianBlur
         in="composite1"
         stdDeviation="2"
         result="blur"
         id="feGaussianBlur4726-7-0" /><feOffset
         dx="-2"
         dy="-2"
         result="offset"
         id="feOffset4728-7-6" /><feComposite
         in="offset"
         in2="SourceGraphic"
         operator="atop"
         result="composite2"
         id="feComposite4730-1-4" /></filter><radialGradient
       cx="61.518883"
       cy="132.28575"
       r="29.036913"
       fx="61.518883"
       fy="132.28575"
       id="radialGradient1480-7-6"
       xlink:href="#linearGradient2795"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.7490565e-8,-0.23994696,1.054668,3.7915457e-7,-16.490672,188.65894)" /><style
       type="text/css"
       id="style6-3-2">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2-9-5">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><filter
       style="color-interpolation-filters:sRGB"
       inkscape:label="Blur"
       id="filter27755"
       x="-0.036472555"
       y="-0.035948038"
       width="1.0729451"
       height="1.0718961"><feGaussianBlur
         stdDeviation="0.15 0.15"
         result="fbSourceGraphic"
         id="feGaussianBlur27753" /><feColorMatrix
         result="fbSourceGraphicAlpha"
         in="fbSourceGraphic"
         values="0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0 0 0 0 1 0"
         id="feColorMatrix27781" /><feGaussianBlur
         id="feGaussianBlur27783"
         stdDeviation="0.15 0.15"
         result="blur"
         in="fbSourceGraphic" /></filter><marker
       inkscape:isstock="true"
       style="overflow:visible"
       id="marker23158"
       refX="0"
       refY="0"
       orient="auto"
       inkscape:stockid="TriangleOutL"><path
         transform="scale(0.8)"
         style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#ffffff;stroke-width:1pt;stroke-opacity:1"
         d="M 5.77,0 -2.88,5 V -5 Z"
         id="path23156" /></marker><marker
       inkscape:isstock="true"
       style="overflow:visible"
       id="marker23010"
       refX="0"
       refY="0"
       orient="auto"
       inkscape:stockid="TriangleInL"><path
         transform="scale(-0.8)"
         style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#ffffff;stroke-width:1pt;stroke-opacity:1"
         d="M 5.77,0 -2.88,5 V -5 Z"
         id="path23008" /></marker><marker
       inkscape:stockid="TriangleOutL"
       orient="auto"
       refY="0"
       refX="0"
       id="marker17508"
       style="overflow:visible"
       inkscape:isstock="true"><path
         id="path17506"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#aed6dc;fill-opacity:1;fill-rule:evenodd;stroke:#aed6dc;stroke-width:1pt;stroke-opacity:1"
         transform="scale(0.8)" /></marker><marker
       inkscape:stockid="TriangleInL"
       orient="auto"
       refY="0"
       refX="0"
       id="marker17372"
       style="overflow:visible"
       inkscape:isstock="true"><path
         id="path17370"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#aed6dc;fill-opacity:1;fill-rule:evenodd;stroke:#aed6dc;stroke-width:1pt;stroke-opacity:1"
         transform="scale(-0.8)" /></marker><marker
       inkscape:stockid="TriangleOutL"
       orient="auto"
       refY="0"
       refX="0"
       id="marker16851"
       style="overflow:visible"
       inkscape:isstock="true"><path
         id="path16849"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#fffbff;fill-opacity:1;fill-rule:evenodd;stroke:#fffbff;stroke-width:1pt;stroke-opacity:1"
         transform="scale(0.8)" /></marker><marker
       inkscape:stockid="TriangleOutL"
       orient="auto"
       refY="0"
       refX="0"
       id="marker83833"
       style="overflow:visible"
       inkscape:isstock="true"><path
         id="path83831"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#aed6dc;fill-opacity:1;fill-rule:evenodd;stroke:#aed6dc;stroke-width:1pt;stroke-opacity:1"
         transform="scale(0.8)" /></marker><inkscape:path-effect
       effect="spiro"
       id="path-effect36682"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect36678"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect36662"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect36658"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect36633"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       effect="bspline"
       id="path-effect36629"
       is_visible="true"
       weight="33.333333"
       steps="2"
       helper_size="0"
       apply_no_weight="true"
       apply_with_weight="true"
       only_selected="false"
       lpeversion="0" /><marker
       inkscape:stockid="DiamondLend"
       orient="auto"
       refY="0"
       refX="0"
       id="marker36269"
       style="overflow:visible"
       inkscape:isstock="true"><path
         id="path8115"
         d="M 0,-7.0710768 -7.0710894,0 0,7.0710589 7.0710462,0 Z"
         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
         transform="matrix(0.8,0,0,0.8,-5.6,0)" /></marker><marker
       inkscape:isstock="true"
       style="overflow:visible"
       id="marker13509"
       refX="0"
       refY="0"
       orient="auto"
       inkscape:stockid="TriangleInL"><path
         transform="scale(-0.8)"
         style="fill:#b9b9b9;fill-opacity:1;fill-rule:evenodd;stroke:#b9b9b9;stroke-width:1pt;stroke-opacity:1"
         d="M 5.77,0 -2.88,5 V -5 Z"
         id="path13507" /></marker><marker
       inkscape:stockid="DiamondLend"
       orient="auto"
       refY="0"
       refX="0"
       id="DiamondLend"
       style="overflow:visible"
       inkscape:isstock="true"><path
         id="path10173"
         d="M 0,-7.0710768 -7.0710894,0 0,7.0710589 7.0710462,0 Z"
         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt"
         transform="matrix(0.8,0,0,0.8,-5.6,0)" /></marker><marker
       inkscape:stockid="Arrow1Lstart"
       orient="auto"
       refY="0"
       refX="0"
       id="Arrow1Lstart"
       style="overflow:visible"
       inkscape:isstock="true"><path
         id="path10076"
         d="M 0,0 5,-5 -12.5,0 5,5 Z"
         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt"
         transform="matrix(0.8,0,0,0.8,10,0)" /></marker><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath4273"><rect
         style="opacity:1;fill:#4a536b;fill-opacity:0.941176"
         id="rect4275"
         width="7.487751"
         height="7.4936004"
         x="69.016541"
         y="101.34444" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath4371"><rect
         style="opacity:1;fill:#4a536b;fill-opacity:0.941176"
         id="rect4373"
         width="11.050144"
         height="8.6990528"
         x="89.106499"
         y="138.52672" /></clipPath><clipPath
       id="clipPath1026"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path1024"
         d="M 0,0 H 360 V 360 H 0 Z" /></clipPath><clipPath
       id="clipPath1038"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path1036"
         d="m 33.48,325.29 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath1296"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path1294"
         d="m 33.48,313.74 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath1622"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path1620"
         d="m 33.48,302.19 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath1790"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path1788"
         d="m 33.48,290.63 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath1946"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path1944"
         d="m 33.48,279.08 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath2168"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path2166-0"
         d="m 33.48,267.52 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath2336"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path2334"
         d="m 33.48,255.97 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath2526"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path2524"
         d="m 33.48,244.42 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath2702"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path2700"
         d="m 33.48,232.86 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath2900"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path2898"
         d="m 33.48,221.31 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3074"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3072"
         d="M 33.48,209.75 H 315.74 V 221.3 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3184"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3182"
         d="m 33.48,198.2 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3292"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3290"
         d="M 33.48,186.65 H 315.74 V 198.2 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3402"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3400"
         d="m 33.48,175.09 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3532"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3530"
         d="m 33.48,163.54 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3630"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3628"
         d="m 33.48,151.99 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3854"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3852"
         d="m 33.48,140.43 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath3954"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path3952"
         d="m 33.48,128.88 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath4110"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4108"
         d="m 33.48,117.32 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath4152"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4150"
         d="m 33.48,105.77 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath4236"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4234"
         d="m 33.48,94.22 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath4282"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4280"
         d="M 33.48,82.66 H 315.74 V 94.21 H 33.48 Z" /></clipPath><clipPath
       id="clipPath4318"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4316"
         d="M 33.48,71.11 H 315.74 V 82.66 H 33.48 Z" /></clipPath><clipPath
       id="clipPath4398"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4396"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4406"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4404-7"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4418"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4416-9"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4426"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4424"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4436"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4434"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4444"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4442-3"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4456"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4454-8"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4464"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4462"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4474"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4472"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4482"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4480"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4494"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4492-0"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4502"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4500"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4512"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4510"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4520"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4518"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4532"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4530"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4540"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4538-2"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4550"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4548"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4558"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4556"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4570"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4568"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4578"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4576"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4588"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4586"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4596"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4594"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4608"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4606"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4616"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4614"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4626"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4624"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4634"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4632"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4646"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4644"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4654"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4652"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4664"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4662"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4672"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4670"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4684"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4682"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4692"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4690"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4702"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4700"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4710"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4708"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4722"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4720"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4730"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4728"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4740"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4738"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4748"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4746"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4760"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4758"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4768"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4766"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4778"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4776"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4786"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4784"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4798"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4796"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4806"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4804"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4816"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4814"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4824"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4822"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4836"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4834"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4844"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4842"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4854"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4852"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4862"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4860"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4874"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4872"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4882"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4880"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4892"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4890"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4900"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4898"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4912"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4910"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4920"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4918"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4930"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4928"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4938"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4936"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4950"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4948"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4958"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4956"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4968"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4966"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4976"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4974"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4988"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4986"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath4996"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path4994"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5006"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5004"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5014"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5012"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5026"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5024"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5034"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5032"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5044"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5042"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5052"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5050"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5064"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5062"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5072"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5070"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5082"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5080"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5090"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5088"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5102"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5100"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5110"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5108"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5120"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5118"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5128"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5126"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5140"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5138"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5148"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5146"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5158"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5156"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5166"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5164"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5178"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5176"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5186"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5184"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5196"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5194"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5204"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5202"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5216"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5214"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5224"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5222"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5234"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5232"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5242"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5240"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5254"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5252"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath5262"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path5260"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath12612"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path12610"
         d="M 0,0 H 360 V 360 H 0 Z" /></clipPath><clipPath
       id="clipPath12624"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path12622"
         d="m 33.48,325.29 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath12882"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path12880"
         d="m 33.48,313.74 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath13208"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path13206"
         d="m 33.48,302.19 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath13376"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path13374"
         d="m 33.48,290.63 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath13532"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path13530"
         d="m 33.48,279.08 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath13754"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path13752"
         d="m 33.48,267.52 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath13922"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path13920"
         d="m 33.48,255.97 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath14112"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path14110"
         d="m 33.48,244.42 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath14288"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path14286"
         d="m 33.48,232.86 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath14486"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path14484"
         d="m 33.48,221.31 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath14660"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path14658"
         d="M 33.48,209.75 H 315.74 V 221.3 H 33.48 Z" /></clipPath><clipPath
       id="clipPath14770"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path14768"
         d="m 33.48,198.2 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath14878"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path14876"
         d="M 33.48,186.65 H 315.74 V 198.2 H 33.48 Z" /></clipPath><clipPath
       id="clipPath14988"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path14986"
         d="m 33.48,175.09 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15118"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15116"
         d="m 33.48,163.54 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15216"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15214"
         d="m 33.48,151.99 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15440"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15438"
         d="m 33.48,140.43 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15540"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15538"
         d="m 33.48,128.88 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15696"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15694"
         d="m 33.48,117.32 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15738"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15736"
         d="m 33.48,105.77 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15822"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15820"
         d="m 33.48,94.22 h 282.26 v 11.55 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15868"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15866"
         d="M 33.48,82.66 H 315.74 V 94.21 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15904"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15902"
         d="M 33.48,71.11 H 315.74 V 82.66 H 33.48 Z" /></clipPath><clipPath
       id="clipPath15984"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15982"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath15992"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path15990"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16004"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16002"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16012"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16010"
         d="m 315.74,325.29 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16022"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16020"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16030"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16028"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16042"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16040"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16050"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16048"
         d="m 315.74,313.74 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16060"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16058"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16068"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16066"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16080"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16078"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16088"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16086"
         d="m 315.74,302.19 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16098"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16096"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16106"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16104"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16118"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16116"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16126"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16124"
         d="m 315.74,290.63 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16136"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16134"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16144"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16142"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16156"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16154"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16164"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16162"
         d="m 315.74,279.08 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16174"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16172"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16182"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16180"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16194"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16192"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16202"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16200"
         d="m 315.74,267.52 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16212"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16210"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16220"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16218"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16232"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16230"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16240"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16238"
         d="m 315.74,255.97 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16250"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16248"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16258"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16256"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16270"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16268"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16278"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16276"
         d="m 315.74,244.42 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16288"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16286"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16296"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16294"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16308"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16306"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16316"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16314"
         d="m 315.74,232.86 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16326"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16324"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16334"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16332"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16346"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16344"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16354"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16352"
         d="m 315.74,221.31 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16364"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16362"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16372"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16370"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16384"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16382"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16392"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16390"
         d="m 315.74,209.75 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16402"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16400"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16410"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16408"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16422"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16420"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16430"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16428"
         d="m 315.74,198.2 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16440"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16438"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16448"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16446"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16460"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16458"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16468"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16466"
         d="m 315.74,186.65 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16478"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16476"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16486"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16484"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16498"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16496"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16506"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16504"
         d="m 315.74,175.09 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16516"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16514"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16524"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16522"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16536"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16534"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16544"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16542"
         d="m 315.74,163.54 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16554"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16552"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16562"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16560"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16574"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16572"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16582"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16580"
         d="m 315.74,151.99 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16592"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16590"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16600"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16598"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16612"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16610"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16620"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16618"
         d="m 315.74,140.43 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16630"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16628"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16638"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16636"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16650"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16648"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16658"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16656"
         d="m 315.74,128.88 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16668"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16666"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16676"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16674"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16688"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16686"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16696"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16694"
         d="m 315.74,117.32 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16706"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16704"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16714"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16712"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16726"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16724"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16734"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16732"
         d="m 315.74,105.77 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16744"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16742"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16752"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16750"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16764"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16762"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16772"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16770"
         d="m 315.74,94.22 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16782"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16780"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16790"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16788"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16802"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16800"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16810"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16808"
         d="m 315.74,82.66 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16820"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16818"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16828"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16826"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16840"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16838"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       id="clipPath16848"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path16846"
         d="m 315.74,71.11 h 38.78 v 11.55 h -38.78 z" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27224"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27226"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27228"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27230"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27232"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27234"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27236"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27238"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27240"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27242"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27244"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27246"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27248"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27250"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27252"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27254"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27256"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27258"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27260"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27262"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27264"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27266"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27268"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27270"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27272"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27274"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27276"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27278"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27280"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27282"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27284"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27286"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27288"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27290"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27292"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27294"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27296"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27298"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27300"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27302"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27304"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27306"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27308"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27310"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27312"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27314"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27316"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27318"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27320"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27322"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27324"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27326"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27328"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27330"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27332"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27334"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27336"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27338"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27340"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27342"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27344"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27346"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27348"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27350"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27352"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27354"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27356"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27358"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27360"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27362"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27364"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27366"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27368"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27370"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27372"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27374"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27376"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27378"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27380"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27382"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27384"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27386"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27388"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27390"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27392"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27394"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27396"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27398"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27400"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27402"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27404"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27406"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27408"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27410"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27412"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27414"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27416"><rect
         y="-218.88358"
         x="12.924264"
         height="163.22566"
         width="201.9333"
         id="rect27418"
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27422"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27424"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27426"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27428"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27430"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27432"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27434"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27436"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27438"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27440"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27442"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27444"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27446"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27448"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27450"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27452"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27454"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27456"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27458"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27460"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27462"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27464"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27466"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27468"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27470"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27472"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27474"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27476"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27478"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27480"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27482"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27484"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27486"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27488"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27490"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27492"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27494"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27496"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27498"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27500"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27502"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27504"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27506"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27508"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27510"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27512"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27514"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27516"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27518"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27520"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27522"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27524"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27526"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27528"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27530"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27532"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27534"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27536"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27538"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27540"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27542"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27544"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27546"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27548"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27550"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27552"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27554"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27556"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27558"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27560"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27562"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27564"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27566"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27568"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27570"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27572"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27574"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27576"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27578"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27580"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27582"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27584"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27586"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27588"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27590"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27592"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27594"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27596"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27598"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27600"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27602"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27604"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27606"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27608"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27610"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27612"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath27614"><rect
         style="fill:#4a536b;fill-opacity:1;stroke:#d7b8b5;stroke-width:2.48992;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect27616"
         width="140.23146"
         height="163.22566"
         x="12.122927"
         y="-218.88358"
         transform="scale(1,-1)" /></clipPath><clipPath
       id="clipPath28557"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path28555"
         d="M 51.25,85.03 H 354.52 V 210.52 H 51.25 Z" /></clipPath><clipPath
       id="clipPath29577"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path29575"
         d="M 12.1,0 H 347.91 V 216 H 12.1 Z" /></clipPath><clipPath
       id="clipPath29589"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path29587"
         d="M 40.57,31.91 H 184.3 V 175.64 H 40.57 Z" /></clipPath><clipPath
       id="clipPath30001"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path29999"
         d="M 198.7,31.91 H 342.43 V 175.64 H 198.7 Z" /></clipPath><clipPath
       id="clipPath30413"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30411"
         d="M 40.57,175.64 H 184.3 v 17.21 H 40.57 Z" /></clipPath><clipPath
       id="clipPath30421"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30419"
         d="M 40.57,175.64 H 184.3 v 17.21 H 40.57 Z" /></clipPath><clipPath
       id="clipPath30433"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30431"
         d="M 40.57,175.64 H 184.3 v 17.21 H 40.57 Z" /></clipPath><clipPath
       id="clipPath30441"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30439"
         d="M 40.57,175.64 H 184.3 v 17.21 H 40.57 Z" /></clipPath><clipPath
       id="clipPath30451"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30449"
         d="m 198.7,175.64 h 143.73 v 17.21 H 198.7 Z" /></clipPath><clipPath
       id="clipPath30459"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30457"
         d="m 198.7,175.64 h 143.73 v 17.21 H 198.7 Z" /></clipPath><clipPath
       id="clipPath30471"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30469"
         d="m 198.7,175.64 h 143.73 v 17.21 H 198.7 Z" /></clipPath><clipPath
       id="clipPath30479"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path30477"
         d="m 198.7,175.64 h 143.73 v 17.21 H 198.7 Z" /></clipPath><clipPath
       id="clipPath31895"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path31893"
         d="M 0,0 H 288 V 360 H 0 Z" /></clipPath><clipPath
       id="clipPath31907"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path31905"
         d="M 42.55,116.69 H 186.86 V 354.52 H 42.55 Z" /></clipPath><clipPath
       id="clipPath32787"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32785"
         d="M 0,0 H 432 V 360 H 0 Z" /></clipPath><clipPath
       id="clipPath32799"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32797"
         d="M 42.55,238.35 H 297.4 V 354.52 H 42.55 Z" /></clipPath><clipPath
       id="clipPath32835"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32833"
         d="M 42.55,116.69 H 297.4 V 232.86 H 42.55 Z" /></clipPath><clipPath
       id="clipPath32871"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32869"
         d="m 297.4,238.35 h 33.46 V 354.52 H 297.4 Z" /></clipPath><clipPath
       id="clipPath32879"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32877"
         d="m 297.4,238.35 h 33.46 V 354.52 H 297.4 Z" /></clipPath><clipPath
       id="clipPath32891"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32889"
         d="m 297.4,238.35 h 33.46 V 354.52 H 297.4 Z" /></clipPath><clipPath
       id="clipPath32899"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32897"
         d="m 297.4,238.35 h 33.46 V 354.52 H 297.4 Z" /></clipPath><clipPath
       id="clipPath32909"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32907"
         d="m 297.4,116.69 h 33.46 V 232.86 H 297.4 Z" /></clipPath><clipPath
       id="clipPath32917"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32915"
         d="m 297.4,116.69 h 33.46 V 232.86 H 297.4 Z" /></clipPath><clipPath
       id="clipPath32929"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32927"
         d="m 297.4,116.69 h 33.46 V 232.86 H 297.4 Z" /></clipPath><clipPath
       id="clipPath32937"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path32935"
         d="m 297.4,116.69 h 33.46 V 232.86 H 297.4 Z" /></clipPath><clipPath
       id="clipPath33878"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path33876"
         d="m 28.5,0 h 231 v 288 h -231 z" /></clipPath><clipPath
       id="clipPath33890"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path33888"
         d="M 64.48,75.31 H 254.02 V 264.85 H 64.48 Z" /></clipPath><clipPath
       id="clipPath34418"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path34416"
         d="M 87.19,0 H 200.81 V 216 H 87.19 Z" /></clipPath><clipPath
       id="clipPath34430"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path34428"
         d="m 120.66,118.18 h 74.67 v 74.67 h -74.67 z" /></clipPath><clipPath
       id="clipPath34976"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path34974"
         d="M 28.47,71.11 H 282.52 V 264.85 H 28.47 Z" /></clipPath><clipPath
       id="clipPath35479"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path35477"
         d="M 42.6,0 H 245.39 V 216 H 42.6 Z" /></clipPath><clipPath
       id="clipPath35489"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path35487"
         d="M 121.58,92.19 H 239.91 V 210.52 H 121.58 Z" /></clipPath><marker
       inkscape:stockid="TriangleInL"
       orient="auto"
       refY="0"
       refX="0"
       id="TriangleInL-7"
       style="overflow:visible"
       inkscape:isstock="true"
       viewBox="0 0 8.519053 9.8486161"
       markerWidth="8.5190525"
       markerHeight="9.8486156"
       preserveAspectRatio="xMidYMid"><path
         inkscape:connector-curvature="0"
         id="path8151-9"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
         transform="scale(-0.8)" /></marker><style
       id="style16606"
       type="text/css">
   
    .fil0 {fill:black}
   
  </style><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath23682"><rect
         style="fill:none;fill-opacity:1;stroke:#000000;stroke-width:0.801;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
         id="rect23684"
         width="27.516666"
         height="16.139584"
         x="162.71875"
         y="129.03749" /></clipPath><clipPath
       id="clipPath48651"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48649"
         d="M 0,0 H 432 V 360 H 0 Z" /></clipPath><clipPath
       id="clipPath48663"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48661"
         d="M 38.1,221.65 H 284.26 V 354.52 H 38.1 Z" /></clipPath><clipPath
       id="clipPath48697"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48695"
         d="M 38.1,83.31 H 284.26 V 216.18 H 38.1 Z" /></clipPath><clipPath
       id="clipPath48731"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48729"
         d="m 284.26,221.65 h 33.46 v 132.87 h -33.46 z" /></clipPath><clipPath
       id="clipPath48739"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48737"
         d="m 284.26,221.65 h 33.46 v 132.87 h -33.46 z" /></clipPath><clipPath
       id="clipPath48751"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48749"
         d="m 284.26,221.65 h 33.46 v 132.87 h -33.46 z" /></clipPath><clipPath
       id="clipPath48759"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48757"
         d="m 284.26,221.65 h 33.46 v 132.87 h -33.46 z" /></clipPath><clipPath
       id="clipPath48769"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48767"
         d="m 284.26,83.31 h 33.46 v 132.87 h -33.46 z" /></clipPath><clipPath
       id="clipPath48777"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48775"
         d="m 284.26,83.31 h 33.46 v 132.87 h -33.46 z" /></clipPath><clipPath
       id="clipPath48789"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48787"
         d="m 284.26,83.31 h 33.46 v 132.87 h -33.46 z" /></clipPath><clipPath
       id="clipPath48797"
       clipPathUnits="userSpaceOnUse"><path
         inkscape:connector-curvature="0"
         id="path48795"
         d="m 284.26,83.31 h 33.46 v 132.87 h -33.46 z" /></clipPath><style
       id="style18982"
       type="text/css">
   
    .fil0 {fill:black}
   
  </style><marker
       inkscape:stockid="TriangleInL"
       orient="auto"
       refY="0"
       refX="0"
       id="TriangleInL-7-7"
       style="overflow:visible"
       inkscape:isstock="true"
       viewBox="0 0 8.519053 9.8486161"
       markerWidth="8.5190525"
       markerHeight="9.8486156"
       preserveAspectRatio="xMidYMid"><path
         inkscape:connector-curvature="0"
         id="path8151-9-3"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
         transform="scale(-0.8)" /></marker><marker
       inkscape:stockid="TriangleInL"
       orient="auto"
       refY="0"
       refX="0"
       id="marker60059"
       style="overflow:visible"
       inkscape:isstock="true"
       viewBox="0 0 8.519053 9.8486161"
       markerWidth="8.5190525"
       markerHeight="9.8486156"
       preserveAspectRatio="xMidYMid"><path
         inkscape:connector-curvature="0"
         id="path60057"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
         transform="scale(-0.8)" /></marker><marker
       inkscape:stockid="TriangleInL"
       orient="auto"
       refY="0"
       refX="0"
       id="TriangleInL-7-4"
       style="overflow:visible"
       inkscape:isstock="true"
       viewBox="0 0 8.519053 9.8486161"
       markerWidth="8.5190525"
       markerHeight="9.8486156"
       preserveAspectRatio="xMidYMid"><path
         inkscape:connector-curvature="0"
         id="path8151-9-7"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
         transform="scale(-0.8)" /></marker><marker
       inkscape:stockid="TriangleInL"
       orient="auto"
       refY="0"
       refX="0"
       id="marker60059-9"
       style="overflow:visible"
       inkscape:isstock="true"
       viewBox="0 0 8.519053 9.8486161"
       markerWidth="8.5190525"
       markerHeight="9.8486156"
       preserveAspectRatio="xMidYMid"><path
         inkscape:connector-curvature="0"
         id="path60057-7"
         d="M 5.77,0 -2.88,5 V -5 Z"
         style="fill:#000000;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1pt;stroke-opacity:1"
         transform="scale(-0.8)" /></marker><inkscape:path-effect
       is_visible="true"
       id="path-effect2601"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="bspline"
       id="path-effect2610"
       is_visible="true"
       weight="33.333333"
       steps="2"
       helper_size="0"
       apply_no_weight="true"
       apply_with_weight="true"
       only_selected="false"
       lpeversion="0" /><inkscape:path-effect
       effect="bspline"
       id="path-effect2600"
       is_visible="true"
       weight="33.333333"
       steps="2"
       helper_size="0"
       apply_no_weight="true"
       apply_with_weight="true"
       only_selected="false"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect3503"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect6891"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect4560"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect2871"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect2819"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect2744"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect9899"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect8929"
       is_visible="true"
       lpeversion="0" /><linearGradient
       id="linearGradient5359"><stop
         stop-color="#eeeeec"
         id="stop5355"
         style="stop-color:#ffffff;stop-opacity:1" /><stop
         stop-color="#d3d7cf"
         offset="1"
         id="stop5357"
         style="stop-color:#d3d7cf;stop-opacity:0.03243243" /></linearGradient><inkscape:path-effect
       is_visible="true"
       id="path-effect6332"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect6320"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect6150"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect5727"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect5100"
       is_visible="true"
       lpeversion="0" /><inkscape:path-effect
       is_visible="true"
       id="path-effect4874"
       effect="spiro"
       lpeversion="0" /><inkscape:path-effect
       effect="spiro"
       id="path-effect4870"
       is_visible="true"
       lpeversion="0" /><linearGradient
       gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)"
       gradientUnits="userSpaceOnUse"
       xlink:href="#linearGradient4689"
       id="linearGradient1478-3"
       y2="114.39767"
       x2="135.66525"
       y1="20.603781"
       x1="26.648937" /><linearGradient
       gradientTransform="matrix(0.562541,0,0,0.567972,-9.399749,-5.305317)"
       gradientUnits="userSpaceOnUse"
       xlink:href="#linearGradient4671"
       id="linearGradient1475-2"
       y2="137.27299"
       x2="112.03144"
       y1="192.35176"
       x1="150.96111" /><radialGradient
       gradientTransform="matrix(2.382716e-8,-0.296405,1.43676,4.683673e-7,-128.544,150.5202)"
       gradientUnits="userSpaceOnUse"
       xlink:href="#linearGradient2795"
       id="radialGradient1480-58"
       fy="132.28575"
       fx="61.518883"
       r="29.036913"
       cy="132.28575"
       cx="61.518883" /><linearGradient
       gradientTransform="matrix(0.11494196,0,0,0.07697987,293.05555,752.18406)"
       spreadMethod="pad"
       gradientUnits="userSpaceOnUse"
       y2="593.78699"
       y1="3.6658268"
       x2="590.86261"
       x1="0.7414425"
       id="gradientFill-1-0"><stop
         id="stop7-7"
         stop-opacity="1"
         stop-color="rgb(203,206,208)"
         offset="0" /><stop
         id="stop9-3"
         stop-opacity="1"
         stop-color="rgb(132,131,139)"
         offset="1" /></linearGradient><linearGradient
       gradientTransform="matrix(0.09311939,0,0,0.09502015,293.05555,752.18406)"
       spreadMethod="pad"
       gradientUnits="userSpaceOnUse"
       y2="553.44208"
       y1="151.4006"
       x2="703.0675"
       x1="301.02603"
       id="gradientFill-2-8"><stop
         id="stop12-2"
         stop-opacity="1"
         stop-color="rgb(39,109,195)"
         offset="0" /><stop
         id="stop14-3"
         stop-opacity="1"
         stop-color="rgb(22,92,170)"
         offset="1" /></linearGradient><marker
       style="overflow:visible"
       id="TriangleOutM"
       refX="0"
       refY="0"
       orient="auto"
       inkscape:stockid="TriangleOutM"><path
         inkscape:connector-curvature="0"
         transform="scale(0.4)"
         style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt"
         d="M 5.77,0 -2.88,5 V -5 Z"
         id="path4156" /></marker><linearGradient
       id="linearGradient892-3-0"><stop
         id="stop893-2-3"
         offset="0.00000000"
         style="stop-color:#ffffff;stop-opacity:0.00000000;" /><stop
         id="stop894-2-1"
         offset="1"
         style="stop-color:#fff;stop-opacity:1;" /></linearGradient><linearGradient
       id="linearGradient4492"
       y2="89"
       gradientUnits="userSpaceOnUse"
       y1="20"
       x2="100"
       x1="35"><stop
         id="stop4488"
         stop-color="#00c0c0"
         stop-opacity=".75294"
         offset="0" /><stop
         id="stop4490"
         stop-color="#80f5f5"
         stop-opacity=".30196"
         offset="1" /></linearGradient><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="108.87218"
       y1="108.87218"
       xlink:href="#linearGradient15251"
       x2="292.65842"
       x1="285.47037"
       id="linearGradient15378"
       gradientTransform="matrix(0.89839544,0,0,1.4262987,-2583.0381,2966.5502)" /><linearGradient
       id="linearGradient15251"><stop
         stop-color="#11ffff"
         offset="0"
         id="stop15252" /><stop
         stop-color="#e1ebff"
         stop-opacity="0"
         offset="1"
         id="stop15253" /></linearGradient><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="186.81477"
       y1="186.81477"
       xlink:href="#linearGradient15251"
       x2="244.25108"
       x1="239.22408"
       id="linearGradient15379"
       gradientTransform="matrix(1.2750153,0,0,1.0049921,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="111.47942"
       y1="111.47942"
       xlink:href="#linearGradient15251"
       x2="311.97397"
       x1="305.36746"
       id="linearGradient15380"
       gradientTransform="matrix(1.148607,0,0,1.115595,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="270.58075"
       y1="270.58075"
       xlink:href="#linearGradient15251"
       x2="271.39532"
       x1="266.74103"
       id="linearGradient15383"
       gradientTransform="matrix(1.5031099,0,0,0.85248603,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="212.56871"
       y1="212.56871"
       xlink:href="#linearGradient15251"
       x2="175.87711"
       x1="169.62802"
       id="linearGradient15384"
       gradientTransform="matrix(1.149307,0,0,1.1149155,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="220.00958"
       y1="220.00958"
       xlink:href="#linearGradient15251"
       x2="215.18179"
       x1="208.20387"
       id="linearGradient15385"
       gradientTransform="matrix(0.9480899,0,0,1.3515387,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="264.51785"
       y1="264.51785"
       xlink:href="#linearGradient15251"
       x2="219.02859"
       x1="216.48724"
       id="linearGradient15386"
       gradientTransform="matrix(1.6080623,0,0,0.79684736,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="529.10437"
       y1="529.10437"
       xlink:href="#linearGradient15251"
       x2="186.3316"
       x1="184.40688"
       id="linearGradient15387"
       gradientTransform="matrix(1.847117,0,0,0.69371905,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="245.84831"
       y1="245.84831"
       xlink:href="#linearGradient15251"
       x2="315.26337"
       x1="307.05576"
       id="linearGradient15388"
       gradientTransform="matrix(0.92408916,0,0,1.3866413,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="168.36928"
       y1="168.36928"
       xlink:href="#linearGradient15251"
       x2="630.80371"
       x1="622.69678"
       id="linearGradient15389"
       gradientTransform="matrix(0.72507885,0,0,1.7672288,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="234.81921"
       y1="234.81921"
       xlink:href="#linearGradient15251"
       x2="495.96729"
       x1="491.17938"
       id="linearGradient15390"
       gradientTransform="matrix(0.93320216,0,0,1.3731004,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="225.18878"
       y1="225.18878"
       xlink:href="#linearGradient15251"
       x2="535.78436"
       x1="527.91168"
       id="linearGradient15391"
       gradientTransform="matrix(0.97200857,0,0,1.3182808,-2583.0381,2966.5502)" /><linearGradient
       gradientUnits="userSpaceOnUse"
       y2="238.44827"
       y1="238.44827"
       xlink:href="#linearGradient15251"
       x2="206.7001"
       x1="204.62263"
       id="linearGradient15392"
       gradientTransform="matrix(1.2300616,0,0,1.0417204,-2583.0381,2966.5502)" /><linearGradient
       id="E"><stop
         id="R"
         stop-color="#2e3436" /><stop
         id="S"
         offset="1"
         stop-color="#2e3436"
         stop-opacity="0" /></linearGradient><linearGradient
       id="D"><stop
         id="P"
         stop-color="#eeeeec" /><stop
         id="Q"
         offset="1"
         stop-color="#d3d7cf" /></linearGradient><filter
       x="-0.16"
       y="-0.15099999"
       width="1.321"
       height="1.302"
       id="K"
       style="color-interpolation-filters:sRGB"><feGaussianBlur
         stdDeviation="0.5327"
         id="T" /></filter><radialGradient
       cx="25.712"
       cy="48.735001"
       r="21.856001"
       id="F"
       xlink:href="#E"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(2.6607075,0,0,0.21711748,-259.90161,564.66242)" /><radialGradient
       cx="13.559"
       cy="12.06"
       r="16.219"
       id="G"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.5010936,6.2249964,-9.2108788,2.221111,-129.81603,374.91355)" /><linearGradient
       gradientTransform="matrix(2.4311654,0,0,2.4311654,-249.83623,466.9785)"
       y1="2.438"
       x2="0"
       y2="43.34"
       id="H"
       xlink:href="#linearGradient892-3-0"
       gradientUnits="userSpaceOnUse" /><radialGradient
       cx="34.130001"
       cy="8.6090002"
       r="3.03"
       id="I"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(4.8623306,5.0367582e-7,-5.2704226e-7,5.0879031,-332.79973,444.08618)" /><linearGradient
       gradientTransform="matrix(2.4311654,0,0,2.4311654,-249.83623,466.61717)"
       x1="34.130001"
       y1="8.75"
       x2="36.533001"
       y2="6.3629999"
       id="J"
       xlink:href="#linearGradient892-3-0"
       gradientUnits="userSpaceOnUse" /><filter
       x="-0.16"
       y="-0.15099999"
       width="1.321"
       height="1.302"
       id="K-0"
       style="color-interpolation-filters:sRGB"><feGaussianBlur
         stdDeviation="0.5327"
         id="T-0" /></filter><radialGradient
       cx="25.712"
       cy="48.735001"
       r="21.856001"
       id="F-5"
       xlink:href="#E"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(2.6607075,0,0,0.21711748,-281.84156,543.48915)" /><radialGradient
       cx="13.559"
       cy="12.06"
       r="16.219"
       id="G-3"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.5010936,6.2249964,-9.2108788,2.221111,-151.75598,353.74028)" /><linearGradient
       gradientTransform="matrix(2.4311654,0,0,2.4311654,-271.77618,445.80523)"
       y1="2.438"
       x2="0"
       y2="43.34"
       id="H-7"
       xlink:href="#linearGradient892-3-0"
       gradientUnits="userSpaceOnUse" /><radialGradient
       cx="34.130001"
       cy="8.6090002"
       r="3.03"
       id="I-9"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(4.8623306,5.0367582e-7,-5.2704226e-7,5.0879031,-354.73969,422.91291)" /><filter
       x="-0.16"
       y="-0.15099999"
       width="1.321"
       height="1.302"
       id="K-0-3"
       style="color-interpolation-filters:sRGB"><feGaussianBlur
         stdDeviation="0.5327"
         id="T-0-5" /></filter><radialGradient
       cx="25.712"
       cy="48.735001"
       r="21.856001"
       id="F-5-6"
       xlink:href="#E"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(6.2327409,0,0,0.50860043,4277.7065,-1100.5728)" /><radialGradient
       cx="13.559"
       cy="12.06"
       r="16.219"
       id="G-3-2"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(3.516331,14.582132,-21.5766,5.2029803,4582.4336,-1545.062)" /><linearGradient
       gradientTransform="matrix(5.6950353,0,0,5.6950353,4301.2847,-1329.3986)"
       y1="2.438"
       x2="0"
       y2="43.34"
       id="H-7-9"
       xlink:href="#linearGradient892-3-0"
       gradientUnits="userSpaceOnUse" /><filter
       x="-0.16"
       y="-0.15099999"
       width="1.321"
       height="1.302"
       id="filter4619"
       style="color-interpolation-filters:sRGB"><feGaussianBlur
         stdDeviation="0.5327"
         id="feGaussianBlur4621" /></filter><radialGradient
       cx="34.130001"
       cy="8.6090002"
       r="3.03"
       id="I-9-1"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(11.390071,1.1798669e-6,-1.2346031e-6,11.918477,4106.9418,-1383.0242)" /><filter
       x="-0.16"
       y="-0.15099999"
       width="1.321"
       height="1.302"
       id="K-2"
       style="color-interpolation-filters:sRGB"><feGaussianBlur
         stdDeviation="0.5327"
         id="T-7" /></filter><radialGradient
       cx="13.559"
       cy="12.06"
       r="16.219"
       id="G-0"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(1.8558945,7.6963458,-11.387976,2.746096,496.7991,197.14557)" /><linearGradient
       gradientTransform="matrix(3.0057991,0,0,3.0057991,348.41075,310.9711)"
       y1="2.438"
       x2="0"
       y2="43.34"
       id="H-9"
       xlink:href="#linearGradient892-3-0"
       gradientUnits="userSpaceOnUse" /><filter
       x="-0.16"
       y="-0.15099999"
       width="1.321"
       height="1.302"
       id="filter4688"
       style="color-interpolation-filters:sRGB"><feGaussianBlur
         stdDeviation="0.5327"
         id="feGaussianBlur4690" /></filter><radialGradient
       cx="34.130001"
       cy="8.6090002"
       r="3.03"
       id="I-3"
       xlink:href="#D"
       gradientUnits="userSpaceOnUse"
       gradientTransform="matrix(6.0115982,6.2272537e-7,-6.5161475e-7,6.2904874,245.83788,282.66792)" /><linearGradient
       gradientTransform="matrix(3.0057991,0,0,3.0057991,348.41075,310.52437)"
       x1="34.130001"
       y1="8.75"
       x2="36.533001"
       y2="6.3629999"
       id="J-6"
       xlink:href="#linearGradient892-3-0"
       gradientUnits="userSpaceOnUse" /><style
       type="text/css"
       id="style6-07">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-2-77">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       id="style6-7"
       type="text/css">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style6-6">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       id="style6-3-6"
       type="text/css">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       id="style5236">.cls-1{fill:none;stroke:#000000;stroke-linecap:round;stroke-miterlimit:10;stroke-width:4px;}</style><style
       id="style6-35-9"
       type="text/css">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath1-0"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path6579" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5090-1"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5092-5" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5094"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5096" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5098"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5100-1" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5102-9"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5104" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5106"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5108-0" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath1-0-1"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path6579-9" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5090-7"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5092-1" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5094-3"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5096-1" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5098-2"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5100-0" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5102-6"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5104-8" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath5106-7"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 542.63356 V 191.75772 H 0 Z"
         id="path5108-4" /></clipPath><style
       type="text/css"
       id="style6-23">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       id="style6-3-8"
       type="text/css">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       id="style5236-5">.cls-1{fill:none;stroke:#000000;stroke-linecap:round;stroke-miterlimit:10;stroke-width:4px;}</style><style
       id="style6-35-0"
       type="text/css">
   
    .fil1 {fill:none}
    .fil0 {fill:black}
   
  </style><style
       type="text/css"
       id="style4320">
   
    .fil0 {fill:#000000;fill-rule:nonzero}
   
  </style><style
       type="text/css"
       id="style6-1-8">
   
    .fil0 {fill:#000000;fill-rule:nonzero}
   
  </style><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath2-5"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 380.45548 H 1.0199879 Z"
         id="path17263" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath3-4"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 42.839491 H 1.0199879 Z"
         id="path17266" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22348"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 42.839491 H 1.0199879 Z"
         id="path22346" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath4-09"><path
         inkscape:connector-curvature="0"
         d="m 540.59358,1.0199879 h 109.1387 V 42.839491 h -109.1387 z"
         id="path17269" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath5-0"><path
         inkscape:connector-curvature="0"
         d="M 650.75227,1.0199879 H 760.91096 V 42.839491 H 650.75227 Z"
         id="path17272" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath6-6"><path
         inkscape:connector-curvature="0"
         d="M 761.93095,1.0199879 H 868.00969 V 42.839491 H 761.93095 Z"
         id="path17275" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22358"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 42.839491 H 1.0199879 Z"
         id="path22356" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath7-89"><path
         inkscape:connector-curvature="0"
         d="M 961.84858,1.0199879 H 1072.0073 V 42.839491 H 961.84858 Z"
         id="path17278" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22364"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 42.839491 H 1.0199879 Z"
         id="path22362" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22368"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 380.45548 H 1.0199879 Z"
         id="path22366" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath8-8"><path
         inkscape:connector-curvature="0"
         d="M 68.339188,43.859479 H 165.23804 V 155.03816 H 68.339188 Z"
         id="path17281" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath9-5"><path
         inkscape:connector-curvature="0"
         d="M 166.25803,43.859479 H 292.73652 V 155.03816 H 166.25803 Z"
         id="path17284" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath10-1"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,43.859479 H 1812.5185 V 155.03816 H 1.0199879 Z"
         id="path17287" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath11-31"><path
         inkscape:connector-curvature="0"
         d="m 540.59358,43.859479 h 109.1387 V 155.03816 h -109.1387 z"
         id="path17290" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath12-32"><path
         inkscape:connector-curvature="0"
         d="M 650.75227,43.859479 H 760.91096 V 155.03816 H 650.75227 Z"
         id="path17293" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22382"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,43.859479 H 1812.5185 V 155.03816 H 1.0199879 Z"
         id="path22380" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath13-07"><path
         inkscape:connector-curvature="0"
         d="m 869.02968,43.859479 h 91.79891 V 155.03816 h -91.79891 z"
         id="path17296" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath14-4"><path
         inkscape:connector-curvature="0"
         d="M 961.84858,43.859479 H 1072.0073 V 155.03816 H 961.84858 Z"
         id="path17299" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath15-8"><path
         inkscape:connector-curvature="0"
         d="m 1073.0273,43.859479 h 89.7589 V 155.03816 h -89.7589 z"
         id="path17302" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath16-2"><path
         inkscape:connector-curvature="0"
         d="m 1163.8062,43.859479 h 122.3985 V 155.03816 h -122.3985 z"
         id="path17305" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22394"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,43.859479 H 1812.5185 V 155.03816 H 1.0199879 Z"
         id="path22392" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath17-06"><path
         inkscape:connector-curvature="0"
         d="m 1395.3434,43.859479 h 106.0788 V 155.03816 h -106.0788 z"
         id="path17308" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath18-12"><path
         inkscape:connector-curvature="0"
         d="m 1502.4422,43.859479 h 127.4984 V 155.03816 h -127.4984 z"
         id="path17311" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22402"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,43.859479 H 1812.5185 V 155.03816 H 1.0199879 Z"
         id="path22400" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22406"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 380.45548 H 1.0199879 Z"
         id="path22404" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath19-3"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,156.05815 H 1812.5185 V 267.23683 H 1.0199879 Z"
         id="path17314" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath20-0"><path
         inkscape:connector-curvature="0"
         d="M 166.25803,156.05815 H 292.73652 V 267.23683 H 166.25803 Z"
         id="path17317" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22414"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,156.05815 H 1812.5185 V 267.23683 H 1.0199879 Z"
         id="path22412" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath21-1"><path
         inkscape:connector-curvature="0"
         d="m 540.59358,156.05815 h 109.1387 v 111.17868 h -109.1387 z"
         id="path17320" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22420"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,156.05815 H 1812.5185 V 267.23683 H 1.0199879 Z"
         id="path22418" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath22-7"><path
         inkscape:connector-curvature="0"
         d="M 761.93095,156.05815 H 868.00969 V 267.23683 H 761.93095 Z"
         id="path17323" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22426"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,156.05815 H 1812.5185 V 267.23683 H 1.0199879 Z"
         id="path22424" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath23-0"><path
         inkscape:connector-curvature="0"
         d="M 961.84858,156.05815 H 1072.0073 V 267.23683 H 961.84858 Z"
         id="path17326" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22432"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,156.05815 H 1812.5185 V 267.23683 H 1.0199879 Z"
         id="path22430" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath24-6"><path
         inkscape:connector-curvature="0"
         d="m 1163.8062,156.05815 h 122.3985 v 111.17868 h -122.3985 z"
         id="path17329" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath25-9"><path
         inkscape:connector-curvature="0"
         d="m 1287.2247,156.05815 h 107.0987 v 111.17868 h -107.0987 z"
         id="path17332" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22440"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,156.05815 H 1812.5185 V 267.23683 H 1.0199879 Z"
         id="path22438" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath26-1"><path
         inkscape:connector-curvature="0"
         d="m 1502.4422,156.05815 h 127.4984 v 111.17868 h -127.4984 z"
         id="path17335" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath27-2"><path
         inkscape:connector-curvature="0"
         d="m 1630.9606,156.05815 h 180.5379 v 111.17868 h -180.5379 z"
         id="path17338" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22448"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 380.45548 H 1.0199879 Z"
         id="path22446" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath28-4"><path
         inkscape:connector-curvature="0"
         d="M 68.339188,268.25681 H 165.23804 V 379.43549 H 68.339188 Z"
         id="path17341" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath29-2"><path
         inkscape:connector-curvature="0"
         d="M 166.25803,268.25681 H 292.73652 V 379.43549 H 166.25803 Z"
         id="path17344" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath30-6"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,268.25681 H 1812.5185 V 379.43549 H 1.0199879 Z"
         id="path17347" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath31-7"><path
         inkscape:connector-curvature="0"
         d="m 461.03452,268.25681 h 78.53907 v 111.17868 h -78.53907 z"
         id="path17350" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath32-3"><path
         inkscape:connector-curvature="0"
         d="m 540.59358,268.25681 h 109.1387 v 111.17868 h -109.1387 z"
         id="path17353" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath33-75"><path
         inkscape:connector-curvature="0"
         d="M 650.75227,268.25681 H 760.91096 V 379.43549 H 650.75227 Z"
         id="path17356" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22464"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,268.25681 H 1812.5185 V 379.43549 H 1.0199879 Z"
         id="path22462" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath34-8"><path
         inkscape:connector-curvature="0"
         d="M 961.84858,268.25681 H 1072.0073 V 379.43549 H 961.84858 Z"
         id="path17359" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22470"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,268.25681 H 1812.5185 V 379.43549 H 1.0199879 Z"
         id="path22468" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath35-8"><path
         inkscape:connector-curvature="0"
         d="m 1630.9606,268.25681 h 180.5379 v 111.17868 h -180.5379 z"
         id="path17362" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath1-06"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path17260" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22478"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22476" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22482"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22480" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22486"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22484" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22490"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22488" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22494"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22492" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22498"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22496" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22502"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22500" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22506"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22504" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22510"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22508" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22514"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22512" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22518"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22516" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22522"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22520" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22526"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22524" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22530"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22528" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22534"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22532" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22538"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22536" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22542"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22540" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22546"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22544" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22550"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22548" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22554"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22552" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22558"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22556" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipPath22562"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path22560" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath3-4-8"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 42.839491 H 1.0199879 Z"
         id="path17266-0" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath3-4-82"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 42.839491 H 1.0199879 Z"
         id="path17266-8" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath3-4-84"><path
         inkscape:connector-curvature="0"
         d="M 1.0199879,1.0199879 H 1812.5185 V 42.839491 H 1.0199879 Z"
         id="path17266-2" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath4-09-6"><path
         inkscape:connector-curvature="0"
         d="m 540.59358,1.0199879 h 109.1387 V 42.839491 h -109.1387 z"
         id="path17269-2" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath1-06-3"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path17260-3" /></clipPath><clipPath
       clipPathUnits="userSpaceOnUse"
       id="clipEmfPath1-06-36"><path
         inkscape:connector-curvature="0"
         d="M 0,0 H 1812.5185 V 380.45548 H 0 Z"
         id="path17260-7" /></clipPath><style
       type="text/css"
       id="style4320-7">
   
    .fil0 {fill:#000000;fill-rule:nonzero}
   
  </style><style
       type="text/css"
       id="style6-5">
   
    .fil0 {fill:#000000;fill-rule:nonzero}
   
  </style></defs><g
     inkscape:label="Layer 1"
     inkscape:groupmode="layer"
     id="layer1"
     transform="translate(-10.261877,-55.023323)"><text
       xml:space="preserve"
       transform="matrix(0.26458333,0,0,0.26458333,-13.026547,-15.577666)"
       id="text10483"
       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:15.689px;font-family:'Latin Modern Sans Quotation';-inkscape-font-specification:'Latin Modern Sans Quotation, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect10485);display:inline;fill:none;fill-opacity:1;stroke:#f1f32f;stroke-width:4.208;stroke-linecap:round;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;paint-order:normal" /><g
       style="clip-rule:evenodd;fill-rule:evenodd;image-rendering:optimizeQuality;shape-rendering:geometricPrecision;text-rendering:geometricPrecision"
       id="g994"
       transform="matrix(-0.07200507,0.25459695,-0.25459695,-0.07200507,65.661784,86.197514)" /><g
       id="g405"
       transform="matrix(0.39425757,0,0,0.39425757,34.29456,-6.7961931)" /><text
       xml:space="preserve"
       transform="matrix(0.10431398,0,0,0.10431398,24.616682,17.594605)"
       id="text10483-2"
       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:15.689px;font-family:'Latin Modern Sans Quotation';-inkscape-font-specification:'Latin Modern Sans Quotation, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect10485-3);display:inline;fill:none;fill-opacity:1;stroke:#f1f32f;stroke-width:4.208;stroke-linecap:round;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;paint-order:normal" /><g
       style="clip-rule:evenodd;fill-rule:evenodd;image-rendering:optimizeQuality;shape-rendering:geometricPrecision;text-rendering:geometricPrecision"
       id="g994-0"
       transform="matrix(-0.02838854,0.10037677,-0.10037677,-0.02838854,55.640152,57.720242)" /><text
       xml:space="preserve"
       transform="matrix(0.10431398,0,0,0.10431398,34.29456,-6.7961931)"
       id="text4429"
       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12px;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect4431);display:inline;fill:#000000;fill-opacity:0;stroke:#000000;stroke-width:2.26772;stroke-linecap:round;stroke-linejoin:bevel" /><text
       xml:space="preserve"
       transform="matrix(0.10431398,0,0,0.10431398,34.29456,-6.7961931)"
       id="text3910"
       style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:12px;font-family:Arial;-inkscape-font-specification:'Arial, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect3912);display:inline;fill:#000000;fill-opacity:0;stroke:#000000;stroke-width:2.26772;stroke-linecap:round;stroke-linejoin:bevel" /><text
       xml:space="preserve"
       transform="matrix(0.10431398,0,0,0.10431398,23.020692,-0.78875449)"
       id="text10483-8"
       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:15.689px;font-family:'Latin Modern Sans Quotation';-inkscape-font-specification:'Latin Modern Sans Quotation, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect10485-9);display:inline;fill:none;fill-opacity:1;stroke:#f1f32f;stroke-width:4.208;stroke-linecap:round;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;paint-order:normal" /><g
       style="clip-rule:evenodd;fill-rule:evenodd;image-rendering:optimizeQuality;shape-rendering:geometricPrecision;text-rendering:geometricPrecision"
       id="g994-1"
       transform="matrix(-0.02838854,0.10037677,-0.10037677,-0.02838854,54.044162,39.336883)" /><g
       style="clip-rule:evenodd;fill:#008000;fill-rule:evenodd;image-rendering:optimizeQuality;shape-rendering:geometricPrecision;text-rendering:geometricPrecision"
       id="g411-2"
       transform="matrix(-0.07929881,0.06777245,-0.06777245,-0.07929881,141.21313,13.732638)" /><text
       xml:space="preserve"
       style="font-size:5.29167px;font-family:Arial;-inkscape-font-specification:'Arial, Normal';text-align:start;text-anchor:start;fill:#099800;stroke:#0588ff;stroke-width:0"
       x="106.87308"
       y="17.84951"
       id="text20796"><tspan
         sodipodi:role="line"
         id="tspan20794"
         style="stroke-width:0"
         x="106.87308"
         y="17.84951" /></text><text
       xml:space="preserve"
       style="font-size:2.01705px;font-family:Arial;-inkscape-font-specification:'Arial, Normal';fill:#000000;stroke:#008000;stroke-width:0.131754;stroke-linecap:round;stroke-linejoin:bevel"
       x="7.5012379"
       y="52.818657"
       id="text1147"><tspan
         sodipodi:role="line"
         id="tspan1145"
         style="stroke-width:0.131754"
         x="7.5012379"
         y="52.818657" /></text><g
       id="g2918-6-7-6"
       transform="matrix(0.41759198,0,0,0.27111337,-39.211617,93.864568)" /><text
       xml:space="preserve"
       transform="matrix(0.24131531,0,0,0.24131531,-11.327163,-11.644414)"
       id="text10483-5"
       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:15.689px;font-family:'Latin Modern Sans Quotation';-inkscape-font-specification:'Latin Modern Sans Quotation, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect10485-2);display:inline;fill:none;fill-opacity:1;stroke:#f1f32f;stroke-width:4.208;stroke-linecap:round;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;paint-order:normal" /><g
       style="clip-rule:evenodd;fill-rule:evenodd;image-rendering:optimizeQuality;shape-rendering:geometricPrecision;text-rendering:geometricPrecision"
       id="g994-3"
       transform="matrix(-0.06567279,0.23220715,-0.23220715,-0.06567279,60.441149,81.180441)" /><text
       xml:space="preserve"
       transform="matrix(0.26458333,0,0,0.26458333,-14.295602,-17.805674)"
       id="text10483-6"
       style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:15.689px;font-family:'Latin Modern Sans Quotation';-inkscape-font-specification:'Latin Modern Sans Quotation, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;white-space:pre;shape-inside:url(#rect10485-38);display:inline;fill:none;fill-opacity:1;stroke:#f1f32f;stroke-width:4.208;stroke-linecap:round;stroke-linejoin:bevel;stroke-miterlimit:4;stroke-dasharray:none;paint-order:normal" /><g
       style="clip-rule:evenodd;fill-rule:evenodd;image-rendering:optimizeQuality;shape-rendering:geometricPrecision;text-rendering:geometricPrecision"
       id="g994-2"
       transform="matrix(-0.07200507,0.25459695,-0.25459695,-0.07200507,64.392729,83.969506)" /><g
       transform="matrix(0.35277777,0,0,-0.35277777,86.27427,-159.73078)"
       inkscape:label="chromDistribution_2"
       id="g894"><g
         id="g896" /><g
         id="g898" /><g
         id="g900" /><g
         id="g902" /><g
         id="g904" /><g
         id="g906" /><g
         id="g908" /><g
         id="g910" /><g
         id="g912" /><g
         id="g914" /><g
         id="g916" /><g
         id="g918" /><g
         id="g920" /><g
         id="g922" /><g
         id="g924" /><g
         id="g926" /><g
         id="g928" /><g
         id="g930" /><g
         id="g932" /><g
         id="g934" /><g
         id="g936" /><g
         id="g938" /><g
         id="g940" /><g
         id="g942" /><g
         id="g944" /><g
         id="g946" /><g
         id="g948" /><g
         id="g950" /><g
         id="g952" /><g
         id="g954" /><g
         id="g956" /><g
         id="g958" /><g
         id="g960" /><g
         id="g962" /><g
         id="g964" /><g
         id="g966" /><g
         id="g968" /><g
         id="g970" /><g
         id="g972" /><g
         id="g974" /><g
         id="g976" /><g
         id="g978" /><g
         id="g980" /><g
         id="g982" /><g
         id="g984" /><g
         id="g986" /><g
         id="g988" /><g
         id="g990" /><g
         id="g992" /><g
         id="g994-21" /><g
         id="g996" /><g
         id="g998" /><g
         id="g1000" /><g
         id="g1002" /><g
         id="g1004" /><g
         id="g1006" /><g
         id="g1008" /><g
         id="g1010" /><g
         id="g1012" /><g
         id="g1014" /><g
         id="g1016" /><g
         id="g1018" /><g
         id="g1030" /><g
         id="g1288" /><g
         id="g1614" /><g
         id="g1782" /><g
         id="g1938" /><g
         id="g2160-7" /><g
         id="g2328" /><g
         id="g2518" /><g
         id="g2694" /><g
         id="g2892" /><g
         id="g3066" /><g
         id="g3176" /><g
         id="g3284" /><g
         id="g3394" /><g
         id="g3524" /><g
         id="g3622" /><g
         id="g3846" /><g
         id="g3946" /><g
         id="g4102" /><g
         id="g4144" /><g
         id="g4228" /><g
         id="g4274" /><g
         id="g4310" /><g
         id="g4390" /><g
         id="g4392"><g
           clip-path="url(#clipPath4398)"
           id="g4394" /></g><g
         id="g4412"><g
           clip-path="url(#clipPath4418)"
           id="g4414" /></g><g
         id="g4420"><g
           clip-path="url(#clipPath4426)"
           id="g4422" /></g><g
         id="g4428" /><g
         id="g4430"><g
           clip-path="url(#clipPath4436)"
           id="g4432" /></g><g
         id="g4450"><g
           clip-path="url(#clipPath4456)"
           id="g4452" /></g><g
         id="g4458"><g
           clip-path="url(#clipPath4464)"
           id="g4460" /></g><g
         id="g4466" /><g
         id="g4468"><g
           clip-path="url(#clipPath4474)"
           id="g4470" /></g><g
         id="g4488"><g
           clip-path="url(#clipPath4494)"
           id="g4490" /></g><g
         id="g4496"><g
           clip-path="url(#clipPath4502)"
           id="g4498" /></g><g
         id="g4504" /><g
         id="g4506"><g
           clip-path="url(#clipPath4512)"
           id="g4508" /></g><g
         id="g4526"><g
           clip-path="url(#clipPath4532)"
           id="g4528" /></g><g
         id="g4534"><g
           clip-path="url(#clipPath4540)"
           id="g4536" /></g><g
         id="g4542" /><g
         id="g4544"><g
           clip-path="url(#clipPath4550)"
           id="g4546" /></g><g
         id="g4564"><g
           clip-path="url(#clipPath4570)"
           id="g4566" /></g><g
         id="g4572"><g
           clip-path="url(#clipPath4578)"
           id="g4574" /></g><g
         id="g4580" /><g
         id="g4582"><g
           clip-path="url(#clipPath4588)"
           id="g4584" /></g><g
         id="g4602"><g
           clip-path="url(#clipPath4608)"
           id="g4604" /></g><g
         id="g4610"><g
           clip-path="url(#clipPath4616)"
           id="g4612" /></g><g
         id="g4618" /><g
         id="g4620"><g
           clip-path="url(#clipPath4626)"
           id="g4622" /></g><g
         id="g4640"><g
           clip-path="url(#clipPath4646)"
           id="g4642" /></g><g
         id="g4648"><g
           clip-path="url(#clipPath4654)"
           id="g4650" /></g><g
         id="g4656" /><g
         id="g4658"><g
           clip-path="url(#clipPath4664)"
           id="g4660" /></g><g
         id="g4678"><g
           clip-path="url(#clipPath4684)"
           id="g4680" /></g><g
         id="g4686"><g
           clip-path="url(#clipPath4692)"
           id="g4688" /></g><g
         id="g4694" /><g
         id="g4696"><g
           clip-path="url(#clipPath4702)"
           id="g4698" /></g><g
         id="g4716"><g
           clip-path="url(#clipPath4722)"
           id="g4718" /></g><g
         id="g4724"><g
           clip-path="url(#clipPath4730)"
           id="g4726" /></g><g
         id="g4732" /><g
         id="g4734"><g
           clip-path="url(#clipPath4740)"
           id="g4736" /></g><g
         id="g4754"><g
           clip-path="url(#clipPath4760)"
           id="g4756-5" /></g><g
         id="g4762"><g
           clip-path="url(#clipPath4768)"
           id="g4764" /></g><g
         id="g4770" /><g
         id="g4772"><g
           clip-path="url(#clipPath4778)"
           id="g4774" /></g><g
         id="g4792"><g
           clip-path="url(#clipPath4798)"
           id="g4794" /></g><g
         id="g4800"><g
           clip-path="url(#clipPath4806)"
           id="g4802" /></g><g
         id="g4808" /><g
         id="g4810"><g
           clip-path="url(#clipPath4816)"
           id="g4812" /></g><g
         id="g4830"><g
           clip-path="url(#clipPath4836)"
           id="g4832" /></g><g
         id="g4838"><g
           clip-path="url(#clipPath4844)"
           id="g4840" /></g><g
         id="g4846" /><g
         id="g4848"><g
           clip-path="url(#clipPath4854)"
           id="g4850" /></g><g
         id="g4868"><g
           clip-path="url(#clipPath4874)"
           id="g4870" /></g><g
         id="g4876"><g
           clip-path="url(#clipPath4882)"
           id="g4878" /></g><g
         id="g4884" /><g
         id="g4886"><g
           clip-path="url(#clipPath4892)"
           id="g4888" /></g><g
         id="g4906"><g
           clip-path="url(#clipPath4912)"
           id="g4908" /></g><g
         id="g4914"><g
           clip-path="url(#clipPath4920)"
           id="g4916" /></g><g
         id="g4922" /><g
         id="g4924"><g
           clip-path="url(#clipPath4930)"
           id="g4926" /></g><g
         id="g4944"><g
           clip-path="url(#clipPath4950)"
           id="g4946" /></g><g
         id="g4952"><g
           clip-path="url(#clipPath4958)"
           id="g4954" /></g><g
         id="g4960" /><g
         id="g4962"><g
           clip-path="url(#clipPath4968)"
           id="g4964" /></g><g
         id="g4982"><g
           clip-path="url(#clipPath4988)"
           id="g4984" /></g><g
         id="g4990"><g
           clip-path="url(#clipPath4996)"
           id="g4992" /></g><g
         id="g4998" /><g
         id="g5000"><g
           clip-path="url(#clipPath5006)"
           id="g5002" /></g><g
         id="g5020"><g
           clip-path="url(#clipPath5026)"
           id="g5022" /></g><g
         id="g5028"><g
           clip-path="url(#clipPath5034)"
           id="g5030" /></g><g
         id="g5036" /><g
         id="g5038"><g
           clip-path="url(#clipPath5044)"
           id="g5040" /></g><g
         id="g5058"><g
           clip-path="url(#clipPath5064)"
           id="g5060" /></g><g
         id="g5066"><g
           clip-path="url(#clipPath5072)"
           id="g5068" /></g><g
         id="g5074" /><g
         id="g5076"><g
           clip-path="url(#clipPath5082)"
           id="g5078" /></g><g
         id="g5096"><g
           clip-path="url(#clipPath5102)"
           id="g5098" /></g><g
         id="g5104"><g
           clip-path="url(#clipPath5110)"
           id="g5106" /></g><g
         id="g5112" /><g
         id="g5114"><g
           clip-path="url(#clipPath5120)"
           id="g5116" /></g><g
         id="g5134"><g
           clip-path="url(#clipPath5140)"
           id="g5136" /></g><g
         id="g5142"><g
           clip-path="url(#clipPath5148)"
           id="g5144" /></g><g
         id="g5150" /><g
         id="g5152"><g
           clip-path="url(#clipPath5158)"
           id="g5154" /></g><g
         id="g5172"><g
           clip-path="url(#clipPath5178)"
           id="g5174" /></g><g
         id="g5180"><g
           clip-path="url(#clipPath5186)"
           id="g5182" /></g><g
         id="g5188" /><g
         id="g5190"><g
           clip-path="url(#clipPath5196)"
           id="g5192" /></g><g
         id="g5210"><g
           clip-path="url(#clipPath5216)"
           id="g5212" /></g><g
         id="g5218"><g
           clip-path="url(#clipPath5224)"
           id="g5220" /></g><g
         id="g5226" /><g
         id="g5228"><g
           clip-path="url(#clipPath5234)"
           id="g5230" /></g><g
         id="g5248"><g
           clip-path="url(#clipPath5254)"
           id="g5250" /></g><g
         id="g5256"><g
           clip-path="url(#clipPath5262)"
           id="g5258" /></g><g
         id="g5264" /><g
         id="g5266" /><g
         id="g5268" /><g
         id="g5270" /><g
         id="g5276" /><g
         id="g5278" /><g
         id="g5284" /><g
         id="g5292" /><g
         id="g5294" /><g
         id="g5296" /><g
         id="g5298" /><g
         id="g5304" /><g
         id="g5306" /><g
         id="g5314" /><g
         id="g5320" /><g
         id="g5322" /><g
         id="g5324" /><g
         id="g5326" /><g
         id="g5332" /><g
         id="g5334" /><g
         id="g5342" /><g
         id="g5348" /><g
         id="g5350" /><g
         id="g5352" /><g
         id="g5354" /><g
         id="g5360" /><g
         id="g5362" /><g
         id="g5370" /><g
         id="g5376" /><g
         id="g5378" /><g
         id="g5380" /><g
         id="g5382" /><g
         id="g5388" /><g
         id="g5390" /><g
         id="g5398" /><g
         id="g5404" /><g
         id="g5406" /><g
         id="g5408" /><g
         id="g5410" /><g
         id="g5416" /><g
         id="g5418" /><g
         id="g5426" /><g
         id="g5432" /><g
         id="g5434" /><g
         id="g5436" /><g
         id="g5438" /><g
         id="g5444" /><g
         id="g5446" /><g
         id="g5454" /><g
         id="g5460" /><g
         id="g5462" /><g
         id="g5464" /><g
         id="g5466" /><g
         id="g5472" /><g
         id="g5474" /><g
         id="g5482" /><g
         id="g5488" /><g
         id="g5490" /><g
         id="g5492" /><g
         id="g5494" /><g
         id="g5500" /><g
         id="g5502" /><g
         id="g5510" /><g
         id="g5516" /><g
         id="g5518" /><g
         id="g5520" /><g
         id="g5522" /><g
         id="g5528" /><g
         id="g5530" /><g
         id="g5538" /><g
         id="g5544" /><g
         id="g5546" /><g
         id="g5548" /><g
         id="g5550" /><g
         id="g5556" /><g
         id="g5558" /><g
         id="g5566" /><g
         id="g5572" /><g
         id="g5574" /><g
         id="g5576" /><g
         id="g5578" /><g
         id="g5584" /><g
         id="g5586" /><g
         id="g5594" /><g
         id="g5600" /><g
         id="g5602" /><g
         id="g5604" /><g
         id="g5606" /><g
         id="g5612" /><g
         id="g5614" /><g
         id="g5622" /><g
         id="g5628" /><g
         id="g5630" /><g
         id="g5632" /><g
         id="g5634" /><g
         id="g5640" /><g
         id="g5642" /><g
         id="g5650" /><g
         id="g5656" /><g
         id="g5658" /><g
         id="g5660" /><g
         id="g5662" /><g
         id="g5668" /><g
         id="g5670" /><g
         id="g5678" /><g
         id="g5684" /><g
         id="g5686" /><g
         id="g5688" /><g
         id="g5690" /><g
         id="g5696" /><g
         id="g5698" /><g
         id="g5706" /><g
         id="g5712" /><g
         id="g5714" /><g
         id="g5716" /><g
         id="g5718" /><g
         id="g5724" /><g
         id="g5726" /><g
         id="g5734" /><g
         id="g5740" /><g
         id="g5742" /><g
         id="g5744" /><g
         id="g5746" /><g
         id="g5752" /><g
         id="g5754" /><g
         id="g5762" /><g
         id="g5768" /><g
         id="g5770" /><g
         id="g5772" /><g
         id="g5774" /><g
         id="g5780" /><g
         id="g5782" /><g
         id="g5790" /><g
         id="g5796" /><g
         id="g5798" /><g
         id="g5800" /><g
         id="g5802" /><g
         id="g5808" /><g
         id="g5810" /><g
         id="g5818" /><g
         id="g5824" /><g
         id="g5826" /><g
         id="g5828" /><g
         id="g5830" /><g
         id="g5836" /><g
         id="g5838" /><g
         id="g5846" /><g
         id="g5852" /><g
         id="g5854" /><g
         id="g5856" /><g
         id="g5858" /><g
         id="g5864" /><g
         id="g5866" /><g
         id="g5874" /><g
         id="g5880" /><g
         id="g5882" /><g
         id="g5884" /><g
         id="g5886" /><g
         id="g5892" /><g
         id="g5894" /><g
         id="g5902" /><g
         id="g5908" /><g
         id="g5910" /><g
         id="g5912" /><g
         id="g5914" /><g
         id="g5920" /><g
         id="g5922" /><g
         id="g5930" /><g
         id="g5936" /><g
         id="g5938" /><g
         id="g5940" /><g
         id="g5942" /><g
         id="g5944" /><g
         id="g5946" /><g
         id="g5948" /><g
         id="g5950" /><g
         id="g5952" /><g
         id="g5954" /><g
         id="g5956" /><g
         id="g5958" /><g
         id="g5960" /><g
         id="g5962" /><g
         id="g5964" /><g
         id="g5966" /><g
         id="g5968" /><g
         id="g5970" /><g
         id="g5972" /><g
         id="g5974" /><g
         id="g5976" /><g
         id="g5978" /><g
         id="g5980" /><g
         id="g5982" /><g
         id="g5984" /><g
         id="g5986" /><g
         id="g5988" /><g
         id="g5990" /><g
         id="g5992" /><g
         id="g5994" /><g
         id="g5996" /><g
         id="g5998" /><g
         id="g6000" /><g
         id="g6002" /><g
         id="g6004" /><g
         id="g6006" /><g
         id="g6008" /><g
         id="g6010" /><g
         id="g6012" /><g
         id="g6014" /><g
         id="g6016" /><g
         id="g6018" /><g
         id="g6020" /><g
         id="g6022" /><g
         id="g6024" /><g
         id="g6026" /><g
         id="g6028" /><g
         id="g6030" /><g
         id="g6032" /><g
         id="g6034" /><g
         id="g6036" /><g
         id="g6044" /><g
         id="g6052" /><g
         id="g6054" /><g
         id="g6056" /><g
         id="g6058" /><g
         id="g6060" /><g
         id="g6062" /><g
         id="g6064" /><g
         id="g6070" /><g
         id="g6072" /><g
         id="g6074" /><g
         id="g6082" /><g
         id="g6084" /><g
         id="g6086" /><g
         id="g6088" /><g
         id="g6094" /><g
         id="g6096" /><g
         id="g6098" /><g
         id="g6104" /><g
         id="g6106" /><g
         id="g6108" /><g
         id="g6116" /><g
         id="g6118" /><g
         id="g6120" /><g
         id="g6122" /><g
         id="g6130" /><g
         id="g6132" /><g
         id="g6134" /><g
         id="g6136" /><g
         id="g6138" /><g
         id="g6140" /><g
         id="g6148" /><g
         id="g6150" /><g
         id="g6152" /><g
         id="g6154" /><g
         id="g6156" /><g
         id="g6158" /></g><g
       id="g12180"
       transform="matrix(0.52682267,0,0,0.5304505,-6.109635,-144.32627)"><g
         id="g12148"><g
           id="g12146" /></g></g><g
       id="g12482-6"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12484"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12486"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12488"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12490"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12492"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12494"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12496"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12498"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12500"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12502"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12504"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12506"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12508"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12510"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12512"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12514"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12516"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12518"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12520"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12522"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12524"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12526"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12528"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12530"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12532"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12534"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12536"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12538"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12540"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12542"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12544"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12546"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12548"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12550"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12552"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12554"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12556"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12558"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12560"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12562"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12564"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12566"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12568"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12570"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12572"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12574"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12576"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12578"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12580"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12582"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12584"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12586"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12588"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12590"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12592"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12594"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12596"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12598"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12600"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12602-5"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12604"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12616"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g12874"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g13200"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g13368"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g13524"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g13746"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g13914"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g14104"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g14280"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g14478"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g14652"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g14762"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g14870"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g14980"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15110"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15208"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15432"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15532"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15688"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15730"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15814"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15860"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15896"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15976"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g15978"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g15980"
         clip-path="url(#clipPath15984)" /></g><g
       id="g15998"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16000"
         clip-path="url(#clipPath16004)" /></g><g
       id="g16006"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16008"
         clip-path="url(#clipPath16012)" /></g><g
       id="g16014"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16016"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16018"
         clip-path="url(#clipPath16022)" /></g><g
       id="g16036"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16038"
         clip-path="url(#clipPath16042)" /></g><g
       id="g16044"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16046"
         clip-path="url(#clipPath16050)" /></g><g
       id="g16052"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16054"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16056"
         clip-path="url(#clipPath16060)" /></g><g
       id="g16074"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16076"
         clip-path="url(#clipPath16080)" /></g><g
       id="g16082"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16084"
         clip-path="url(#clipPath16088)" /></g><g
       id="g16090"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16092"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16094"
         clip-path="url(#clipPath16098)" /></g><g
       id="g16112"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16114"
         clip-path="url(#clipPath16118)" /></g><g
       id="g16120"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16122"
         clip-path="url(#clipPath16126)" /></g><g
       id="g16128"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16130"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16132"
         clip-path="url(#clipPath16136)" /></g><g
       id="g16150"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16152"
         clip-path="url(#clipPath16156)" /></g><g
       id="g16158"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16160"
         clip-path="url(#clipPath16164)" /></g><g
       id="g16166"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16168"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16170"
         clip-path="url(#clipPath16174)" /></g><g
       id="g16188"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16190"
         clip-path="url(#clipPath16194)" /></g><g
       id="g16196"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16198"
         clip-path="url(#clipPath16202)" /></g><g
       id="g16204"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16206"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16208"
         clip-path="url(#clipPath16212)" /></g><g
       id="g16226"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16228"
         clip-path="url(#clipPath16232)" /></g><g
       id="g16234"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16236"
         clip-path="url(#clipPath16240)" /></g><g
       id="g16242"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16244"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16246"
         clip-path="url(#clipPath16250)" /></g><g
       id="g16264"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16266"
         clip-path="url(#clipPath16270)" /></g><g
       id="g16272"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16274"
         clip-path="url(#clipPath16278)" /></g><g
       id="g16280"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16282"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16284"
         clip-path="url(#clipPath16288)" /></g><g
       id="g16302"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16304"
         clip-path="url(#clipPath16308)" /></g><g
       id="g16310"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16312"
         clip-path="url(#clipPath16316)" /></g><g
       id="g16318"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16320"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16322"
         clip-path="url(#clipPath16326)" /></g><g
       id="g16340"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16342"
         clip-path="url(#clipPath16346)" /></g><g
       id="g16348"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16350"
         clip-path="url(#clipPath16354)" /></g><g
       id="g16356"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16358"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16360"
         clip-path="url(#clipPath16364)" /></g><g
       id="g16378"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16380"
         clip-path="url(#clipPath16384)" /></g><g
       id="g16386"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16388"
         clip-path="url(#clipPath16392)" /></g><g
       id="g16394"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16396"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16398"
         clip-path="url(#clipPath16402)" /></g><g
       id="g16416"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16418"
         clip-path="url(#clipPath16422)" /></g><g
       id="g16424"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16426"
         clip-path="url(#clipPath16430)" /></g><g
       id="g16432"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16434"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16436"
         clip-path="url(#clipPath16440)" /></g><g
       id="g16454"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16456"
         clip-path="url(#clipPath16460)" /></g><g
       id="g16462"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16464"
         clip-path="url(#clipPath16468)" /></g><g
       id="g16470"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16472"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16474"
         clip-path="url(#clipPath16478)" /></g><g
       id="g16492"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16494"
         clip-path="url(#clipPath16498)" /></g><g
       id="g16500"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16502"
         clip-path="url(#clipPath16506)" /></g><g
       id="g16508"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16510"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16512"
         clip-path="url(#clipPath16516)" /></g><g
       id="g16530"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16532"
         clip-path="url(#clipPath16536)" /></g><g
       id="g16538"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16540"
         clip-path="url(#clipPath16544)" /></g><g
       id="g16546"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16548"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16550"
         clip-path="url(#clipPath16554)" /></g><g
       id="g16568"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16570"
         clip-path="url(#clipPath16574)" /></g><g
       id="g16576"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16578"
         clip-path="url(#clipPath16582)" /></g><g
       id="g16584"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16586"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16588"
         clip-path="url(#clipPath16592)" /></g><g
       id="g16606"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16608"
         clip-path="url(#clipPath16612)" /></g><g
       id="g16614"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16616"
         clip-path="url(#clipPath16620)" /></g><g
       id="g16622"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16624"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16626"
         clip-path="url(#clipPath16630)" /></g><g
       id="g16644"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16646"
         clip-path="url(#clipPath16650)" /></g><g
       id="g16652"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16654"
         clip-path="url(#clipPath16658)" /></g><g
       id="g16660"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16662"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16664"
         clip-path="url(#clipPath16668)" /></g><g
       id="g16682"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16684"
         clip-path="url(#clipPath16688)" /></g><g
       id="g16690"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16692"
         clip-path="url(#clipPath16696)" /></g><g
       id="g16698"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16700"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16702"
         clip-path="url(#clipPath16706)" /></g><g
       id="g16720"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16722"
         clip-path="url(#clipPath16726)" /></g><g
       id="g16728"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16730"
         clip-path="url(#clipPath16734)" /></g><g
       id="g16736"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16738"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16740"
         clip-path="url(#clipPath16744)" /></g><g
       id="g16758"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16760"
         clip-path="url(#clipPath16764)" /></g><g
       id="g16766"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16768"
         clip-path="url(#clipPath16772)" /></g><g
       id="g16774"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16776"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16778"
         clip-path="url(#clipPath16782)" /></g><g
       id="g16796"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16798"
         clip-path="url(#clipPath16802)" /></g><g
       id="g16804"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16806"
         clip-path="url(#clipPath16810)" /></g><g
       id="g16812"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16814"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16816"
         clip-path="url(#clipPath16820)" /></g><g
       id="g16834"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16836"
         clip-path="url(#clipPath16840)" /></g><g
       id="g16842"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)"><g
         id="g16844"
         clip-path="url(#clipPath16848)" /></g><g
       id="g16850"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16852"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16854"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16856"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16862"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16864"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16870"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16878"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16880"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16882"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16884"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16890"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16892"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16900"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16906"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16908"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16910"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16912"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16918"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16920"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16928"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16934"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16936"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16938"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16940"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16946"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16948"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16956"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16962"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16964"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16966"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16968"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16974"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16976"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16984"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16990"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16992"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16994"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g16996"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17002"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17004"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17012"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17018"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17020"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17022"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17024"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17030"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17032"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17040"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17046"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17048"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17050"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17052"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17058"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17060"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17068"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17074"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17076"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17078"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17080"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17086"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17088"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17096"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17102"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17104"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17106"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17108"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17114"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17116"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17124"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17130"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17132"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17134"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17136"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17142"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17144"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17152"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17158"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17160"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17162"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17164"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17170"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17172"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17180"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17186"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17188"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17190"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17192"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17198"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17200"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17208"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17214"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17216"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17218"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17220"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17226"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17228"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17236"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17242"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17244"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17246"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17248"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17254"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17256"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17264"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17270"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17272"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17274"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17276"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17282"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17284"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17292"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17298"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17300"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17302"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17304"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17310"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17312"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17320"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17326"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17328"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17330"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17332"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17338"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17340"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17348"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17354"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17356"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17358"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17360"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17366"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17368"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17376"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17382"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17384"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17386"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17388"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17394"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17396"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17404"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17410"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17412"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17414"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17416"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17422"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17424"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17432"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17438"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17440"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17442"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17444"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17450"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17452"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17460"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17466"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17468"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17470"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17472"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17478"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17480"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17488"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17494"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17496"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17498"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17500"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17506"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17508"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17516"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17522"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17524"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17526"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17528"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17530"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17532"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17534"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17536"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17538"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17540"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17542"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17544"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17546"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17548"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17550"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17552"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17554"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17556"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17558"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17560"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17562"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17564"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17566"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17568"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17570"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17572"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17574"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17576"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17578"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17580"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17582"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17584"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17586"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17588"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17590"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17592"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17594"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17596"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17598"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17600"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17602"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17604"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17606"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17608"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17610"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17612"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17614"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17616"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17618"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17620"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17622"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17630"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17638"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17640"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17642"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17644"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17646"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17648"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17650"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17656"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17658"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17660"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17668"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17670"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17672"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17674"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17680"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17682"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17684"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17690"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17692"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17694"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17702"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17704"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17706"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17708"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17716"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17718"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17720"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17722"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17724"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17726"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17734"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17736"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17738"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17740"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17742"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g17744"
       transform="matrix(0.33138644,0,0,-0.31073276,85.43818,-127.48438)" /><g
       id="g28517"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28519"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28521"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28523"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28525"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28527"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28529"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28531"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28533"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28535"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28537"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28539"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28541"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28543"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28549"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28965"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28967"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28969"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28971"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28973"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28975"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28977"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28979"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28981"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28983"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28985"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28987"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28993"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g28995"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29007"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29015"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29017"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29019"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29021"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29023"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29025"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29031"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29033"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29041"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29053"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29055"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29057"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29059"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29061"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29069"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29077"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29079"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29081"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29083"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29085"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29087"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29089"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29095"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29103"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29123"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29125"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29127"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29135"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29137"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29157"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29159"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29161"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29163"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29165"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29167"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29169"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29171"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29173"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29175"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29177"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29179"
       transform="matrix(0.35277777,0,0,-0.35277777,96.36514,-109.38849)" /><g
       id="g29543"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29545"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29547"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29549"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29551"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29553"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29555"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29557"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29559"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29561"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29563"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29565"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29567"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29569"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29581"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g29993"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30405"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30407"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)"><g
         id="g30409"
         clip-path="url(#clipPath30413)" /></g><g
       id="g30427"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)"><g
         id="g30429"
         clip-path="url(#clipPath30433)" /></g><g
       id="g30435"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)"><g
         id="g30437"
         clip-path="url(#clipPath30441)" /></g><g
       id="g30443"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30445"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)"><g
         id="g30447"
         clip-path="url(#clipPath30451)" /></g><g
       id="g30465"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)"><g
         id="g30467"
         clip-path="url(#clipPath30471)" /></g><g
       id="g30473"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)"><g
         id="g30475"
         clip-path="url(#clipPath30479)" /></g><g
       id="g30481"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30483"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30485"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30487"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30489"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30491"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30497"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30499"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30703"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30715"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30717"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30719"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30721"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30727"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30729"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30933"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30945"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30947"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30949"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30951"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30957"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30959"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30979"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30991"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30993"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30995"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30997"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g30999"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31001"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31003"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31011"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31019"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31021"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31023"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31025"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31027"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31035"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31037"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31039"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31041"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31043"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g31045"
       transform="matrix(0.31019625,0,0,-0.29884804,102.05424,-166.56769)" /><g
       id="g32717"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32719"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32721"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32723"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32725"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32727"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32729"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32731"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32733"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32735"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32737"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32739"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32741"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32743"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32745"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32747"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32749"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32751"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32753"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32755"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32757"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32759"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32761"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32763"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32765"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32767"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32769"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32771"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32773"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32775"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32777"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32779"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32791"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32827"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32863"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32865"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)"><g
         id="g32867"
         clip-path="url(#clipPath32871)" /></g><g
       id="g32885"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)"><g
         id="g32887"
         clip-path="url(#clipPath32891)" /></g><g
       id="g32893"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)"><g
         id="g32895"
         clip-path="url(#clipPath32899)" /></g><g
       id="g32901"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32903"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)"><g
         id="g32905"
         clip-path="url(#clipPath32909)" /></g><g
       id="g32923"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)"><g
         id="g32925"
         clip-path="url(#clipPath32929)" /></g><g
       id="g32931"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)"><g
         id="g32933"
         clip-path="url(#clipPath32937)" /></g><g
       id="g32939"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32941"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32943"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32945"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32947"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32949"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32951"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g32977"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33025"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33027"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33029"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33031"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33033"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33035"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33037"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33061"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33075"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33077"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33079"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33081"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33083"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33085"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33087"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33111"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33125"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33127"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33129"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33131"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33133"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33135"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33137"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33139"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33141"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33143"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33145"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33153"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33155"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33157"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33159"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33161"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33163"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33165"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33171"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33173"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33175"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33183"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33185"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33191"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33197"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33203"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33209"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33215"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33221"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33227"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33233"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33239"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33245"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33251"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33257"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33263"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33269"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33275"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33281"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33287"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33293"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33295"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33297"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33305"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33307"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33309"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33311"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33319"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33321"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33323"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33325"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33333"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33335"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33337"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33339"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33347"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33349"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33351"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33353"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33361"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33363"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33365"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33367"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33375"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33377"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33379"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33381"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33389"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33391"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33393"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33395"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33403"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33405"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33407"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33409"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33417"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33419"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33421"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33423"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33425"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33427"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33429"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33431"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33433"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33435"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33437"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33439"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33441"
       transform="matrix(0.35277777,0,0,-0.35277777,61.62487,-20.43863)" /><g
       id="g33836"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33838"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33840"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33842"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33844"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33846"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33848"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33850"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33852"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33854"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33856"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33858"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33860"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33862"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33864"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33866"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33868"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33870"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33882"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33902"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33904"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33906"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33908"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33910"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33912"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33914"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33916"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33918"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33920"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33922"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33924"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33930"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33932"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33952"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33964"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33966"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33968"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33970"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33972"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33974"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33980"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33982"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g33996"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34020"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34022"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34024"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34026"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34028"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34060"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34062"
       transform="matrix(0.19464377,0,0,-0.15524102,40.82498,26.33185)" /><g
       id="g34068"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34070"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34072"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34074"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34076"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34078"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34080"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34086"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34088"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34090"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34098"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34100"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34102"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34104"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34110"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34112"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34114"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34120"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34122"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34124"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34132"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34134"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34136"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34138"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34146"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34148"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34150"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34152"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34154"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34156"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34164"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34166"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34168"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34170"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34172"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34174"
       transform="matrix(0.35277777,0,0,-0.35277777,35.45427,35.61906)" /><g
       id="g34376"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34378"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34380"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34382"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34384"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34386"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34388"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34390"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34392"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34394"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34396"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34398"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34400"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34402"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34404"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34406"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34408"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34410"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34422"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34462"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34464"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34466"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34468"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34470"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34472"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34474"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34476"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34478"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34480"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34482"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34484"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34490"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34492"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34512"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34524"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34526"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34528"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34530"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34532"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34534"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34540"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34542"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34560"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34592"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34594"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34596"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34598"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34600"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34608"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34616"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34618"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34620"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34622"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34624"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34626"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34628"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34634"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34636"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34638"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34646"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34648"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34650"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34652"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34658"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34660"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34662"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34668"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34670"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34672"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34680"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34682"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34684"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34686"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34694"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34696"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34698"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34700"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34702"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34704"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34712"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34714"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34716"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34718"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34720"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34722"
       transform="matrix(0.35277777,0,0,-0.35277777,-71.47272,-1.68719)" /><g
       id="g34928"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34930"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34932"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34934"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34936"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34938"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34940"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34942"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34944"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34946"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34948"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34950"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34952"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34954"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34956"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34958"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34960"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34962"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34968"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34970"
       transform="matrix(0.16719903,0,0,-0.16370522,7.651459,13.3046)"
       style="stroke-width:0.604438;stroke-miterlimit:10;stroke-dasharray:none"><g
         id="g34972"
         clip-path="url(#clipPath34976)"
         style="stroke-width:0.604438;stroke-miterlimit:10;stroke-dasharray:none" /></g><g
       id="g34988"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34990"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34992"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34994"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34996"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g34998"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35000"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35002"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35004"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35006"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35008"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35010"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35016"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35018"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35038"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35050"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35052"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35054"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35056"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35058"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35060"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35066"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35068"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35082"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35106"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35108"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35110"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35112"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35114"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35116"
       transform="matrix(0.16719903,0,0,-0.16370522,7.651459,13.3046)" /><g
       id="g35122"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35130"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35132"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35134"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35136"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35138"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35140"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35142"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35148"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35150"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35152"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35160"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35162"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35164"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35166"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35172"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35178"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35180"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35182"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35188"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35194"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35196"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35198"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35206"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35208"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35210"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35212"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35220"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35222"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35224"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35226"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35228"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35230"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35238"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35240"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35242"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35244"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35246"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35248"
       transform="matrix(0.27129163,0,0,-0.28507201,-177.0672,-144.25765)" /><g
       id="g35441"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35443"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35445"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35447"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35449"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35451"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35453"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35455"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35457"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35459"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35461"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35463"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35465"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35467"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35469"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35471"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35473"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)"><g
         id="g35475"
         clip-path="url(#clipPath35479)" /></g><g
       id="g35481"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35523"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35525"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35527"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35529"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35531"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35533"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35535"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35537"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35539"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35541"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35543"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35545"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35551"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35553"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35585"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35603"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35605"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35607"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35609"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35611"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35613"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35619"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35621"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35635"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35659"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35661"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35663"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35665"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35667"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35721"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35723"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35725"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35727"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35729"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35731"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35733"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35735"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35737"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35743"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35745"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35747"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35755"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35757"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35763"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35769"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35775"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35781"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35783"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35785"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35793"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35795"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35797"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35799"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35807"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35809"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35811"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35813"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35815"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35817"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35819"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35821"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35823"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35825"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35827"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35829"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g35831"
       transform="matrix(0.35277777,0,0,-0.35277777,-197.46871,-106.72678)" /><g
       id="g48589"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48591"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48593"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48595"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48597"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48599"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48601"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48603"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48605"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48607"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48609"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48611"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48613"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48615"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48617"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48619"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48621"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48623"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48625"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48627"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48629"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48631"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48633"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48635"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48637"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48639"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48641"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48643"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48655"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48689"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48723"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48725"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)"><g
         id="g48727"
         clip-path="url(#clipPath48731)" /></g><g
       id="g48745"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)"><g
         id="g48747"
         clip-path="url(#clipPath48751)" /></g><g
       id="g48753"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)"><g
         id="g48755"
         clip-path="url(#clipPath48759)" /></g><g
       id="g48761"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48763"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)"><g
         id="g48765"
         clip-path="url(#clipPath48769)" /></g><g
       id="g48783"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)"><g
         id="g48785"
         clip-path="url(#clipPath48789)" /></g><g
       id="g48791"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)"><g
         id="g48793"
         clip-path="url(#clipPath48797)" /></g><g
       id="g48799"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48801"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48803"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48805"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48807"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48809"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48811"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48835"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48879"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48881"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48883"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48885"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48887"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48889"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48891"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48911"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48923"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48925"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48927"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48929"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48931"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48933"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48935"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48955"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48967"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48969"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48971"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48973"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48975"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48977"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48979"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48981"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48983"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48985"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48987"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48995"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48997"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48999"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49001"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49003"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49005"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49007"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49013"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49015"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49017"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49025"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49027"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49033"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49039"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49045"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49051"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49057"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49063"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49069"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49075"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49081"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49087"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49093"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49099"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49105"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49111"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49113"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49115"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49123"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49125"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49127"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49129"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49137"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49139"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49141"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49143"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49151"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49153"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49155"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49157"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49165"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49167"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49169"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49171"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49179"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49181"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49183"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49185"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49193"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49195"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49197"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49199"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49207"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49209"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49211"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49213"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49215"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49217"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49219"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49221"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49223"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49225"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49227"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49229"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g49231"
       transform="matrix(0.35277777,0,0,-0.35277777,75.38525,77.68781)" /><g
       id="g48989"
       transform="matrix(0.22217534,0,0,-0.2484394,144.0845,-61.62567)" /><flowRoot
       xml:space="preserve"
       id="flowRoot73437"
       style="font-style:normal;font-weight:normal;font-size:12px;line-height:0.2;font-family:sans-serif;text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"
       transform="translate(-69.34183,-295.03094)"><flowRegion
         id="flowRegion73439"><rect
           id="rect73441"
           width="0"
           height="50"
           x="818"
           y="251.70079" /></flowRegion><flowPara
         id="flowPara73443" /></flowRoot><flowRoot
       xml:space="preserve"
       id="flowRoot73445"
       style="font-style:normal;font-weight:normal;font-size:12px;line-height:0.2;font-family:sans-serif;text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"
       transform="translate(-69.34183,-295.03094)"><flowRegion
         id="flowRegion73447"><rect
           id="rect73449"
           width="1"
           height="50"
           x="816"
           y="247.70079" /></flowRegion><flowPara
         id="flowPara73451" /></flowRoot><flowRoot
       xml:space="preserve"
       id="flowRoot73453"
       style="font-style:normal;font-weight:normal;font-size:12px;line-height:0.2;font-family:sans-serif;text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"
       transform="translate(-69.34183,-295.03094)"><flowRegion
         id="flowRegion73455"><rect
           id="rect73457"
           width="4"
           height="70"
           x="814"
           y="228.70079" /></flowRegion><flowPara
         id="flowPara73459" /></flowRoot><flowRoot
       xml:space="preserve"
       id="flowRoot73461"
       style="font-style:normal;font-weight:normal;font-size:12px;line-height:0.2;font-family:sans-serif;text-align:center;letter-spacing:0px;word-spacing:0px;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none"
       transform="translate(-69.34183,-295.03094)"><flowRegion
         id="flowRegion73463"><rect
           id="rect73465"
           width="2.1213202"
           height="50.204582"
           x="814.58704"
           y="248.52145" /></flowRegion><flowPara
         id="flowPara73467" /></flowRoot><g
       id="g102030" /><g
       id="g6109"
       transform="translate(0,-8.9958338)"><g
         id="g12446"
         transform="matrix(1.0605167,0,0,1.0605167,-99.660566,15.899908)"><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421"
           x="110.66666"
           y="63.258259"><tspan
             sodipodi:role="line"
             id="tspan2419"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="110.66666"
             y="63.258259">--processed --just-metadata </tspan></text><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-3"
           x="110.66666"
           y="65.359871"><tspan
             sodipodi:role="line"
             id="tspan2419-3"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="110.66666"
             y="65.359871">--data-source samples</tspan></text></g><g
         id="g12182"
         transform="matrix(1.0605167,0,0,1.0605167,-100.27316,17.143286)"><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-8"
           x="111.19802"
           y="68.136139"><tspan
             sodipodi:role="line"
             id="tspan2419-0"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.19802"
             y="68.136139">--processed</tspan></text><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-5"
           x="111.19802"
           y="70.20211"><tspan
             x="111.19802"
             sodipodi:role="line"
             id="tspan2419-6"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             y="70.20211">--data-source samples</tspan></text></g><g
         id="g12176"
         transform="matrix(1.0605167,0,0,1.0605167,-100.27316,5.7667075)"><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-0"
           x="111.23149"
           y="84.878197"><tspan
             sodipodi:role="line"
             id="tspan2419-04"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="84.878197">--processed --just-metadata</tspan></text><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-62"
           x="111.23149"
           y="87.169403"><tspan
             sodipodi:role="line"
             id="tspan2419-67"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="87.169403">--data-source series</tspan></text></g><g
         id="g12170"
         transform="matrix(1.0605167,0,0,1.0605167,-100.27316,6.3296868)"><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-8-9"
           x="111.23149"
           y="90.194206"><tspan
             sodipodi:role="line"
             id="tspan2419-0-1"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="90.194206">--processed</tspan></text><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-62-9"
           x="111.23149"
           y="92.449768"><tspan
             sodipodi:role="line"
             id="tspan2419-67-6"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="92.449768">--data-source series</tspan></text></g><g
         id="g12158"
         transform="matrix(1.0605167,0,0,1.0605167,-100.27316,7.4712703)"><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-8-9-9"
           x="111.23149"
           y="100.77509"><tspan
             sodipodi:role="line"
             id="tspan2419-0-1-9"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="100.77509">--processed</tspan></text><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-56"
           x="111.23149"
           y="103.03065"><tspan
             sodipodi:role="line"
             id="tspan2419-9"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="103.03065">--data-source all</tspan></text></g><g
         id="g12164"
         transform="matrix(1.0605167,0,0,1.0605167,-100.27316,7.2203764)"><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-25"
           x="111.23149"
           y="95.165184"><tspan
             sodipodi:role="line"
             id="tspan2419-44"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="95.165184">--processed --just-metadata </tspan></text><text
           xml:space="preserve"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.155966"
           id="text2421-87"
           x="111.23149"
           y="97.45639"><tspan
             sodipodi:role="line"
             id="tspan2419-2"
             style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.02756px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.155966"
             x="111.23149"
             y="97.45639">--data-source all</tspan></text></g><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-61-2"
         x="29.699821"
         y="69.65416"><tspan
           x="29.699821"
           sodipodi:role="line"
           id="tspan2419-32-6"
           style="stroke-width:0.165405"
           y="69.65416">Arguments</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-9"
         x="12.702452"
         y="70.080429"><tspan
           sodipodi:role="line"
           id="tspan2419-91"
           style="stroke-width:0.165405"
           x="12.702452"
           y="70.080429">#</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-4"
         x="12.524652"
         y="74.842537"><tspan
           sodipodi:role="line"
           id="tspan2419-910"
           style="stroke-width:0.165405"
           x="12.524652"
           y="74.842537">1</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-7"
         x="12.468565"
         y="79.156746"><tspan
           sodipodi:role="line"
           id="tspan2419-58"
           style="stroke-width:0.165405"
           x="12.468565"
           y="79.156746">2</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-70"
         x="12.448008"
         y="84.304665"><tspan
           sodipodi:role="line"
           id="tspan2419-48"
           style="stroke-width:0.165405"
           x="12.448008"
           y="84.304665">3</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-04"
         x="12.482656"
         y="90.674683"><tspan
           sodipodi:role="line"
           id="tspan2419-29"
           style="stroke-width:0.165405"
           x="12.482656"
           y="90.674683">4</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-610"
         x="12.442759"
         y="96.940735"><tspan
           sodipodi:role="line"
           id="tspan2419-42"
           style="stroke-width:0.165405"
           x="12.442759"
           y="96.940735">5</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-2"
         x="12.453258"
         y="102.91211"><tspan
           sodipodi:role="line"
           id="tspan2419-20"
           style="stroke-width:0.165405"
           x="12.453258"
           y="102.91211">6</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-55"
         x="12.442232"
         y="109.22785"><tspan
           sodipodi:role="line"
           id="tspan2419-290"
           style="stroke-width:0.165405"
           x="12.442232"
           y="109.22785">7</tspan></text><text
         xml:space="preserve"
         style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-55-2"
         x="12.448008"
         y="115.54359"><tspan
           sodipodi:role="line"
           id="tspan2419-290-8"
           style="stroke-width:0.165405"
           x="12.448008"
           y="115.54359">8</tspan></text><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.0689336;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.313595"
         d="m 10.864083,86.712559 81.920017,0.0064"
         id="path11467-4"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.0689336;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.313595"
         d="m 10.864083,93.091213 81.920017,0.0064"
         id="path11467-4-0"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.0689336;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.313595"
         d="m 10.864083,99.291909 81.920017,0.0064"
         id="path11467-4-86"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.0689336;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.31"
         d="m 10.864083,105.4548 81.920017,0.006"
         id="path11467-4-2"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.0689336;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.313595"
         d="m 10.864083,111.6551 81.920017,0.006"
         id="path11467-4-4"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.07;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.31"
         d="m 54.214723,116.65019 0.006,-47.013367"
         id="path11467-0"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.280595px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
         d="m 10.261888,71.743315 82.850255,0.0064"
         id="path11467"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.280595px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
         d="m 15.489893,116.64603 0.0064,-47.013372"
         id="path11467-0-4"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.07;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.31"
         d="m 68.900619,116.65019 0.006,-47.013367"
         id="path70866"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;opacity:0.31;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.07;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:1"
         d="m 81.727092,116.65019 0.006,-47.013367"
         id="path70868"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><text
         xml:space="preserve"
         style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.15026px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
         id="text2421-6"
         x="17.703272"
         y="78.798691"><tspan
           sodipodi:role="line"
           id="tspan2419-4"
           style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:2.15026px;font-family:monospace;-inkscape-font-specification:'monospace, Normal';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-east-asian:normal;stroke-width:0.165405"
           x="17.703272"
           y="78.798691">--just-metadata</tspan></text><g
         id="g5977"><text
           xml:space="preserve"
           style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
           id="text2421-1"
           x="71.115555"
           y="65.585655"><tspan
             sodipodi:role="line"
             id="tspan2419-5"
             style="stroke-width:0.165405"
             x="71.115555"
             y="65.585655">Output</tspan></text><path
           style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.280595px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
           d="m 60.140096,66.996214 28.49409,0.0064"
           id="path11467-6"
           inkscape:connector-type="polyline"
           inkscape:connector-curvature="0"
           sodipodi:nodetypes="cc" /><g
           id="g5911"><g
             id="g5805"><text
               xml:space="preserve"
               style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
               id="text2421-1-9"
               x="55.546135"
               y="70.080429"><tspan
                 sodipodi:role="line"
                 id="tspan2419-5-4"
                 style="stroke-width:0.165405"
                 x="55.546135"
                 y="70.080429">Data Source</tspan></text><g
               id="g5784"><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61"
                 x="57.384491"
                 y="84.076599"><tspan
                   x="57.384491"
                   sodipodi:role="line"
                   id="tspan2419-32"
                   style="stroke-width:0.165405"
                   y="84.076599">Samples</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61-0"
                 x="57.370911"
                 y="90.474152"><tspan
                   x="57.370911"
                   sodipodi:role="line"
                   id="tspan2419-32-5"
                   style="stroke-width:0.165405"
                   y="90.474152">Samples</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61-5"
                 x="58.506416"
                 y="96.964363"><tspan
                   x="58.506416"
                   sodipodi:role="line"
                   id="tspan2419-32-1"
                   style="stroke-width:0.165405"
                   y="96.964363">Series</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61-5-4"
                 x="58.506416"
                 y="103.14616"><tspan
                   x="58.506416"
                   sodipodi:role="line"
                   id="tspan2419-32-1-6"
                   style="stroke-width:0.165405"
                   y="103.14616">Series</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61-7"
                 x="60.521767"
                 y="109.22523"><tspan
                   x="60.521767"
                   sodipodi:role="line"
                   id="tspan2419-32-4"
                   style="stroke-width:0.165405"
                   y="109.22523">all</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61-7-3"
                 x="60.521767"
                 y="115.54096"><tspan
                   x="60.521767"
                   sodipodi:role="line"
                   id="tspan2419-32-4-1"
                   style="stroke-width:0.165405"
                   y="115.54096">all</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61-02"
                 x="57.384491"
                 y="74.638855"><tspan
                   x="57.384491"
                   sodipodi:role="line"
                   id="tspan2419-32-9"
                   style="stroke-width:0.165405"
                   y="74.638855">Samples</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-61-4"
                 x="57.384491"
                 y="78.953064"><tspan
                   x="57.384491"
                   sodipodi:role="line"
                   id="tspan2419-32-3"
                   style="stroke-width:0.165405"
                   y="78.953064">Samples</tspan></text></g></g><g
             id="g5827"
             transform="translate(-0.24853097)"><text
               xml:space="preserve"
               style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
               id="text2421-1-2"
               x="70.800049"
               y="70.081482"><tspan
                 sodipodi:role="line"
                 id="tspan2419-5-2"
                 style="stroke-width:0.165405"
                 x="70.800049"
                 y="70.081482">Metadata</tspan></text><g
               id="g5766"><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60"
                 x="70.480568"
                 y="84.264534"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27"
                   style="stroke-width:0.165405"
                   x="70.480568"
                   y="84.264534">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-2"
                 x="70.480568"
                 y="90.662086"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-8"
                   style="stroke-width:0.165405"
                   x="70.480568"
                   y="90.662086">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-8"
                 x="70.480568"
                 y="96.951759"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-9"
                   style="stroke-width:0.165405"
                   x="70.480568"
                   y="96.951759">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-28"
                 x="70.480568"
                 y="103.13356"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-88"
                   style="stroke-width:0.165405"
                   x="70.480568"
                   y="103.13356">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-6"
                 x="70.480568"
                 y="109.22523"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-83"
                   style="stroke-width:0.165405"
                   x="70.480568"
                   y="109.22523">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-83"
                 x="70.480568"
                 y="115.54096"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-3"
                   style="stroke-width:0.165405"
                   x="70.480568"
                   y="115.54096">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-82-3"
                 x="73.338478"
                 y="74.82679"><tspan
                   sodipodi:role="line"
                   id="tspan2419-99-8"
                   style="stroke-width:0.165405"
                   x="73.338478"
                   y="74.82679">Raw</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-82-0"
                 x="73.338478"
                 y="79.140999"><tspan
                   sodipodi:role="line"
                   id="tspan2419-99-4"
                   style="stroke-width:0.165405"
                   x="73.338478"
                   y="79.140999">Raw</tspan></text></g></g><g
             id="g5848"><text
               xml:space="preserve"
               style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
               id="text2421-1-6"
               x="85.83506"
               y="70.081482"><tspan
                 sodipodi:role="line"
                 id="tspan2419-5-41"
                 style="stroke-width:0.165405"
                 x="85.83506"
                 y="70.081482">Data</tspan></text><g
               id="g5748"><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-2-7"
                 x="83.05851"
                 y="90.662086"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-8-6"
                   style="stroke-width:0.165405"
                   x="83.05851"
                   y="90.662086">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-6-8"
                 x="83.05851"
                 y="115.54096"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-83-9"
                   style="stroke-width:0.165405"
                   x="83.05851"
                   y="115.54096">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-6-0"
                 x="83.05851"
                 y="103.13356"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-83-6"
                   style="stroke-width:0.165405"
                   x="83.05851"
                   y="103.13356">Processed</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-6-87"
                 x="85.53582"
                 y="84.264534"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-83-90"
                   style="stroke-width:0.165405"
                   x="85.53582"
                   y="84.264534">None</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-6-87-37"
                 x="85.53582"
                 y="96.951759"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-83-90-32"
                   style="stroke-width:0.165405"
                   x="85.53582"
                   y="96.951759">None</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-6-87-6"
                 x="85.53582"
                 y="109.22523"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-83-90-5"
                   style="stroke-width:0.165405"
                   x="85.53582"
                   y="109.22523">None</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-82"
                 x="85.91642"
                 y="74.82679"><tspan
                   sodipodi:role="line"
                   id="tspan2419-99"
                   style="stroke-width:0.165405"
                   x="85.91642"
                   y="74.82679">Raw</tspan></text><text
                 xml:space="preserve"
                 style="font-variant:normal;font-weight:normal;font-size:2.15026px;font-family:Arial;-inkscape-font-specification:ArialMT;writing-mode:lr-tb;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.165405"
                 id="text2421-60-6-87-3"
                 x="85.53582"
                 y="79.140999"><tspan
                   sodipodi:role="line"
                   id="tspan2419-27-83-90-3"
                   style="stroke-width:0.165405"
                   x="85.53582"
                   y="79.140999">None</tspan></text></g></g></g></g><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.0689336;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.313595"
         d="m 10.877664,80.296107 81.920017,0.0064"
         id="path11467-4-8"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /><path
         style="display:inline;fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:0.0689336;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:none;stroke-opacity:0.313595"
         d="m 10.877664,76.108452 81.920017,0.0064"
         id="path11467-4-7"
         inkscape:connector-type="polyline"
         inkscape:connector-curvature="0"
         sodipodi:nodetypes="cc" /></g></g><style
     id="style11718"
     type="text/css">
	.st0{fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:10;}
	.st1{fill:none;stroke:#000000;stroke-width:2;stroke-linejoin:round;stroke-miterlimit:10;}
	.st2{fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:10;stroke-dasharray:2,2;}
	.st3{fill:none;stroke:#000000;stroke-width:2;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10;}
	.st4{fill:none;stroke:#000000;stroke-width:2;stroke-linecap:round;stroke-miterlimit:10;}
	.st5{fill:none;stroke:#000000;stroke-width:2;stroke-linejoin:bevel;stroke-miterlimit:10;}
	.st6{fill:none;stroke:#000000;stroke-width:2;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:2,2;}
	.st7{fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:10;stroke-dasharray:2,3;}
</style><style
     id="style4"
     type="text/css">
	.st0{clip-path:url(#x);}
	.st1{opacity:0.54;clip-path:url(#w);}
	.st2{clip-path:url(#v);}
	.st3{opacity:0.54;clip-path:url(#u);}
	.st4{clip-path:url(#t);}
	.st5{opacity:0.54;clip-path:url(#s);}
	.st6{clip-path:url(#r);}
	.st7{opacity:0.54;clip-path:url(#q);}
	.st8{clip-path:url(#p);}
	.st9{opacity:0.54;clip-path:url(#o);}
	.st10{clip-path:url(#n);}
	.st11{opacity:0.54;clip-path:url(#m);}
</style><style
     id="style9128"
     type="text/css">
	.st0{fill:#000000;}
	.st1{fill:#000000;}
</style></svg>
" + ] + } + }, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![arguments_outputs.svg](attachment:arguments_outputs.svg)" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/docs_jupyter/python-usage.ipynb b/docs_jupyter/python-usage.ipynb new file mode 100644 index 0000000..d295392 --- /dev/null +++ b/docs_jupyter/python-usage.ipynb @@ -0,0 +1,725 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "67fc2596", + "metadata": {}, + "source": [ + "# Tutorial: using geofetch as a Python package" + ] + }, + { + "cell_type": "markdown", + "id": "3ced4b1e", + "metadata": {}, + "source": [ + "♪♫*•♪♪♫*•♪♪♫*•♪♪♫*•♪♪♫*" + ] + }, + { + "cell_type": "markdown", + "id": "0093b8ef", + "metadata": {}, + "source": [ + "Geofetch provides Python functions to fetch metadata and data from GEO and SRA. The `get_project` function returns a dictionary of peppy projects that were found using the filters and input you specified.\n", + " peppy is a Python package that provides an API for handling standardized project and sample metadata. 
\n", + " \n", + "You can find more information here:\n", + " \n", + "http://peppy.databio.org/en/latest/\n", + "\n", + "http://pep.databio.org/en/2.0.0/" + ] + }, + { + "cell_type": "markdown", + "id": "64746e18", + "metadata": {}, + "source": [ + "### First let's import geofetch" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "60b65668", + "metadata": {}, + "outputs": [], + "source": [ + "from geofetch import Geofetcher" + ] + }, + { + "cell_type": "markdown", + "id": "b6edbdd7", + "metadata": {}, + "source": [ + "### Initialize a Geofetcher object by specifying the parameters that you want to use for downloading metadata/data" + ] + }, + { + "cell_type": "markdown", + "id": "dc107c16", + "metadata": {}, + "source": [ + "1) If you don't specify any parameters, the default parameters will be used" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "af268078", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metadata folder: /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name\n" + ] + } + ], + "source": [ + "geof = Geofetcher()" + ] + }, + { + "cell_type": "markdown", + "id": "1916922e", + "metadata": {}, + "source": [ + "2) To download processed data with samples and series, specify these two arguments:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d451856a", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metadata folder: /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name\n" + ] + } + ], + "source": [ + "geof = Geofetcher(processed=True, data_source=\"all\")" + ] + }, + { + "cell_type": "markdown", + "id": "8debdd11", + "metadata": {}, + "source": [ + "3) To tune where metadata should be stored in the project, use the following parameters:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f8edb462", + "metadata": { + "scrolled": 
true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Metadata folder: /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name\n" + ] + } + ], + "source": [ + "geof = Geofetcher(processed=True, data_source=\"all\", const_limit_project = 20, const_limit_discard = 500, attr_limit_truncate = 10000 )" + ] + }, + { + "cell_type": "markdown", + "id": "d2739b13", + "metadata": {}, + "source": [ + "4) To add more filters or other options, see the documentation" + ] + }, + { + "cell_type": "markdown", + "id": "00b66d4a", + "metadata": {}, + "source": [ + "## Run Geofetch" + ] + }, + { + "cell_type": "markdown", + "id": "5e6c5df8", + "metadata": {}, + "source": [ + "### By default: \n", + "1) No actual data will be downloaded (just_metadata=True)\n", + "\n", + "2) No soft files will be saved on the disk (discard_soft=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "12d70387", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Trying GSE95654 (not a file) as accession...\n", + "Trying GSE95654 (not a file) as accession...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ca09a8e58d97432fa8313cf788e78430", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Skipped 0 accessions. Starting now.\n", + "\u001b[38;5;200mProcessing accession 1 of 1: 'GSE95654'\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "Total number of processed SAMPLES files found is: 40\n", + "Total number of processed SERIES files found is: 0\n", + "Expanding metadata list...\n", + "Expanding metadata list...\n" + ] + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Finished processing 1 accession(s)\n", + "Cleaning soft files ...\n", + "Unifying and saving of metadata... \n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d6a43f81689c446b8b38a57f2ee5f38f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d54492fe1ca547ff9b0c839a5d2f30f1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Output()" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n",
+       "
\n" + ], + "text/plain": [ + "\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No files found. No data to save. File /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name/_series/_series.csv won't be created\n" + ] + } + ], + "source": [ + "projects = geof.get_project(\"GSE95654\")" + ] + }, + { + "cell_type": "markdown", + "id": "bc198009", + "metadata": {}, + "source": [ + "Check if projects were created by checking dict keys:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "95896f25", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['_samples'])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "projects.keys()" + ] + }, + { + "cell_type": "markdown", + "id": "4e27f971", + "metadata": {}, + "source": [ + "A project for samples was created! Now let's look into it." + ] + }, + { + "cell_type": "markdown", + "id": "fa2d0bda", + "metadata": {}, + "source": [ + "\\* The values of the dictionary are peppy projects. You can find more information about the peppy Project in the documentation: http://peppy.databio.org/en/latest/" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "e8642711", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "40" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(projects['_samples'].samples)" + ] + }, + { + "cell_type": "markdown", + "id": "a4d50082", + "metadata": {}, + "source": [ + "We got 40 samples from the GSE95654 project. To verify that this is correct, see: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE95654" + ] + }, + { + "cell_type": "markdown", + "id": "d0cd958a", + "metadata": {}, + "source": [ + "Now let's see the actual data. 
first 15 project and 5 clolumns:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "ba7be762", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_organism_ch1sample_channel_countsample_contact_namesample_library_sourcegenome_build
sample_name
RRBS_on_CRC_patient_8Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_adjacent_normal_colon_patient_8Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_CRC_patient_32Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_adjacent_normal_colon_patient_32Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_CRC_patient_41Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_adjacent_normal_colon_patient_41Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_CRC_patient_42Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_adjacent_normal_colon_patient_42Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_ACF_patient_173Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_ACF_patient_515Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_normal_crypts_patient_139Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_ACF_patient_143Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_normal_crypts_patient_143Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_normal_crypts_patient_165Homo sapiens1Xiwei,,Wugenomichg19
RRBS_on_ACF_patient_165Homo sapiens1Xiwei,,Wugenomichg19
\n", + "
" + ], + "text/plain": [ + " sample_organism_ch1 \\\n", + "sample_name \n", + "RRBS_on_CRC_patient_8 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_8 Homo sapiens \n", + "RRBS_on_CRC_patient_32 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_32 Homo sapiens \n", + "RRBS_on_CRC_patient_41 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_41 Homo sapiens \n", + "RRBS_on_CRC_patient_42 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_42 Homo sapiens \n", + "RRBS_on_ACF_patient_173 Homo sapiens \n", + "RRBS_on_ACF_patient_515 Homo sapiens \n", + "RRBS_on_normal_crypts_patient_139 Homo sapiens \n", + "RRBS_on_ACF_patient_143 Homo sapiens \n", + "RRBS_on_normal_crypts_patient_143 Homo sapiens \n", + "RRBS_on_normal_crypts_patient_165 Homo sapiens \n", + "RRBS_on_ACF_patient_165 Homo sapiens \n", + "\n", + " sample_channel_count \\\n", + "sample_name \n", + "RRBS_on_CRC_patient_8 1 \n", + "RRBS_on_adjacent_normal_colon_patient_8 1 \n", + "RRBS_on_CRC_patient_32 1 \n", + "RRBS_on_adjacent_normal_colon_patient_32 1 \n", + "RRBS_on_CRC_patient_41 1 \n", + "RRBS_on_adjacent_normal_colon_patient_41 1 \n", + "RRBS_on_CRC_patient_42 1 \n", + "RRBS_on_adjacent_normal_colon_patient_42 1 \n", + "RRBS_on_ACF_patient_173 1 \n", + "RRBS_on_ACF_patient_515 1 \n", + "RRBS_on_normal_crypts_patient_139 1 \n", + "RRBS_on_ACF_patient_143 1 \n", + "RRBS_on_normal_crypts_patient_143 1 \n", + "RRBS_on_normal_crypts_patient_165 1 \n", + "RRBS_on_ACF_patient_165 1 \n", + "\n", + " sample_contact_name \\\n", + "sample_name \n", + "RRBS_on_CRC_patient_8 Xiwei,,Wu \n", + "RRBS_on_adjacent_normal_colon_patient_8 Xiwei,,Wu \n", + "RRBS_on_CRC_patient_32 Xiwei,,Wu \n", + "RRBS_on_adjacent_normal_colon_patient_32 Xiwei,,Wu \n", + "RRBS_on_CRC_patient_41 Xiwei,,Wu \n", + "RRBS_on_adjacent_normal_colon_patient_41 Xiwei,,Wu \n", + "RRBS_on_CRC_patient_42 Xiwei,,Wu \n", + "RRBS_on_adjacent_normal_colon_patient_42 Xiwei,,Wu \n", + "RRBS_on_ACF_patient_173 Xiwei,,Wu 
\n", + "RRBS_on_ACF_patient_515 Xiwei,,Wu \n", + "RRBS_on_normal_crypts_patient_139 Xiwei,,Wu \n", + "RRBS_on_ACF_patient_143 Xiwei,,Wu \n", + "RRBS_on_normal_crypts_patient_143 Xiwei,,Wu \n", + "RRBS_on_normal_crypts_patient_165 Xiwei,,Wu \n", + "RRBS_on_ACF_patient_165 Xiwei,,Wu \n", + "\n", + " sample_library_source genome_build \n", + "sample_name \n", + "RRBS_on_CRC_patient_8 genomic hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_8 genomic hg19 \n", + "RRBS_on_CRC_patient_32 genomic hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_32 genomic hg19 \n", + "RRBS_on_CRC_patient_41 genomic hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_41 genomic hg19 \n", + "RRBS_on_CRC_patient_42 genomic hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_42 genomic hg19 \n", + "RRBS_on_ACF_patient_173 genomic hg19 \n", + "RRBS_on_ACF_patient_515 genomic hg19 \n", + "RRBS_on_normal_crypts_patient_139 genomic hg19 \n", + "RRBS_on_ACF_patient_143 genomic hg19 \n", + "RRBS_on_normal_crypts_patient_143 genomic hg19 \n", + "RRBS_on_normal_crypts_patient_165 genomic hg19 \n", + "RRBS_on_ACF_patient_165 genomic hg19 " + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "projects['_samples'].sample_table.iloc[:15 , :5]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index d7a3679..a1aabdc 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -2418,11 +2418,6 @@ def _parse_cmdl(cmdl): action="store_true", help="Use just the keys defined in this module when writing out 
metadata.", ) - raw_group.add_argument( - "--bam-conversion", - action="store_true", - help="specify this argument to convert bam files", - ) logmuse.add_logging_options(parser) return parser.parse_args(cmdl) diff --git a/mkdocs.yml b/mkdocs.yml index c6c6549..0d84ac9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -9,8 +9,10 @@ nav: - Introduction: README.md - Install and configure: install.md - SRA convert: sra_convert.md + - Tutorials: - Tutorial for processed data: processed-data-downloading.md - Tutorial for raw data: raw-data-downloading.md + - Python geofetch tutorial: python-usage.md - How-to Guides: - Specifying samples to download: file-specification.md - Set SRA data download location: howto-location.md From deaa72a6444f001c28f9687c1b527e01bfccb96b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 6 Sep 2022 17:37:49 -0400 Subject: [PATCH 35/61] Added additional documentation --- docs/changelog.md | 2 +- docs/how_to_get_gse.md | 6 ++++ docs/metadata_output.md | 55 ++++++++++++++++++++----------- requirements/requirements-all.txt | 1 + 4 files changed, 44 insertions(+), 20 deletions(-) create mode 100644 docs/how_to_get_gse.md diff --git a/docs/changelog.md b/docs/changelog.md index 23c3729..56bc55a 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,7 +1,7 @@ # Changelog ## [0.11.0] -- 2022-09-06 -- Added initialization of peppy Project without saving any files functionality +- Added initialization of peppy Project without saving any files - Added progress bar - Fixed None issue in config file - Switched way of saving soft files to request library diff --git a/docs/how_to_get_gse.md b/docs/how_to_get_gse.md new file mode 100644 index 0000000..b497f89 --- /dev/null +++ b/docs/how_to_get_gse.md @@ -0,0 +1,6 @@ +# How to find and get list of files of GSEs + +There is two options: +1) You can create your own file with GSE numbers that you want to download using geofetch +2) Use [gse_finder](https://github.com/pepkit/gse_finder) - Python package 
that enables get list +of GSEs in certain period of time and at the same time use NCBI filters \ No newline at end of file diff --git a/docs/metadata_output.md b/docs/metadata_output.md index e9f5fde..5763b2f 100644 --- a/docs/metadata_output.md +++ b/docs/metadata_output.md @@ -1,29 +1,46 @@ # Metadata output Geofetch produces [PEPs](http://pep.databio.org/) for either processed or raw data (including metadata from SRA). +A project can be created either for a single combined (whole) input or for each project separately. +(if `--acc-anno` is set). "combined" means that it will have rows for every sample in every GSE included +in your input. So if you just gave a single GSE, then the combined file is the same as the GSE file. -# Outdated: +**For raw data**: a metadata file will be created including SRA and GSM annotation. -For each GSE input accession (ACC), `geofetch` produces (if discard-soft is not set): +**For processed data**: a metadata file will be created just for GSE and GSM annotation. User +can choose which data should he download. There are 3 downloading options for processed: samples, series and both. + +### Single PEP will contain: +- project_name.csv - all metadata for sample processed data +- project_name_subannotation.csv (*just for raw data*) - for *merged* samples +(samples for which there are multiple SRR Runs for a single SRX `Experiment`) +- project_name.yaml - project config file that stores all project information + common samples metadata + +Storing common metadata in project file is an efficient way to reduce project size and complexity of csv files. +To specify and manage common metadata (where and how it should be stored) you can use next arguments: +`--const-limit-project`, `--const-limit-discard`, `--attr-limit-truncate` + +### Saving actual data: +Actual data will be saved if `--just-metadata` argument is not set. User should specify path to the folder where this +data should be downloaded. 
+ +---- +Additionally, for each GSE input accession (ACC), `geofetch` produces (if discard-soft is not set): - GSE_ACC####.soft a SOFT file (annotating the experiment itself) - GSM_ACC####.soft a SOFT file (annotating the samples within the experiment) - SRA_ACC####.soft a CSV file (annotating each SRA Run, retrieved from GSE->GSM->SRA) -For raw data: -a single combined metadata file (.csv) will be created for the whole input, -including SRA and GSM annotations for each sample. Here, "combined" means that it will have -rows for every sample in every GSE included in your input. So if you just gave a single GSE, -then the combined file is the same as the GSE file. If any "merged" samples exist -(samples for which there are multiple SRR Runs for a single SRX `Experiment`), the -script will also produce a merge table CSV file with the relationships between -SRX and SRR. - -The way this works: Starting from a GSE, select a subset of samples (GSM Accessions) provided, -and then obtain the SRX identifier for each of these from GEO. Now, query SRA for these SRX -accessions and get any associated SRR accessions. Finally, download all of these SRR data files. - -### The most important metadata in pep format will be stored in -- NAME_annotation_sample_processed.csv - all metadata for sample processed data -- NAME_annotation.csv - all metadata for series processed data -- NAME_annotation_series_processed.csv file - all metadata for raw data +____ +# geofetch - Geofetcher using Python + +user can use geofetch in Python without saving any files. All the geofetch projects will be automatically downloaded +as peppy Project. It helps save time and processing work. + +THe output in this case will be dictionary of projects: +```python +{'key1': (some_project), + 'key2': (second_project)} +``` + +More information you can find in tutorial files. 
\ No newline at end of file diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index a916a7b..1eebcb5 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -10,3 +10,4 @@ xmltodict>=0.13.0 pandas>=1.3.5 peppy>=0.35.0 rich>=12.5.1 +coloredlogs>=15.0.1 From 2f447bd66658d280c153992a7ebd39e0fd76f9a1 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 6 Sep 2022 17:45:33 -0400 Subject: [PATCH 36/61] Added additional documentation2 --- docs/how_to_get_gse.md | 4 +++- mkdocs.yml | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/how_to_get_gse.md b/docs/how_to_get_gse.md index b497f89..2e7f30a 100644 --- a/docs/how_to_get_gse.md +++ b/docs/how_to_get_gse.md @@ -1,6 +1,8 @@ # How to find and get list of files of GSEs -There is two options: +### There is two options: + 1) You can create your own file with GSE numbers that you want to download using geofetch + 2) Use [gse_finder](https://github.com/pepkit/gse_finder) - Python package that enables get list of GSEs in certain period of time and at the same time use NCBI filters \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 0d84ac9..c72d2f0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -16,6 +16,7 @@ nav: - How-to Guides: - Specifying samples to download: file-specification.md - Set SRA data download location: howto-location.md + - Download list of GSEs: how_to_get_gse.md - Reference: - Metadata output: metadata_output.md - Usage: usage.md From a9536851a5d31f2273c58c9929354611c2e4f186 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 16:19:06 -0400 Subject: [PATCH 37/61] Change naming of PEPs and function --- docs/README.md | 3 ++- geofetch/geofetch.py | 15 +++++++++++---- tests/test_geofetch.py | 8 ++++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/docs/README.md b/docs/README.md index 2f21910..19e4f98 100644 --- a/docs/README.md +++ b/docs/README.md @@ -46,6 +46,7 @@ 
geofetch -i GSE95654 --processed --just-metadata ### New geofetch 0.11.0 feature: - Now geofetch is available as Python package to straight initiate [peppy](http://peppy.databio.org/) projects without downloading any soft files. + ```python from geofetch import Geofetcher @@ -53,7 +54,7 @@ from geofetch import Geofetcher geof = Geofetcher(processed=True, acc_anno=True, discard_soft=True) # get projects by providing as input GSE or file with GSEs -geof.get_project("GSE160204") +geof.get_projects("GSE160204") ``` For more details, check out the [usage](usage.md) reference, [installation instructions](install.md), or head on over to the [tutorial for raw data](raw-data-downloading.md) and [tutorial for processed data](processed-data-downloading.md) for a detailed walkthrough. diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index a1aabdc..7fc44e0 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -240,7 +240,7 @@ def __init__( self.just_object = False - def get_project( + def get_projects( self, input: str, just_metadata: bool = True, discard_soft: bool = True ) -> dict: """ @@ -273,7 +273,11 @@ def get_project( ) project_dict.update(self.fetch_all(input=acc_GSE, name=acc_GSE)) else: - project_dict.update(self.fetch_all(input=input, name="")) + try: + project_n = os.path.splitext(os.path.basename(input))[0] + except TypeError: + project_n = input + project_dict.update(self.fetch_all(input=input, name=project_n)) # raw data: else: @@ -292,8 +296,12 @@ def get_project( project_dict[acc_GSE + "_raw"] = project else: + try: + project_n = os.path.splitext(os.path.basename(input))[0] + except TypeError: + project_n = input ser_dict = self.fetch_all(input=input) - project_dict["raw"] = ser_dict + project_dict[project_n + "_raw"] = ser_dict new_pr_dict = {} for pr_key in project_dict.keys(): @@ -307,7 +315,6 @@ def fetch_all(self, input: str, name: str = None): if name is not None: self.project_name = name - print(self.project_name) else: try: 
self.project_name = os.path.splitext(os.path.basename(input))[0] diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index da9f788..efc052b 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -314,12 +314,12 @@ def initiate_geofetcher(self, tmpdir): yield instance def test_creating_processed_peppy(self, initiate_geofetcher): - p_prop = initiate_geofetcher.get_project("GSE190287") + p_prop = initiate_geofetcher.get_projects("GSE190287") assert isinstance(p_prop["_samples"], peppy.Project) assert isinstance(p_prop["_series"], peppy.Project) def test_number_of_samples(self, initiate_geofetcher): - p_prop = initiate_geofetcher.get_project("GSE190287") + p_prop = initiate_geofetcher.get_projects("GSE190287") assert len(p_prop["_samples"].samples) == 8 # it has 11 files but 8 samples assert len(p_prop["_series"].samples) == 2 @@ -341,11 +341,11 @@ def initiate_geofetcher(self, tmpdir): yield instance def test_creating_processed_peppy(self, initiate_geofetcher): - p_prop = initiate_geofetcher.get_project("GSE189141") + p_prop = initiate_geofetcher.get_projects("GSE189141") assert isinstance(p_prop["raw"], peppy.Project) def test_number_of_samples(self, initiate_geofetcher): - p_prop = initiate_geofetcher.get_project("GSE189141") + p_prop = initiate_geofetcher.get_projects("GSE189141") a = [d["sample_name"] for d in p_prop["raw"].samples] assert len(p_prop["raw"].samples) == 16 # it has 16 samples From 34a3a3c4fef9e6a230e03b195fe545935e47bd4e Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 16:23:49 -0400 Subject: [PATCH 38/61] black + tests --- tests/test_geofetch.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/tests/test_geofetch.py b/tests/test_geofetch.py index efc052b..ad8c923 100644 --- a/tests/test_geofetch.py +++ b/tests/test_geofetch.py @@ -314,14 +314,18 @@ def initiate_geofetcher(self, tmpdir): yield instance def test_creating_processed_peppy(self, 
initiate_geofetcher): - p_prop = initiate_geofetcher.get_projects("GSE190287") - assert isinstance(p_prop["_samples"], peppy.Project) - assert isinstance(p_prop["_series"], peppy.Project) + gse_numb = "GSE190287" + p_prop = initiate_geofetcher.get_projects(gse_numb) + assert isinstance(p_prop[f"{gse_numb}_samples"], peppy.Project) + assert isinstance(p_prop[f"{gse_numb}_series"], peppy.Project) def test_number_of_samples(self, initiate_geofetcher): - p_prop = initiate_geofetcher.get_projects("GSE190287") - assert len(p_prop["_samples"].samples) == 8 # it has 11 files but 8 samples - assert len(p_prop["_series"].samples) == 2 + gse_numb = "GSE190287" + p_prop = initiate_geofetcher.get_projects(gse_numb) + assert ( + len(p_prop[f"{gse_numb}_samples"].samples) == 8 + ) # it has 11 files but 8 samples + assert len(p_prop[f"{gse_numb}_series"].samples) == 2 class TestPeppyInitRaw: @@ -341,13 +345,15 @@ def initiate_geofetcher(self, tmpdir): yield instance def test_creating_processed_peppy(self, initiate_geofetcher): - p_prop = initiate_geofetcher.get_projects("GSE189141") - assert isinstance(p_prop["raw"], peppy.Project) + gse_numb = "GSE189141" + p_prop = initiate_geofetcher.get_projects(gse_numb) + assert isinstance(p_prop[f"{gse_numb}_raw"], peppy.Project) def test_number_of_samples(self, initiate_geofetcher): - p_prop = initiate_geofetcher.get_projects("GSE189141") - a = [d["sample_name"] for d in p_prop["raw"].samples] - assert len(p_prop["raw"].samples) == 16 # it has 16 samples + gse_numb = "GSE189141" + p_prop = initiate_geofetcher.get_projects(gse_numb) + a = [d["sample_name"] for d in p_prop[f"{gse_numb}_raw"].samples] + assert len(p_prop[f"{gse_numb}_raw"].samples) == 16 # it has 16 samples def test_clean_func(tmpdir): From ebe586e67a9fa1140c6692ed0bd2c9f90600f008 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 7 Sep 2022 16:43:53 -0400 Subject: [PATCH 39/61] Added Finder to geofetch + documentation + peppy version --- docs/gse_finder.md | 82 
+++++++++++++ docs/how_to_get_gse.md | 8 -- geofetch/__init__.py | 1 + geofetch/const.py | 16 +++ geofetch/finder.py | 186 ++++++++++++++++++++++++++++++ mkdocs.yml | 2 +- requirements/requirements-all.txt | 2 +- 7 files changed, 287 insertions(+), 10 deletions(-) create mode 100644 docs/gse_finder.md delete mode 100644 docs/how_to_get_gse.md create mode 100644 geofetch/finder.py diff --git a/docs/gse_finder.md b/docs/gse_finder.md new file mode 100644 index 0000000..dc949ab --- /dev/null +++ b/docs/gse_finder.md @@ -0,0 +1,82 @@ +s a python package that provides functions to find and retrieve a list of GSE ([GEO](https://www.ncbi.nlm.nih.gov/geo/) accession number) by using NCBI searching tool. + + +### The main features of the geofetch Finder are: +- Find GEO accession numbers (GSE) of the project that were uploaded or updated in certain period of time. +- Use the same filter query as [GEO DataSets Advanced Search Builder](https://www.ncbi.nlm.nih.gov/gds/advanced) is using +- Save list of the GSEs to file (This file with geo can be used later in **[geofetch](http://geofetch.databio.org/en/latest/)**) +- Fast execution time +- Easy to use + + +___ +## Tutorial + +0) Initiale Finder object. 
+```python +from geofetch import Finder +gse_obj = Finder() + +# Optionally: provide filter string and max number of retrieve elements +gse_obj = Finder(filter="((bed) OR narrow peak) AND Homo sapiens[Organism]", retmax=10) +``` + +1) Get list of all GSE in GEO +```python + +gse_list = gse_obj.get_gse_all() + +``` + +2) Get list of GSE that were uploaded and updated last week +```python + +gse_list = gse_obj.get_gse_last_week() + +``` + +3) Get list of GSE that were uploaded and updated last 3 month +```python + +gse_list = gse_obj.get_gse_last_3_month() + +``` + +4) Get list of GSE that were uploaded and updated in las *number of days* +```python + +# project that were uploaded in last 5 days: +gse_list = gse_obj.get_gse_by_day_count(5) + +``` + +5) Get list of GSE that were uploaded in certain period of time +```python + +gse_list = gse_obj.get_gse_by_date(start_date="2015/05/05", end_date="2020/05/05") + +``` + +6) Save last searched list of items to the file +```python + +gse_obj.generate_file("path/to/the/file") + +# if you want to save different list of files you can provide it to the funciton +gse_obj.generate_file("path/to/the/file", gse_list=["123", "124"]) + +``` + +7) Compare two lists: +```python + +new_gse_list = gse_obj.find_differences(list1, list2) + +``` + +---- + +More information about gse and queries and id: +- https://www.ncbi.nlm.nih.gov/geo/info/geo_paccess.html +- https://newarkcaptain.com/how-to-retrieve-ncbi-geo-information-using-apis-part1/ +- https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Using_the_Advanced_Search_Pag \ No newline at end of file diff --git a/docs/how_to_get_gse.md b/docs/how_to_get_gse.md deleted file mode 100644 index 2e7f30a..0000000 --- a/docs/how_to_get_gse.md +++ /dev/null @@ -1,8 +0,0 @@ -# How to find and get list of files of GSEs - -### There is two options: - -1) You can create your own file with GSE numbers that you want to download using geofetch - -2) Use 
[gse_finder](https://github.com/pepkit/gse_finder) - Python package that enables get list -of GSEs in certain period of time and at the same time use NCBI filters \ No newline at end of file diff --git a/geofetch/__init__.py b/geofetch/__init__.py index 003168a..ef3887f 100644 --- a/geofetch/__init__.py +++ b/geofetch/__init__.py @@ -1,5 +1,6 @@ """ Package-level data """ from .geofetch import * +from .finder import * from ._version import __version__ import logmuse diff --git a/geofetch/const.py b/geofetch/const.py index 566aeac..91669c3 100644 --- a/geofetch/const.py +++ b/geofetch/const.py @@ -46,3 +46,19 @@ CONFIG_PROCESSED_TEMPLATE_NAME = "config_processed_template.yaml" CONFIG_RAW_TEMPLATE_NAME = "config_template.yaml" + +# const for Finder: +RETMAX = 10000000 # once it should be increased + +# gds = geo DataSets +ETOOLS_GEO_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gds" +ETOOLS_GEO_GSE_BASE = f"{ETOOLS_GEO_BASE}&term=GSE[ETYP]" + +ETOOLS_ENDING = "&retmax={retmax}&usehistory=y" + +TODAY_DATE = "3000" + +DATE_FILTER = ( + '+AND+("{start_date}"[Publication%20Date]%20:%20"{end_date}"[Publication%20Date])' +) +THREE_MONTH_FILTER = '+AND+"published+last+3+months"[Filter]' diff --git a/geofetch/finder.py b/geofetch/finder.py new file mode 100644 index 0000000..eeb105b --- /dev/null +++ b/geofetch/finder.py @@ -0,0 +1,186 @@ +from .const import ( + RETMAX, + ETOOLS_GEO_GSE_BASE, + ETOOLS_ENDING, + TODAY_DATE, + DATE_FILTER, + THREE_MONTH_FILTER, +) +import requests +import xmltodict +import re +import os +import logmuse +import coloredlogs +from datetime import datetime +from datetime import timedelta + +__author__ = "Oleksandr Khoroshevskyi" + +_LOGGER = logmuse.init_logger("pepannot") +coloredlogs.install( + logger=_LOGGER, + datefmt="%H:%M:%S", + fmt="[%(levelname)s] [%(asctime)s] %(message)s", +) + + +class Finder: + """ + Class for finding GSE accessions in special period of time + Additionally user can add specific filters for the 
search. + """ + + def __init__(self, filters: str = None, retmax: int = RETMAX): + """ + :param filters: filters that have to be added to query. + Filter Patterns can be found here: + https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Using_the_Advanced_Search_Pag + :param retmax: maximum items should be retrieved + """ + self.query_customized_ending = ETOOLS_ENDING.format(retmax=retmax) + self.query_filter_str = self._create_filter_str(filters) + self.last_result = [] + + def get_gse_all(self) -> list: + """ + Get list of all gse accession available in GEO + :return: list of gse accession + """ + return self.get_gse_id_by_query(url=self._compose_url()) + + def get_gse_last_3_month(self) -> list: + """ + Get list of gse accession that were uploaded or updated in last 3 month + :return: list of gse accession + """ + return self.get_gse_id_by_query(url=self._compose_url(THREE_MONTH_FILTER)) + + def get_gse_last_week(self) -> list: + """ + Get list of gse accession that were uploaded or updated in last week + :return: list of gse accession + """ + return self.get_gse_by_day_count(7) + + def get_gse_by_day_count(self, n_days: int = 1) -> list: + """ + Get list of gse accession that were uploaded or updated in last specified number of days + :param n_days: number of days from now + :return: list of gse accession + """ + today = datetime.today() + start_date = today - timedelta(days=n_days) + start_date_str = start_date.strftime("%Y/%m/%d") + return self.get_gse_by_date(start_date_str) + + def get_gse_by_date(self, start_date: str, end_date: str = None) -> list: + """ + Search gse accessions by providing start date and end date. By default, the last date is today. 
+ :param start_date: the oldest date of update (from YYYY/MM/DD to now) [input format: 'YYYY/MM/DD'] + :param end_date: the nearest date of update (from __ to YYYY/MM/DD) [input format: 'YYYY/MM/DD'] + :return: list of gse accessions + """ + if end_date is None: + end_date = TODAY_DATE + new_date_filter = DATE_FILTER.format(start_date=start_date, end_date=end_date) + return self.get_gse_id_by_query(url=self._compose_url(new_date_filter)) + + def get_gse_id_by_query(self, url: str) -> list: + """ + Use esearch query to find uids and then convert them to gse ids + :param url: url of the query + :return: list of gse ids + """ + uids_list = self._run_search_query(url) + gse_id_list = [self.uid_to_gse(d) for d in uids_list] + self.last_result = gse_id_list + return gse_id_list + + @staticmethod + def uid_to_gse(uid: str) -> str: + """ + UID to GES accession converter + :param uid: uid string + :return: GSE id string + """ + uid_regex = re.compile(r"[1-9]+0+([1-9]+[0-9]*)") + return "GSE" + uid_regex.match(uid).group(1) + + def read_file(self, file_path: str) -> list: + """ + Getting list of gse's from file by specifying a file path + :param file_path: path to the file + :return: list of gse's + """ + pass + + @staticmethod + def find_differences(old_list: list, new_list: list) -> list: + """ + Comparing 2 lists and searching for elements that are not in old list + :param old_list: old list of elements + :param new_list: new list of elements + :return: list of elements that are not in old list but are in new_list + """ + return list(set(new_list) - set(old_list)) + + @staticmethod + def _run_search_query(url: str) -> list: + """ + Run get request and return list of uids found + :param url: url of the query + :return: list of UIDs + """ + x = requests.get(url) + if x.status_code != 200: + _LOGGER.error(f"Request status != 200. Error. 
Check your request") + return [] + try: + x_result = xmltodict.parse(x.text)["eSearchResult"] + _LOGGER.info(f"Found elements: {x_result['Count']}") + _LOGGER.info(f"Additional information: {x_result['TranslationSet']}") + + return x_result["IdList"]["Id"] + except Exception: + return [] + + @staticmethod + def _create_filter_str(filters: str = None) -> str: + """ + Tuning filter for url request + :param filters: filter should look like here: https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Using_the_Advanced_Search_Pag + :return: tuned filter string + """ + if filters == "" or filters is None: + return "" + return f"+(AND+{filters})" + + def _compose_url(self, date_filter: str = None) -> str: + """ + Composing final url by adding date filter + :param date_filter: date filter that has to be used in the query + :return: string of final url + """ + if date_filter is None: + date_filter = "" + + return f"{ETOOLS_GEO_GSE_BASE}{self.query_filter_str}{date_filter}{self.query_customized_ending}" + + def generate_file(self, file_path: str, gse_list: list = None): + """ + Saving list of gse numbers to the file + :param file_path: root to the file where gse accessions have to be saved + :param gse_list: list of gse accessions + :return: NoReturn + """ + if gse_list is None: + gse_list = self.last_result + file_dir = os.path.split(file_path)[0] + if not os.path.exists(file_dir) and file_dir != "": + _LOGGER.error(f"Path: '{file_dir}' does not exist! 
No file will be saved") + + with open(file_path, "w") as fp: + for item in gse_list: + fp.write("%s\n" % item) + _LOGGER.info("File has been saved!") diff --git a/mkdocs.yml b/mkdocs.yml index c72d2f0..69b5a07 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,10 +13,10 @@ nav: - Tutorial for processed data: processed-data-downloading.md - Tutorial for raw data: raw-data-downloading.md - Python geofetch tutorial: python-usage.md + - Use GSE Finder: gse_finder.md - How-to Guides: - Specifying samples to download: file-specification.md - Set SRA data download location: howto-location.md - - Download list of GSEs: how_to_get_gse.md - Reference: - Metadata output: metadata_output.md - Usage: usage.md diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 1eebcb5..f3ec919 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -8,6 +8,6 @@ ubiquerg>=0.6.0 requests>=2.28.1 xmltodict>=0.13.0 pandas>=1.3.5 -peppy>=0.35.0 +peppy>=0.35.1 rich>=12.5.1 coloredlogs>=15.0.1 From de2a0f1cede1fd66e519089245cb64d5fd435c06 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 8 Sep 2022 11:25:23 -0400 Subject: [PATCH 40/61] added Finder documentation --- docs/README.md | 28 ++- docs/changelog.md | 1 + docs/gse_finder.md | 5 +- docs_jupyter/python-usage.ipynb | 317 +++++++++++++++----------------- 4 files changed, 181 insertions(+), 170 deletions(-) diff --git a/docs/README.md b/docs/README.md index 19e4f98..e0b9c74 100644 --- a/docs/README.md +++ b/docs/README.md @@ -14,6 +14,15 @@ ![](./img/pipeline.svg) +Key geofetch advantages: + +- Works with GEO and SRA metadata +- Combines samples from different projects +- Standardizes output metadata +- Filters type and size of processed files (from GEO) before downloading them +- Easy to use +- Fast execution + ## Quick example `geofetch` runs on the command line. This command will download the raw data and metadata for the given GSE number. 
@@ -44,8 +53,9 @@ geofetch -i GSE95654 --processed --just-metadata ![](./img/arguments_outputs.svg) -### New geofetch 0.11.0 feature: -- Now geofetch is available as Python package to straight initiate [peppy](http://peppy.databio.org/) projects without downloading any soft files. +--- +### New features available in geofetch 0.11.0 : +1) Now geofetch is available as Python package. Geofetch can initialize [peppy](http://peppy.databio.org/) projects without downloading any soft files. Example: ```python from geofetch import Geofetcher @@ -57,5 +67,19 @@ geof = Geofetcher(processed=True, acc_anno=True, discard_soft=True) geof.get_projects("GSE160204") ``` +2) Now to find GSEs and save them to file you can use `Finder` - GSE finder tool: + +```python +from geofetch import Finder + +# initiate Finder (use filters if necessary) +find_gse = Finder(filters='bed') + +# get all projects that were found: +gse_list = find_gse.get_gse_all() +``` +Find more information here: [GSE Finder](./gse_finder.md) + + For more details, check out the [usage](usage.md) reference, [installation instructions](install.md), or head on over to the [tutorial for raw data](raw-data-downloading.md) and [tutorial for processed data](processed-data-downloading.md) for a detailed walkthrough. diff --git a/docs/changelog.md b/docs/changelog.md index 56bc55a..f9c3c57 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,6 +2,7 @@ ## [0.11.0] -- 2022-09-06 - Added initialization of peppy Project without saving any files +- Added Finder (searching GSE tool) - Added progress bar - Fixed None issue in config file - Switched way of saving soft files to request library diff --git a/docs/gse_finder.md b/docs/gse_finder.md index dc949ab..14a353f 100644 --- a/docs/gse_finder.md +++ b/docs/gse_finder.md @@ -1,12 +1,11 @@ -s a python package that provides functions to find and retrieve a list of GSE ([GEO](https://www.ncbi.nlm.nih.gov/geo/) accession number) by using NCBI searching tool. 
+is a geofetch class that provides functions to find and retrieve a list of GSE ([GEO](https://www.ncbi.nlm.nih.gov/geo/) accession number) by using NCBI searching tool. ### The main features of the geofetch Finder are: - Find GEO accession numbers (GSE) of the project that were uploaded or updated in certain period of time. - Use the same filter query as [GEO DataSets Advanced Search Builder](https://www.ncbi.nlm.nih.gov/gds/advanced) is using - Save list of the GSEs to file (This file with geo can be used later in **[geofetch](http://geofetch.databio.org/en/latest/)**) -- Fast execution time -- Easy to use +- Easier and faster to get GSEs using NCBI filter and certain period of time. ___ diff --git a/docs_jupyter/python-usage.ipynb b/docs_jupyter/python-usage.ipynb index d295392..9b34736 100644 --- a/docs_jupyter/python-usage.ipynb +++ b/docs_jupyter/python-usage.ipynb @@ -41,7 +41,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "60b65668", "metadata": {}, "outputs": [], @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "af268078", "metadata": { "scrolled": false @@ -95,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "id": "d451856a", "metadata": { "scrolled": false @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "id": "f8edb462", "metadata": { "scrolled": true @@ -170,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "id": "12d70387", "metadata": {}, "outputs": [ @@ -185,7 +185,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ca09a8e58d97432fa8313cf788e78430", + "model_id": "0f96c1a1ee8c48f4af31e0dc939fe116", "version_major": 2, "version_minor": 0 }, @@ -201,20 +201,7 @@ "output_type": "stream", "text": [ "Skipped 0 accessions. 
Starting now.\n", - "\u001b[38;5;200mProcessing accession 1 of 1: 'GSE95654'\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "\u001b[38;5;200mProcessing accession 1 of 1: 'GSE95654'\u001b[0m\n", "\n", "Total number of processed SAMPLES files found is: 40\n", "Total number of processed SERIES files found is: 0\n", @@ -257,7 +244,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d6a43f81689c446b8b38a57f2ee5f38f", + "model_id": "b2c4c738728b4b43938fa6e7f29615ef", "version_major": 2, "version_minor": 0 }, @@ -307,7 +294,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d54492fe1ca547ff9b0c839a5d2f30f1", + "model_id": "02401b3d938a4a588052ba99af677f84", "version_major": 2, "version_minor": 0 }, @@ -358,12 +345,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "No files found. No data to save. File /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name/_series/_series.csv won't be created\n" + "No files found. No data to save. 
File /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name/GSE95654_series/GSE95654_series.csv won't be created\n" ] } ], "source": [ - "projects = geof.get_project(\"GSE95654\")" + "projects = geof.get_projects(\"GSE95654\")" ] }, { @@ -376,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "id": "95896f25", "metadata": { "scrolled": false @@ -385,10 +372,10 @@ { "data": { "text/plain": [ - "dict_keys(['_samples'])" + "dict_keys(['GSE95654_samples'])" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -415,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "id": "e8642711", "metadata": {}, "outputs": [ @@ -425,13 +412,13 @@ "40" ] }, - "execution_count": 12, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "len(projects['_samples'].samples)" + "len(projects['GSE95654_samples'].samples)" ] }, { @@ -452,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 10, "id": "ba7be762", "metadata": {}, "outputs": [ @@ -477,11 +464,11 @@ " \n", " \n", " \n", - " sample_organism_ch1\n", - " sample_channel_count\n", - " sample_contact_name\n", - " sample_library_source\n", + " sample_name\n", + " sample_library_strategy\n", " genome_build\n", + " tissue\n", + " sample_organism_ch1\n", " \n", " \n", " sample_name\n", @@ -495,209 +482,209 @@ " \n", " \n", " RRBS_on_CRC_patient_8\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_CRC_patient_8\n", + " Bisulfite-Seq\n", " hg19\n", + " primary tumor\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_adjacent_normal_colon_patient_8\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_adjacent_normal_colon_patient_8\n", + " Bisulfite-Seq\n", " hg19\n", + " adjacent normal colon\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_CRC_patient_32\n", - " Homo sapiens\n", - " 
1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_CRC_patient_32\n", + " Bisulfite-Seq\n", " hg19\n", + " primary tumor\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_adjacent_normal_colon_patient_32\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_adjacent_normal_colon_patient_32\n", + " Bisulfite-Seq\n", " hg19\n", + " adjacent normal colon\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_CRC_patient_41\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_CRC_patient_41\n", + " Bisulfite-Seq\n", " hg19\n", + " primary tumor\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_adjacent_normal_colon_patient_41\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_adjacent_normal_colon_patient_41\n", + " Bisulfite-Seq\n", " hg19\n", + " adjacent normal colon\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_CRC_patient_42\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_CRC_patient_42\n", + " Bisulfite-Seq\n", " hg19\n", + " primary tumor\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_adjacent_normal_colon_patient_42\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_adjacent_normal_colon_patient_42\n", + " Bisulfite-Seq\n", " hg19\n", + " adjacent normal colon\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_ACF_patient_173\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_ACF_patient_173\n", + " Bisulfite-Seq\n", " hg19\n", + " aberrant crypt foci\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_ACF_patient_515\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_ACF_patient_515\n", + " Bisulfite-Seq\n", " hg19\n", + " aberrant crypt foci\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_normal_crypts_patient_139\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_normal_crypts_patient_139\n", + " Bisulfite-Seq\n", " hg19\n", + " 
normal colonic crypt\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_ACF_patient_143\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_ACF_patient_143\n", + " Bisulfite-Seq\n", " hg19\n", + " aberrant crypt foci\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_normal_crypts_patient_143\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_normal_crypts_patient_143\n", + " Bisulfite-Seq\n", " hg19\n", + " normal colonic crypt\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_normal_crypts_patient_165\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_normal_crypts_patient_165\n", + " Bisulfite-Seq\n", " hg19\n", + " normal colonic crypt\n", + " Homo sapiens\n", " \n", " \n", " RRBS_on_ACF_patient_165\n", - " Homo sapiens\n", - " 1\n", - " Xiwei,,Wu\n", - " genomic\n", + " RRBS_on_ACF_patient_165\n", + " Bisulfite-Seq\n", " hg19\n", + " aberrant crypt foci\n", + " Homo sapiens\n", " \n", " \n", "\n", "" ], "text/plain": [ - " sample_organism_ch1 \\\n", - "sample_name \n", - "RRBS_on_CRC_patient_8 Homo sapiens \n", - "RRBS_on_adjacent_normal_colon_patient_8 Homo sapiens \n", - "RRBS_on_CRC_patient_32 Homo sapiens \n", - "RRBS_on_adjacent_normal_colon_patient_32 Homo sapiens \n", - "RRBS_on_CRC_patient_41 Homo sapiens \n", - "RRBS_on_adjacent_normal_colon_patient_41 Homo sapiens \n", - "RRBS_on_CRC_patient_42 Homo sapiens \n", - "RRBS_on_adjacent_normal_colon_patient_42 Homo sapiens \n", - "RRBS_on_ACF_patient_173 Homo sapiens \n", - "RRBS_on_ACF_patient_515 Homo sapiens \n", - "RRBS_on_normal_crypts_patient_139 Homo sapiens \n", - "RRBS_on_ACF_patient_143 Homo sapiens \n", - "RRBS_on_normal_crypts_patient_143 Homo sapiens \n", - "RRBS_on_normal_crypts_patient_165 Homo sapiens \n", - "RRBS_on_ACF_patient_165 Homo sapiens \n", + " sample_name \\\n", + "sample_name \n", + "RRBS_on_CRC_patient_8 RRBS_on_CRC_patient_8 \n", + "RRBS_on_adjacent_normal_colon_patient_8 
RRBS_on_adjacent_normal_colon_patient_8 \n", + "RRBS_on_CRC_patient_32 RRBS_on_CRC_patient_32 \n", + "RRBS_on_adjacent_normal_colon_patient_32 RRBS_on_adjacent_normal_colon_patient_32 \n", + "RRBS_on_CRC_patient_41 RRBS_on_CRC_patient_41 \n", + "RRBS_on_adjacent_normal_colon_patient_41 RRBS_on_adjacent_normal_colon_patient_41 \n", + "RRBS_on_CRC_patient_42 RRBS_on_CRC_patient_42 \n", + "RRBS_on_adjacent_normal_colon_patient_42 RRBS_on_adjacent_normal_colon_patient_42 \n", + "RRBS_on_ACF_patient_173 RRBS_on_ACF_patient_173 \n", + "RRBS_on_ACF_patient_515 RRBS_on_ACF_patient_515 \n", + "RRBS_on_normal_crypts_patient_139 RRBS_on_normal_crypts_patient_139 \n", + "RRBS_on_ACF_patient_143 RRBS_on_ACF_patient_143 \n", + "RRBS_on_normal_crypts_patient_143 RRBS_on_normal_crypts_patient_143 \n", + "RRBS_on_normal_crypts_patient_165 RRBS_on_normal_crypts_patient_165 \n", + "RRBS_on_ACF_patient_165 RRBS_on_ACF_patient_165 \n", "\n", - " sample_channel_count \\\n", - "sample_name \n", - "RRBS_on_CRC_patient_8 1 \n", - "RRBS_on_adjacent_normal_colon_patient_8 1 \n", - "RRBS_on_CRC_patient_32 1 \n", - "RRBS_on_adjacent_normal_colon_patient_32 1 \n", - "RRBS_on_CRC_patient_41 1 \n", - "RRBS_on_adjacent_normal_colon_patient_41 1 \n", - "RRBS_on_CRC_patient_42 1 \n", - "RRBS_on_adjacent_normal_colon_patient_42 1 \n", - "RRBS_on_ACF_patient_173 1 \n", - "RRBS_on_ACF_patient_515 1 \n", - "RRBS_on_normal_crypts_patient_139 1 \n", - "RRBS_on_ACF_patient_143 1 \n", - "RRBS_on_normal_crypts_patient_143 1 \n", - "RRBS_on_normal_crypts_patient_165 1 \n", - "RRBS_on_ACF_patient_165 1 \n", + " sample_library_strategy genome_build \\\n", + "sample_name \n", + "RRBS_on_CRC_patient_8 Bisulfite-Seq hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_8 Bisulfite-Seq hg19 \n", + "RRBS_on_CRC_patient_32 Bisulfite-Seq hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_32 Bisulfite-Seq hg19 \n", + "RRBS_on_CRC_patient_41 Bisulfite-Seq hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_41 Bisulfite-Seq 
hg19 \n", + "RRBS_on_CRC_patient_42 Bisulfite-Seq hg19 \n", + "RRBS_on_adjacent_normal_colon_patient_42 Bisulfite-Seq hg19 \n", + "RRBS_on_ACF_patient_173 Bisulfite-Seq hg19 \n", + "RRBS_on_ACF_patient_515 Bisulfite-Seq hg19 \n", + "RRBS_on_normal_crypts_patient_139 Bisulfite-Seq hg19 \n", + "RRBS_on_ACF_patient_143 Bisulfite-Seq hg19 \n", + "RRBS_on_normal_crypts_patient_143 Bisulfite-Seq hg19 \n", + "RRBS_on_normal_crypts_patient_165 Bisulfite-Seq hg19 \n", + "RRBS_on_ACF_patient_165 Bisulfite-Seq hg19 \n", "\n", - " sample_contact_name \\\n", - "sample_name \n", - "RRBS_on_CRC_patient_8 Xiwei,,Wu \n", - "RRBS_on_adjacent_normal_colon_patient_8 Xiwei,,Wu \n", - "RRBS_on_CRC_patient_32 Xiwei,,Wu \n", - "RRBS_on_adjacent_normal_colon_patient_32 Xiwei,,Wu \n", - "RRBS_on_CRC_patient_41 Xiwei,,Wu \n", - "RRBS_on_adjacent_normal_colon_patient_41 Xiwei,,Wu \n", - "RRBS_on_CRC_patient_42 Xiwei,,Wu \n", - "RRBS_on_adjacent_normal_colon_patient_42 Xiwei,,Wu \n", - "RRBS_on_ACF_patient_173 Xiwei,,Wu \n", - "RRBS_on_ACF_patient_515 Xiwei,,Wu \n", - "RRBS_on_normal_crypts_patient_139 Xiwei,,Wu \n", - "RRBS_on_ACF_patient_143 Xiwei,,Wu \n", - "RRBS_on_normal_crypts_patient_143 Xiwei,,Wu \n", - "RRBS_on_normal_crypts_patient_165 Xiwei,,Wu \n", - "RRBS_on_ACF_patient_165 Xiwei,,Wu \n", + " tissue \\\n", + "sample_name \n", + "RRBS_on_CRC_patient_8 primary tumor \n", + "RRBS_on_adjacent_normal_colon_patient_8 adjacent normal colon \n", + "RRBS_on_CRC_patient_32 primary tumor \n", + "RRBS_on_adjacent_normal_colon_patient_32 adjacent normal colon \n", + "RRBS_on_CRC_patient_41 primary tumor \n", + "RRBS_on_adjacent_normal_colon_patient_41 adjacent normal colon \n", + "RRBS_on_CRC_patient_42 primary tumor \n", + "RRBS_on_adjacent_normal_colon_patient_42 adjacent normal colon \n", + "RRBS_on_ACF_patient_173 aberrant crypt foci \n", + "RRBS_on_ACF_patient_515 aberrant crypt foci \n", + "RRBS_on_normal_crypts_patient_139 normal colonic crypt \n", + "RRBS_on_ACF_patient_143 aberrant 
crypt foci \n", + "RRBS_on_normal_crypts_patient_143 normal colonic crypt \n", + "RRBS_on_normal_crypts_patient_165 normal colonic crypt \n", + "RRBS_on_ACF_patient_165 aberrant crypt foci \n", "\n", - " sample_library_source genome_build \n", - "sample_name \n", - "RRBS_on_CRC_patient_8 genomic hg19 \n", - "RRBS_on_adjacent_normal_colon_patient_8 genomic hg19 \n", - "RRBS_on_CRC_patient_32 genomic hg19 \n", - "RRBS_on_adjacent_normal_colon_patient_32 genomic hg19 \n", - "RRBS_on_CRC_patient_41 genomic hg19 \n", - "RRBS_on_adjacent_normal_colon_patient_41 genomic hg19 \n", - "RRBS_on_CRC_patient_42 genomic hg19 \n", - "RRBS_on_adjacent_normal_colon_patient_42 genomic hg19 \n", - "RRBS_on_ACF_patient_173 genomic hg19 \n", - "RRBS_on_ACF_patient_515 genomic hg19 \n", - "RRBS_on_normal_crypts_patient_139 genomic hg19 \n", - "RRBS_on_ACF_patient_143 genomic hg19 \n", - "RRBS_on_normal_crypts_patient_143 genomic hg19 \n", - "RRBS_on_normal_crypts_patient_165 genomic hg19 \n", - "RRBS_on_ACF_patient_165 genomic hg19 " + " sample_organism_ch1 \n", + "sample_name \n", + "RRBS_on_CRC_patient_8 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_8 Homo sapiens \n", + "RRBS_on_CRC_patient_32 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_32 Homo sapiens \n", + "RRBS_on_CRC_patient_41 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_41 Homo sapiens \n", + "RRBS_on_CRC_patient_42 Homo sapiens \n", + "RRBS_on_adjacent_normal_colon_patient_42 Homo sapiens \n", + "RRBS_on_ACF_patient_173 Homo sapiens \n", + "RRBS_on_ACF_patient_515 Homo sapiens \n", + "RRBS_on_normal_crypts_patient_139 Homo sapiens \n", + "RRBS_on_ACF_patient_143 Homo sapiens \n", + "RRBS_on_normal_crypts_patient_143 Homo sapiens \n", + "RRBS_on_normal_crypts_patient_165 Homo sapiens \n", + "RRBS_on_ACF_patient_165 Homo sapiens " ] }, - "execution_count": 29, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ - 
"projects['_samples'].sample_table.iloc[:15 , :5]" + "projects['GSE95654_samples'].sample_table.iloc[:15 , :5]" ] } ], From f8fc5e438885f4cd08e14b0c0d4a8d9d8fe3d551 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 8 Sep 2022 12:32:24 -0400 Subject: [PATCH 41/61] added package Readme --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index ed5c01e..5bf1d6d 100644 --- a/README.md +++ b/README.md @@ -9,3 +9,14 @@ `geofetch` is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates [standard PEPs](http://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/) and documentation is hosted at [geofetch.databio.org](http://geofetch.databio.org) (source in the [/docs](/docs) folder). You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command. + +Key geofetch features: + +- Works with GEO and SRA metadata +- Combines samples from different projects +- Standardizes output metadata +- Filters type and size of processed files (from GEO) before downloading them +- Easy to use +- Fast execution time +- Available GSE search tool +- Available both as command-line tool and Python execution Package From 567ca863237b6c77f244bd5a02cbe0bdb39851e5 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 8 Sep 2022 12:38:27 -0400 Subject: [PATCH 42/61] added missing .md file --- README.md | 2 +- docs_jupyter/build/python-usage.md | 360 +++++++++++++++++++++++++++++ 2 files changed, 361 insertions(+), 1 deletion(-) create mode 100644 docs_jupyter/build/python-usage.md diff --git a/README.md b/README.md index 5bf1d6d..2388aca 100644 --- a/README.md +++ b/README.md @@ -18,5 +18,5 @@ Key geofetch features: - Filters type and size of processed files (from GEO) before downloading them - Easy to use - Fast execution time -- Available GSE search tool +- Provides GSE search tool - Available both as command-line tool and 
Python execution Package diff --git a/docs_jupyter/build/python-usage.md b/docs_jupyter/build/python-usage.md new file mode 100644 index 0000000..13e98c6 --- /dev/null +++ b/docs_jupyter/build/python-usage.md @@ -0,0 +1,360 @@ +jupyter:True +# Tutorial of usage geofetch as python package + +♪♫*•♪♪♫*•♪♪♫*•♪♪♫*•♪♪♫* + +Geofetch provides python functions to fetch data and metadata from GEO and SRA by using python language. `get_projects` function returns dictionary of peppy projects that were found using filters and input you specified. + peppy is a Python package that provides an API for handling standardized project and sample metadata. + +More information you can get here: + +http://peppy.databio.org/en/latest/ + +http://pep.databio.org/en/2.0.0/ + +### First let's import geofetch + + +```python +from geofetch import Geofetcher +``` + +### Initiate Geofetch object by specifying parameters that you want to use for downloading metadata/data + +1) If you won't specify any parameters, default parameters will be used + + +```python +geof = Geofetcher() +``` + +```.output +Metadata folder: /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name + +``` + +2) To download processed data with samples and series specify these two arguments: + + +```python +geof = Geofetcher(processed=True, data_source="all") +``` + +```.output +Metadata folder: /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name + +``` + +3) To tune project parameter, where metadata should be stored use next parameters: + + +```python +geof = Geofetcher(processed=True, data_source="all", const_limit_project = 20, const_limit_discard = 500, attr_limit_truncate = 10000 ) +``` + +```.output +Metadata folder: /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name + +``` + +4) To add more filters or other options see documentation + +## Run Geofetch + +### By default: +1) No actual data will be downloaded (just_metadata=True) + +2) No soft files will be saved on the disc 
(discard_soft=True) + + +```python +projects = geof.get_projects("GSE95654") +``` + +```.output +Trying GSE95654 (not a file) as accession... +Trying GSE95654 (not a file) as accession... + +``` + + + Output() + + +```.output +Skipped 0 accessions. Starting now. +Processing accession 1 of 1: 'GSE95654' + +Total number of processed SAMPLES files found is: 40 +Total number of processed SERIES files found is: 0 +Expanding metadata list... +Expanding metadata list... + +``` + + +

+
+
+
+
+
+
+ + + +```.output +Finished processing 1 accession(s) +Cleaning soft files ... +Unifying and saving of metadata... + +``` + + + Output() + + + +

+
+
+
+
+
+
+ + + + +
+
+ + + + + Output() + + + +

+
+
+
+
+
+
+ + + + +
+
+ + + +```.output +No files found. No data to save. File /home/bnt4me/Virginia/repos/geof2/geofetch/docs_jupyter/project_name/GSE95654_series/GSE95654_series.csv won't be created + +``` + +Check if projects were created by checking dict keys: + + +```python +projects.keys() +``` + + + + + dict_keys(['GSE95654_samples']) + + + +Project for samples was created! Now let's look into it. + +\* the values of the dictionary are peppy projects. More information about peppy Project you can find in the documentation: http://peppy.databio.org/en/latest/ + + +```python +len(projects['GSE95654_samples'].samples) +``` + + + + + 40 + + + +We got 40 samples from GSE95654 project. If you want to check if it's correct information go into: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE95654 + +Now let's see the actual data. First 15 samples and 5 columns: + + +```python +projects['GSE95654_samples'].sample_table.iloc[:15 , :5] +``` + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
sample_namesample_library_strategygenome_buildtissuesample_organism_ch1
sample_name
RRBS_on_CRC_patient_8RRBS_on_CRC_patient_8Bisulfite-Seqhg19primary tumorHomo sapiens
RRBS_on_adjacent_normal_colon_patient_8RRBS_on_adjacent_normal_colon_patient_8Bisulfite-Seqhg19adjacent normal colonHomo sapiens
RRBS_on_CRC_patient_32RRBS_on_CRC_patient_32Bisulfite-Seqhg19primary tumorHomo sapiens
RRBS_on_adjacent_normal_colon_patient_32RRBS_on_adjacent_normal_colon_patient_32Bisulfite-Seqhg19adjacent normal colonHomo sapiens
RRBS_on_CRC_patient_41RRBS_on_CRC_patient_41Bisulfite-Seqhg19primary tumorHomo sapiens
RRBS_on_adjacent_normal_colon_patient_41RRBS_on_adjacent_normal_colon_patient_41Bisulfite-Seqhg19adjacent normal colonHomo sapiens
RRBS_on_CRC_patient_42RRBS_on_CRC_patient_42Bisulfite-Seqhg19primary tumorHomo sapiens
RRBS_on_adjacent_normal_colon_patient_42RRBS_on_adjacent_normal_colon_patient_42Bisulfite-Seqhg19adjacent normal colonHomo sapiens
RRBS_on_ACF_patient_173RRBS_on_ACF_patient_173Bisulfite-Seqhg19aberrant crypt fociHomo sapiens
RRBS_on_ACF_patient_515RRBS_on_ACF_patient_515Bisulfite-Seqhg19aberrant crypt fociHomo sapiens
RRBS_on_normal_crypts_patient_139RRBS_on_normal_crypts_patient_139Bisulfite-Seqhg19normal colonic cryptHomo sapiens
RRBS_on_ACF_patient_143RRBS_on_ACF_patient_143Bisulfite-Seqhg19aberrant crypt fociHomo sapiens
RRBS_on_normal_crypts_patient_143RRBS_on_normal_crypts_patient_143Bisulfite-Seqhg19normal colonic cryptHomo sapiens
RRBS_on_normal_crypts_patient_165RRBS_on_normal_crypts_patient_165Bisulfite-Seqhg19normal colonic cryptHomo sapiens
RRBS_on_ACF_patient_165RRBS_on_ACF_patient_165Bisulfite-Seqhg19aberrant crypt fociHomo sapiens
+
+ + From 79dbe2d0aea709070b43cd67e6b7b9c3c369b676 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 29 Sep 2022 13:29:49 -0400 Subject: [PATCH 43/61] fixed #88 --- geofetch/geofetch.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 7fc44e0..fb4b067 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -446,17 +446,16 @@ def fetch_all(self, input: str, name: str = None): pass else: self._LOGGER.info("Parsing SRA file to download SRR records") - gsm_multi_table = self._process_sra_meta( + gsm_multi_table, gsm_metadata, runs = self._process_sra_meta( srp_list_result, gsm_enter_dict, gsm_metadata ) # download raw data: if not self.just_metadata: - for file_key in gsm_multi_table.keys(): - for run in gsm_multi_table[file_key]: - # download raw data - self._LOGGER.info(f"Getting SRR: {run[2]} in ({acc_GSE})") - self._download_raw_data(run[2]) + for run in runs: + # download raw data + self._LOGGER.info(f"Getting SRR: {run} in ({acc_GSE})") + self._download_raw_data(run) else: self._LOGGER.info(f"Dry run, no data will be downloaded") @@ -514,6 +513,7 @@ def _process_sra_meta( :return: srp multitable """ gsm_multi_table = {} + runs = [] for line in srp_list_result: # Only download if it's in the include list: @@ -582,8 +582,9 @@ def _process_sra_meta( else: # The first SRR for this SRX is added to GSM metadata gsm_metadata[experiment]["SRR"] = run_name + runs.append(run_name) - return gsm_multi_table + return gsm_multi_table, gsm_metadata, runs def _download_raw_data(self, run_name): bam_file = ( From 10f3386babec27e8a30f6e396df3d1832654564f Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 3 Oct 2022 16:53:45 -0400 Subject: [PATCH 44/61] Added usage to the cli. 
Fixed #89 --- geofetch/geofetch.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index fb4b067..2e4cd8e 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -2176,7 +2176,18 @@ def _parse_cmdl(cmdl): parser """ parser = argparse.ArgumentParser( - description="Automatic GEO and SRA data downloader" + description="Automatic GEO and SRA data downloader", + usage="""geofetch [] + +The example how to use geofetch (to download GSE573030 just metadata): + geofetch -i GSE67303 -m `pwd` --just-metadata + +To download all processed data of GSE57303: + geofetch -i GSE67303 --processed --geo-folder `pwd` -m `pwd` + +* where `pwd` is a current directory + +""" ) processed_group = parser.add_argument_group("processed") From 375ae3d5c79f101d329d693dbb8366411d73b852 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 4 Oct 2022 12:11:50 -0400 Subject: [PATCH 45/61] Fixed #90 --- geofetch/config_processed_template.yaml | 2 ++ geofetch/config_template.yaml | 6 ++---- geofetch/geofetch.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/geofetch/config_processed_template.yaml b/geofetch/config_processed_template.yaml index 66690c9..9f8eb38 100644 --- a/geofetch/config_processed_template.yaml +++ b/geofetch/config_processed_template.yaml @@ -7,7 +7,9 @@ sample_table: {sample_table} sample_modifiers: append: output_file_path: FILES + # Project metadata: {additional_columns} + # End of project metadata {pipeline_samples} derive: attributes: [output_file_path] diff --git a/geofetch/config_template.yaml b/geofetch/config_template.yaml index a47348a..59fcaa2 100644 --- a/geofetch/config_template.yaml +++ b/geofetch/config_template.yaml @@ -5,13 +5,11 @@ pep_version: 2.1.0 sample_table: {annotation} {subannotation} -looper: - output_dir: {project_name} - pipeline_interfaces: {pipeline_interfaces} - sample_modifiers: append: + # Project metadata: {additional_columns} + # End of 
project metadata SRR_files: SRA {pipeline_samples} derive: diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 2e4cd8e..b9445a9 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -2187,7 +2187,7 @@ def _parse_cmdl(cmdl): * where `pwd` is a current directory -""" +""", ) processed_group = parser.add_argument_group("processed") From b826b0df6f7c3ef0f6f0182ac5ce4b1325124cca Mon Sep 17 00:00:00 2001 From: Nathan Sheffield Date: Fri, 21 Oct 2022 14:38:33 -0400 Subject: [PATCH 46/61] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 2388aca..978a127 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,7 @@ [![pypi-badge](https://img.shields.io/pypi/v/geofetch)](https://pypi.org/project/geofetch) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -`geofetch` is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates [standard PEPs](http://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/) and documentation is hosted at [geofetch.databio.org](http://geofetch.databio.org) (source in the [/docs](/docs) folder). - -You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command. +`geofetch` is a command-line tool that downloads sequencing data and metadata from GEO and SRA and creates [standard PEPs](http://pep.databio.org/). `geofetch` is hosted at [pypi](https://pypi.org/project/geofetch/). You can convert the result of geofetch into unmapped `bam` or `fastq` files with the included `sraconvert` command. 
Key geofetch features: @@ -18,5 +16,7 @@ Key geofetch features: - Filters type and size of processed files (from GEO) before downloading them - Easy to use - Fast execution time -- Provides GSE search tool -- Available both as command-line tool and Python execution Package +- Can search GEO to find relevant data +- Can be used either as a command-line tool or from within Python using an API + +For more information, see [complete documentation at geofetch.databio.org](http://geofetch.databio.org) (source in the [/docs](/docs) folder). From a852062dcca8fa000fc8bb4b23365fc035de8bba Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 21 Oct 2022 15:34:09 -0400 Subject: [PATCH 47/61] fixed issues in PR comments --- docs/README.md | 2 +- docs/changelog.md | 2 ++ geofetch/geofetch.py | 13 +++++-------- setup.py | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/docs/README.md b/docs/README.md index e0b9c74..b2e711b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -55,7 +55,7 @@ geofetch -i GSE95654 --processed --just-metadata --- ### New features available in geofetch 0.11.0 : -1) Now geofetch is available as Python package. Geofetch can initialize [peppy](http://peppy.databio.org/) projects without downloading any soft files. Example: +1) Now geofetch is available as Python API package. Geofetch can initialize [peppy](http://peppy.databio.org/) projects without downloading any soft files. 
Example: ```python from geofetch import Geofetcher diff --git a/docs/changelog.md b/docs/changelog.md index f9c3c57..e5d3406 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,6 +7,8 @@ - Fixed None issue in config file - Switched way of saving soft files to request library - Fixed saving raw peps bug +- Fixed config errors +- Improved documentation - Refactored code ## [0.10.1] -- 2022-08-04 diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index b9445a9..03e63ab 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -90,7 +90,7 @@ class Geofetcher: :param fq_folder: Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding fastq files already exist. [Default: $SRAFQ:] [Works with raw data] :param use_key_subset: Use just the keys defined in this module when writing out metadata. [Works with raw data] - :param sra_folder: [Doesn't work ]Optional: Specify a location to store sra files + :param sra_folder: Optional: Specify a location to store sra files [Default: $SRARAW:" + safe_echo("SRARAW") + ] :param bam_conversion: Optional: set True to convert bam files [Works with raw data] :param picard_path: Specify a path to the picard jar, if you want to convert fastq to bam @@ -403,11 +403,8 @@ def fetch_all(self, input: str, name: str = None): ( meta_processed_samples, meta_processed_series, - ) = self.fetch_processed_one( - gse_file_content=file_gse_content, - gsm_file_content=file_gsm_content, - gsm_filter_list=gsm_enter_dict, - ) + ) = self.fetchone_processed(gse_file_content=file_gse_content, gsm_file_content=file_gsm_content, + gsm_filter_list=gsm_enter_dict) # download processed files: if not self.just_metadata: @@ -630,7 +627,7 @@ def _download_raw_data(self, run_name): f"SRA file doesn't exist, please download it first: {err}" ) - def fetch_processed_one( + def fetchone_processed( self, gse_file_content: list, gsm_file_content: list, @@ -670,7 +667,7 @@ def _generate_processed_meta( ) -> dict: """ 
Generate and save PEPs for processed accessions. GEO has data in GSE and GSM, - conditions are used to decide which PEPs have to be saved. + conditions are used to decide which PEPs will be saved. :param name: name of the folder/file where PEP will be saved :param meta_processed_samples: :param meta_processed_series: diff --git a/setup.py b/setup.py index e92f5b0..79cca69 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,7 @@ def read_reqs(reqs_name): ], keywords="project, bioinformatics, sequencing, ngs, workflow, GUI", url="https://github.com/pepkit/{}/".format(PACKAGE), - author="Nathan Sheffield, Vince Reuter, Oleksandr Khoroshevskyi", + author="Oleksandr Khoroshevskyi, Nathan Sheffield, Vince Reuter, Nathan LeRoy", license="BSD2", entry_points={ "console_scripts": [ From 0e658335ae20f454edd482f33a740939d41e8e05 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Fri, 21 Oct 2022 15:35:08 -0400 Subject: [PATCH 48/61] lint --- geofetch/geofetch.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 03e63ab..992efd1 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -403,8 +403,11 @@ def fetch_all(self, input: str, name: str = None): ( meta_processed_samples, meta_processed_series, - ) = self.fetchone_processed(gse_file_content=file_gse_content, gsm_file_content=file_gsm_content, - gsm_filter_list=gsm_enter_dict) + ) = self.fetchone_processed( + gse_file_content=file_gse_content, + gsm_file_content=file_gsm_content, + gsm_filter_list=gsm_enter_dict, + ) # download processed files: if not self.just_metadata: From 5ff204fa870147271ed4291f6a577ef07b28250d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 24 Oct 2022 12:04:45 -0400 Subject: [PATCH 49/61] Fixed #92 --- geofetch/geofetch.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 992efd1..4a51ca1 100755 --- a/geofetch/geofetch.py +++ 
b/geofetch/geofetch.py @@ -1143,7 +1143,8 @@ def _create_config_processed( with open(config_template, "r") as template_file: template = template_file.read() meta_list_str = [ - f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta + f'{list(i.keys())[0]}: "{self._sanitize_config_string(list(i.values())[0])}"' + for i in proj_meta ] modifiers_str = "\n ".join(d for d in meta_list_str) template_values = { @@ -1168,7 +1169,8 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): :return: generated, complete config file content """ meta_list_str = [ - f"{list(i.keys())[0]}: {list(i.values())[0]}" for i in proj_meta + f"{list(i.keys())[0]}: {self._sanitize_name((i.values())[0])}" + for i in proj_meta ] modifiers_str = "\n ".join(d for d in meta_list_str) # Write project config file @@ -1210,10 +1212,23 @@ def _check_sample_name_standard(self, metadata_dict: dict) -> dict: metadata_dict = self._standardize_colnames(metadata_dict) return metadata_dict + @staticmethod + def _sanitize_config_string(text: str) -> str: + """ + Function that sanitizes text in config file. + :param text: Any string that have to be sanitized + :return: sanitized strings + """ + new_str = text + punctuation1 = r""""'`""" + for odd_char in list(punctuation1): + new_str = new_str.replace(odd_char, "_") + return new_str + @staticmethod def _sanitize_name(name_str: str) -> str: """ - Function that sanitizing strings. (Replace all odd characters) + Function that sanitizes strings. (Replace all odd characters) :param str name_str: Any string value that has to be sanitized. 
:return: sanitized strings """ From bb9892e954669c1a19171be148f5f99206eae18b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 24 Oct 2022 12:21:04 -0400 Subject: [PATCH 50/61] Fix of #92 (2) --- geofetch/geofetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 4a51ca1..0dd6446 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1305,7 +1305,7 @@ def _separate_common_meta( new_str = nb_sample[1][this_key] if isinstance(nb_sample[1][this_key], str): new_str = nb_sample[1][this_key].replace('"', "") - new_str = re.sub("[^A-Za-z0-9]+", " ", new_str) + # new_str = re.sub("[^A-Za-z0-9]+", " ", new_str) new_meta_project.append({this_key: new_str}) first_key = False del meta_list[nb_sample[0]][this_key] From ac03f3f01fcaad7a9d7f5a47404e380cb22e6a8a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 24 Oct 2022 12:51:18 -0400 Subject: [PATCH 51/61] Fixed errors --- geofetch/geofetch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 0dd6446..55279c5 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1169,7 +1169,7 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): :return: generated, complete config file content """ meta_list_str = [ - f"{list(i.keys())[0]}: {self._sanitize_name((i.values())[0])}" + f'{list(i.keys())[0]}: "{self._sanitize_config_string(list(i.values())[0])}"' for i in proj_meta ] modifiers_str = "\n ".join(d for d in meta_list_str) From cb5c2d5ff456674a4793f9821b0be5d2b1f2ea0b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 24 Oct 2022 16:18:48 -0400 Subject: [PATCH 52/61] Fixed #95 (3) --- geofetch/geofetch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 55279c5..cdf9fd5 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1220,9 +1220,8 @@ def 
_sanitize_config_string(text: str) -> str: :return: sanitized strings """ new_str = text - punctuation1 = r""""'`""" - for odd_char in list(punctuation1): - new_str = new_str.replace(odd_char, "_") + new_str = new_str.replace('"', f'\\"') + new_str = new_str.replace("'", f"''") return new_str @staticmethod From 90e8bfec322ce7b61178c66db8b96dbdce8affc7 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Mon, 24 Oct 2022 21:20:27 -0400 Subject: [PATCH 53/61] refactoring cli --- geofetch/cli.py | 280 ++++++++++++++++++++++++++++++++++++++++ geofetch/geofetch.py | 298 +------------------------------------------ 2 files changed, 284 insertions(+), 294 deletions(-) create mode 100644 geofetch/cli.py diff --git a/geofetch/cli.py b/geofetch/cli.py new file mode 100644 index 0000000..53ed9f4 --- /dev/null +++ b/geofetch/cli.py @@ -0,0 +1,280 @@ +import argparse +import os +import logmuse +from ._version import __version__ + + +def _safe_echo(var): + """Returns an environment variable if it exists, or an empty string if not""" + return os.getenv(var, "") + + +def _parse_cmdl(cmdl): + """ + parser + """ + parser = argparse.ArgumentParser( + description="Automatic GEO and SRA data downloader", + usage="""geofetch [] + +The example how to use geofetch (to download GSE573030 just metadata): + geofetch -i GSE67303 -m `pwd` --just-metadata + +To download all processed data of GSE57303: + geofetch -i GSE67303 --processed --geo-folder `pwd` -m `pwd` + +* where `pwd` is a current directory + +""", + ) + + processed_group = parser.add_argument_group("processed") + raw_group = parser.add_argument_group("raw") + + parser.add_argument( + "-V", "--version", action="version", version=f"%(prog)s {__version__}" + ) + + # Required + parser.add_argument( + "-i", + "--input", + dest="input", + required=True, + help="required: a GEO (GSE) accession, or a file with a list of GSE numbers", + ) + + # Optional + parser.add_argument( + "-n", "--name", help="Specify a project name. 
Defaults to GSE number" + ) + + parser.add_argument( + "-m", + "--metadata-root", + dest="metadata_root", + default=_safe_echo("SRAMETA"), + help="Specify a parent folder location to store metadata. " + "The project name will be added as a subfolder " + "[Default: $SRAMETA:" + _safe_echo("SRAMETA") + "]", + ) + + parser.add_argument( + "-u", + "--metadata-folder", + help="Specify an absolute folder location to store metadata. " + "No subfolder will be added. Overrides value of --metadata-root " + "[Default: Not used (--metadata-root is used by default)]", + ) + + parser.add_argument( + "--just-metadata", + action="store_true", + help="If set, don't actually run downloads, just create metadata", + ) + + parser.add_argument( + "-r", + "--refresh-metadata", + action="store_true", + help="If set, re-download metadata even if it exists.", + ) + + parser.add_argument( + "--config-template", default=None, help="Project config yaml file template." + ) + + # Optional + parser.add_argument( + "--pipeline-samples", + default=None, + help="Optional: Specify one or more filepaths to SAMPLES pipeline interface yaml files. " + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", + ) + + # Optional + parser.add_argument( + "--pipeline-project", + default=None, + help="Optional: Specify one or more filepaths to PROJECT pipeline interface yaml files. " + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", + ) + # Optional + parser.add_argument( + "--disable-progressbar", + action="store_true", + help="Optional: Disable progressbar", + ) + + # Optional + parser.add_argument( + "-k", + "--skip", + default=0, + type=int, + help="Skip some accessions. [Default: no skip].", + ) + + parser.add_argument( + "--acc-anno", + action="store_true", + help="Optional: Produce annotation sheets for each accession." 
+ " Project combined PEP for the whole project won't be produced.", + ) + + parser.add_argument( + "--discard-soft", + action="store_true", + help="Optional: After creation of PEP files, all soft and additional files will be deleted", + ) + + parser.add_argument( + "--const-limit-project", + type=int, + default=50, + help="Optional: Limit of the number of the constant sample characters " + "that should not be in project yaml. [Default: 50]", + ) + + parser.add_argument( + "--const-limit-discard", + type=int, + default=250, + help="Optional: Limit of the number of the constant sample characters " + "that should not be discarded [Default: 250]", + ) + + parser.add_argument( + "--attr-limit-truncate", + type=int, + default=500, + help="Optional: Limit of the number of sample characters." + "Any attribute with more than X characters will truncate to the first X," + " where X is a number of characters [Default: 500]", + ) + + parser.add_argument( + "--add-dotfile", + action="store_true", + help="Optional: Add .pep.yaml file that points .yaml PEP file", + ) + + processed_group.add_argument( + "-p", + "--processed", + default=False, + action="store_true", + help="Download processed data [Default: download raw data].", + ) + + processed_group.add_argument( + "--data-source", + dest="data_source", + choices=["all", "samples", "series"], + default="samples", + help="Optional: Specifies the source of data on the GEO record" + " to retrieve processed data, which may be attached to the" + " collective series entity, or to individual samples. " + "Allowable values are: samples, series or both (all). " + "Ignored unless 'processed' flag is set. [Default: samples]", + ) + + processed_group.add_argument( + "--filter", + default=None, + help="Optional: Filter regex for processed filenames [Default: None]." 
+ "Ignored unless 'processed' flag is set.", + ) + + processed_group.add_argument( + "--filter-size", + dest="filter_size", + default=None, + help="""Optional: Filter size for processed files + that are stored as sample repository [Default: None]. + Works only for sample data. + Supported input formats : 12B, 12KB, 12MB, 12GB. + Ignored unless 'processed' flag is set.""", + ) + + processed_group.add_argument( + "-g", + "--geo-folder", + default=_safe_echo("GEODATA"), + help="Optional: Specify a location to store processed GEO files." + " Ignored unless 'processed' flag is set." + "[Default: $GEODATA:" + _safe_echo("GEODATA") + "]", + ) + + raw_group.add_argument( + "-x", + "--split-experiments", + action="store_true", + help="""Split SRR runs into individual samples. By default, SRX + experiments with multiple SRR Runs will have a single entry in the + annotation table, with each run as a separate row in the + subannotation table. This setting instead treats each run as a + separate sample""", + ) + + raw_group.add_argument( + "-b", + "--bam-folder", + dest="bam_folder", + default=_safe_echo("SRABAM"), + help="""Optional: Specify folder of bam files. Geofetch will not + download sra files when corresponding bam files already exist. + [Default: $SRABAM:""" + + _safe_echo("SRABAM") + + "]", + ) + + raw_group.add_argument( + "-f", + "--fq-folder", + dest="fq_folder", + default=_safe_echo("SRAFQ"), + help="""Optional: Specify folder of fastq files. Geofetch will not + download sra files when corresponding fastq files already exist. + [Default: $SRAFQ:""" + + _safe_echo("SRAFQ") + + "]", + ) + + # Deprecated; these are for bam conversion which now happens in sra_convert + # it still works here but I hide it so people don't use it, because it's confusing. 
+ raw_group.add_argument( + "-s", + "--sra-folder", + dest="sra_folder", + default=_safe_echo("SRARAW"), + help=argparse.SUPPRESS, + # help="Optional: Specify a location to store sra files " + # "[Default: $SRARAW:" + safe_echo("SRARAW") + "]" + ) + raw_group.add_argument( + "--bam-conversion", + action="store_true", + # help="Turn on sequential bam conversion. Default: No conversion.", + help=argparse.SUPPRESS, + ) + + raw_group.add_argument( + "--picard-path", + dest="picard_path", + default=_safe_echo("PICARD"), + # help="Specify a path to the picard jar, if you want to convert " + # "fastq to bam [Default: $PICARD:" + safe_echo("PICARD") + "]", + help=argparse.SUPPRESS, + ) + + raw_group.add_argument( + "--use-key-subset", + action="store_true", + help="Use just the keys defined in this module when writing out metadata.", + ) + + logmuse.add_logging_options(parser) + return parser.parse_args(cmdl) \ No newline at end of file diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index cdf9fd5..3dd374f 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -6,20 +6,16 @@ import copy import csv import os - -# import re import sys # from string import punctuation +# import tarfile import requests import xmltodict -from rich.progress import track import yaml - -# import tarfile import time -from ._version import __version__ +from .cli import _parse_cmdl from .const import * from .utils import ( Accession, @@ -30,6 +26,8 @@ run_subprocess, ) +from rich.progress import track +import re import logmuse from ubiquerg import expandpath, is_command_callable from io import StringIO @@ -2185,294 +2183,6 @@ def _write( f.write("\n") -def _parse_cmdl(cmdl): - """ - parser - """ - parser = argparse.ArgumentParser( - description="Automatic GEO and SRA data downloader", - usage="""geofetch [] - -The example how to use geofetch (to download GSE573030 just metadata): - geofetch -i GSE67303 -m `pwd` --just-metadata - -To download all processed data of GSE57303: - 
geofetch -i GSE67303 --processed --geo-folder `pwd` -m `pwd` - -* where `pwd` is a current directory - -""", - ) - - processed_group = parser.add_argument_group("processed") - raw_group = parser.add_argument_group("raw") - - parser.add_argument( - "-V", "--version", action="version", version=f"%(prog)s {__version__}" - ) - - # Required - parser.add_argument( - "-i", - "--input", - dest="input", - required=True, - help="required: a GEO (GSE) accession, or a file with a list of GSE numbers", - ) - - # Optional - parser.add_argument( - "-n", "--name", help="Specify a project name. Defaults to GSE number" - ) - - parser.add_argument( - "-m", - "--metadata-root", - dest="metadata_root", - default=safe_echo("SRAMETA"), - help="Specify a parent folder location to store metadata. " - "The project name will be added as a subfolder " - "[Default: $SRAMETA:" + safe_echo("SRAMETA") + "]", - ) - - parser.add_argument( - "-u", - "--metadata-folder", - help="Specify an absolute folder location to store metadata. " - "No subfolder will be added. Overrides value of --metadata-root " - "[Default: Not used (--metadata-root is used by default)]", - ) - - parser.add_argument( - "--just-metadata", - action="store_true", - help="If set, don't actually run downloads, just create metadata", - ) - - parser.add_argument( - "-r", - "--refresh-metadata", - action="store_true", - help="If set, re-download metadata even if it exists.", - ) - - parser.add_argument( - "--config-template", default=None, help="Project config yaml file template." - ) - - # Optional - parser.add_argument( - "--pipeline-samples", - default=None, - help="Optional: Specify one or more filepaths to SAMPLES pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. 
[Default: null]", - ) - - # Optional - parser.add_argument( - "--pipeline-project", - default=None, - help="Optional: Specify one or more filepaths to PROJECT pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. [Default: null]", - ) - # Optional - parser.add_argument( - "--disable-progressbar", - action="store_true", - help="Optional: Disable progressbar", - ) - - # Optional - parser.add_argument( - "-k", - "--skip", - default=0, - type=int, - help="Skip some accessions. [Default: no skip].", - ) - - parser.add_argument( - "--acc-anno", - action="store_true", - help="Optional: Produce annotation sheets for each accession." - " Project combined PEP for the whole project won't be produced.", - ) - - parser.add_argument( - "--discard-soft", - action="store_true", - help="Optional: After creation of PEP files, all soft and additional files will be deleted", - ) - - parser.add_argument( - "--const-limit-project", - type=int, - default=50, - help="Optional: Limit of the number of the constant sample characters " - "that should not be in project yaml. [Default: 50]", - ) - - parser.add_argument( - "--const-limit-discard", - type=int, - default=250, - help="Optional: Limit of the number of the constant sample characters " - "that should not be discarded [Default: 250]", - ) - - parser.add_argument( - "--attr-limit-truncate", - type=int, - default=500, - help="Optional: Limit of the number of sample characters." 
- "Any attribute with more than X characters will truncate to the first X," - " where X is a number of characters [Default: 500]", - ) - - parser.add_argument( - "--add-dotfile", - action="store_true", - help="Optional: Add .pep.yaml file that points .yaml PEP file", - ) - - processed_group.add_argument( - "-p", - "--processed", - default=False, - action="store_true", - help="Download processed data [Default: download raw data].", - ) - - processed_group.add_argument( - "--data-source", - dest="data_source", - choices=["all", "samples", "series"], - default="samples", - help="Optional: Specifies the source of data on the GEO record" - " to retrieve processed data, which may be attached to the" - " collective series entity, or to individual samples. " - "Allowable values are: samples, series or both (all). " - "Ignored unless 'processed' flag is set. [Default: samples]", - ) - - processed_group.add_argument( - "--filter", - default=None, - help="Optional: Filter regex for processed filenames [Default: None]." - "Ignored unless 'processed' flag is set.", - ) - - processed_group.add_argument( - "--filter-size", - dest="filter_size", - default=None, - help="""Optional: Filter size for processed files - that are stored as sample repository [Default: None]. - Works only for sample data. - Supported input formats : 12B, 12KB, 12MB, 12GB. - Ignored unless 'processed' flag is set.""", - ) - - processed_group.add_argument( - "-g", - "--geo-folder", - default=safe_echo("GEODATA"), - help="Optional: Specify a location to store processed GEO files." - " Ignored unless 'processed' flag is set." - "[Default: $GEODATA:" + safe_echo("GEODATA") + "]", - ) - - raw_group.add_argument( - "-x", - "--split-experiments", - action="store_true", - help="""Split SRR runs into individual samples. By default, SRX - experiments with multiple SRR Runs will have a single entry in the - annotation table, with each run as a separate row in the - subannotation table. 
This setting instead treats each run as a - separate sample""", - ) - - raw_group.add_argument( - "-b", - "--bam-folder", - dest="bam_folder", - default=safe_echo("SRABAM"), - help="""Optional: Specify folder of bam files. Geofetch will not - download sra files when corresponding bam files already exist. - [Default: $SRABAM:""" - + safe_echo("SRABAM") - + "]", - ) - - raw_group.add_argument( - "-f", - "--fq-folder", - dest="fq_folder", - default=safe_echo("SRAFQ"), - help="""Optional: Specify folder of fastq files. Geofetch will not - download sra files when corresponding fastq files already exist. - [Default: $SRAFQ:""" - + safe_echo("SRAFQ") - + "]", - ) - - # Deprecated; these are for bam conversion which now happens in sra_convert - # it still works here but I hide it so people don't use it, because it's confusing. - raw_group.add_argument( - "-s", - "--sra-folder", - dest="sra_folder", - default=safe_echo("SRARAW"), - help=argparse.SUPPRESS, - # help="Optional: Specify a location to store sra files " - # "[Default: $SRARAW:" + safe_echo("SRARAW") + "]" - ) - raw_group.add_argument( - "--bam-conversion", - action="store_true", - # help="Turn on sequential bam conversion. 
Default: No conversion.", - help=argparse.SUPPRESS, - ) - - raw_group.add_argument( - "--picard-path", - dest="picard_path", - default=safe_echo("PICARD"), - # help="Specify a path to the picard jar, if you want to convert " - # "fastq to bam [Default: $PICARD:" + safe_echo("PICARD") + "]", - help=argparse.SUPPRESS, - ) - - raw_group.add_argument( - "--use-key-subset", - action="store_true", - help="Use just the keys defined in this module when writing out metadata.", - ) - - logmuse.add_logging_options(parser) - return parser.parse_args(cmdl) - - -def safe_echo(var): - """Returns an environment variable if it exists, or an empty string if not""" - return os.getenv(var, "") - - -class InvalidSoftLineException(Exception): - """Exception related to parsing SOFT line.""" - - def __init__(self, l): - """ - Create the exception by providing the problematic line. - - :param str l: the problematic SOFT line - """ - super(self, f"{l}") - - def main(): """Run the script.""" args = _parse_cmdl(sys.argv[1:]) From 2ce6b4831b3c20ce07282ccda888c3cec55d12bd Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 25 Oct 2022 12:58:46 -0400 Subject: [PATCH 54/61] refactored static function + docstring --- geofetch/cli.py | 50 ++-- geofetch/geofetch.py | 569 +++++++++++-------------------------------- geofetch/utils.py | 346 +++++++++++++++++++++++++- 3 files changed, 501 insertions(+), 464 deletions(-) diff --git a/geofetch/cli.py b/geofetch/cli.py index 53ed9f4..54ce083 100644 --- a/geofetch/cli.py +++ b/geofetch/cli.py @@ -55,16 +55,16 @@ def _parse_cmdl(cmdl): dest="metadata_root", default=_safe_echo("SRAMETA"), help="Specify a parent folder location to store metadata. 
" - "The project name will be added as a subfolder " - "[Default: $SRAMETA:" + _safe_echo("SRAMETA") + "]", + "The project name will be added as a subfolder " + "[Default: $SRAMETA:" + _safe_echo("SRAMETA") + "]", ) parser.add_argument( "-u", "--metadata-folder", help="Specify an absolute folder location to store metadata. " - "No subfolder will be added. Overrides value of --metadata-root " - "[Default: Not used (--metadata-root is used by default)]", + "No subfolder will be added. Overrides value of --metadata-root " + "[Default: Not used (--metadata-root is used by default)]", ) parser.add_argument( @@ -89,8 +89,8 @@ def _parse_cmdl(cmdl): "--pipeline-samples", default=None, help="Optional: Specify one or more filepaths to SAMPLES pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. [Default: null]", + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", ) # Optional @@ -98,8 +98,8 @@ def _parse_cmdl(cmdl): "--pipeline-project", default=None, help="Optional: Specify one or more filepaths to PROJECT pipeline interface yaml files. " - "These will be added to the project config file to make it immediately " - "compatible with looper. [Default: null]", + "These will be added to the project config file to make it immediately " + "compatible with looper. [Default: null]", ) # Optional parser.add_argument( @@ -121,7 +121,7 @@ def _parse_cmdl(cmdl): "--acc-anno", action="store_true", help="Optional: Produce annotation sheets for each accession." - " Project combined PEP for the whole project won't be produced.", + " Project combined PEP for the whole project won't be produced.", ) parser.add_argument( @@ -135,7 +135,7 @@ def _parse_cmdl(cmdl): type=int, default=50, help="Optional: Limit of the number of the constant sample characters " - "that should not be in project yaml. 
[Default: 50]", + "that should not be in project yaml. [Default: 50]", ) parser.add_argument( @@ -143,7 +143,7 @@ def _parse_cmdl(cmdl): type=int, default=250, help="Optional: Limit of the number of the constant sample characters " - "that should not be discarded [Default: 250]", + "that should not be discarded [Default: 250]", ) parser.add_argument( @@ -151,8 +151,8 @@ def _parse_cmdl(cmdl): type=int, default=500, help="Optional: Limit of the number of sample characters." - "Any attribute with more than X characters will truncate to the first X," - " where X is a number of characters [Default: 500]", + "Any attribute with more than X characters will truncate to the first X," + " where X is a number of characters [Default: 500]", ) parser.add_argument( @@ -175,17 +175,17 @@ def _parse_cmdl(cmdl): choices=["all", "samples", "series"], default="samples", help="Optional: Specifies the source of data on the GEO record" - " to retrieve processed data, which may be attached to the" - " collective series entity, or to individual samples. " - "Allowable values are: samples, series or both (all). " - "Ignored unless 'processed' flag is set. [Default: samples]", + " to retrieve processed data, which may be attached to the" + " collective series entity, or to individual samples. " + "Allowable values are: samples, series or both (all). " + "Ignored unless 'processed' flag is set. [Default: samples]", ) processed_group.add_argument( "--filter", default=None, help="Optional: Filter regex for processed filenames [Default: None]." - "Ignored unless 'processed' flag is set.", + "Ignored unless 'processed' flag is set.", ) processed_group.add_argument( @@ -204,8 +204,8 @@ def _parse_cmdl(cmdl): "--geo-folder", default=_safe_echo("GEODATA"), help="Optional: Specify a location to store processed GEO files." - " Ignored unless 'processed' flag is set." - "[Default: $GEODATA:" + _safe_echo("GEODATA") + "]", + " Ignored unless 'processed' flag is set." 
+ "[Default: $GEODATA:" + _safe_echo("GEODATA") + "]", ) raw_group.add_argument( @@ -227,8 +227,8 @@ def _parse_cmdl(cmdl): help="""Optional: Specify folder of bam files. Geofetch will not download sra files when corresponding bam files already exist. [Default: $SRABAM:""" - + _safe_echo("SRABAM") - + "]", + + _safe_echo("SRABAM") + + "]", ) raw_group.add_argument( @@ -239,8 +239,8 @@ def _parse_cmdl(cmdl): help="""Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding fastq files already exist. [Default: $SRAFQ:""" - + _safe_echo("SRAFQ") - + "]", + + _safe_echo("SRAFQ") + + "]", ) # Deprecated; these are for bam conversion which now happens in sra_convert @@ -277,4 +277,4 @@ def _parse_cmdl(cmdl): ) logmuse.add_logging_options(parser) - return parser.parse_args(cmdl) \ No newline at end of file + return parser.parse_args(cmdl) diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 3dd374f..50d0a58 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -2,7 +2,6 @@ __author__ = ["Oleksandr Khoroshevskyi", "Vince Reuter", "Nathan Sheffield"] -import argparse import copy import csv import os @@ -24,13 +23,27 @@ convert_size, clean_soft_files, run_subprocess, + _get_list_of_keys, + _get_value, + _read_tar_filelist, + _check_file_existance, + _separate_list_of_files, + _update_columns, + _sanitize_name, + _sanitize_config_string, + _create_dot_yaml, + _which, + _dict_to_list_converter, + _standardize_colnames, + _separate_file_url, + _filter_gsm, + _unify_list_keys, ) from rich.progress import track import re import logmuse from ubiquerg import expandpath, is_command_callable -from io import StringIO from typing import List, Union, Dict, Tuple, NoReturn import peppy import pandas as pd @@ -39,64 +52,6 @@ class Geofetcher: """ Class to download or get projects, metadata, data from GEO and SRA - - :param input: - :param name: Specify a project name. 
Defaults to GSE number or name of accessions file name - :param metadata_root: Specify a parent folder location to store metadata. - The project name will be added as a subfolder [Default: $SRAMETA:] - :param metadata_folder: Specify an absolute folder location to store metadata. No subfolder will be added. - Overrides value of --metadata-root [Default: Not used (--metadata-root is used by default)] - :param just_metadata: If set, don't actually run downloads, just create metadata - :param refresh_metadata: If set, re-download metadata even if it exists. - :param config_template: Project config yaml file template. - :param pipeline_samples: Specify one or more filepaths to SAMPLES pipeline interface yaml files. - These will be added to the project config file to make it immediately compatible with looper. - [Default: null] - :param pipeline_project: Specify one or more filepaths to PROJECT pipeline interface yaml files. - These will be added to the project config file to make it immediately compatible with looper. - [Default: null] - :param acc_anno: Produce annotation sheets for each accession. - Project combined PEP for the whole project won't be produced. - :param discard_soft: Create project without downloading soft files on the disc - :param add_dotfile: Add .pep.yaml file that points .yaml PEP file - :param disable_progressbar: Set true to disable progressbar - - :param const_limit_project: Optional: Limit of the number of the constant sample characters - that should not be in project yaml. [Default: 50] - :param const_limit_discard: Optional: Limit of the number of the constant sample characters - that should not be discarded [Default: 250] - :param attr_limit_truncate: Optional: Limit of the number of sample characters. - Any attribute with more than X characters will truncate to the first X, where X is a number of characters - [Default: 500] - - :param processed: Download processed data [Default: download raw data]. 
- :param data_source: Specifies the source of data on the GEO record to retrieve processed data, - which may be attached to the collective series entity, or to individual samples. Allowable values are: - samples, series or both (all). Ignored unless 'processed' flag is set. [Default: samples] - :param filter: Filter regex for processed filenames [Default: None].Ignored unless 'processed' flag is set. - :param filter_size: Filter size for processed files that are stored as sample repository [Default: None]. - Works only for sample data. Supported input formats : 12B, 12KB, 12MB, 12GB. - Ignored unless 'processed' flag is set. - :param geo_folder: Specify a location to store processed GEO files. - Ignored unless 'processed' flag is set.[Default: $GEODATA:] - - :param split_experiments: Split SRR runs into individual samples. By default, SRX experiments with multiple SRR - Runs will have a single entry in the annotation table, with each run as a separate row in the - subannotation table. This setting instead treats each run as a separate sample [Works with raw data] - :param bam_folder: Optional: Specify folder of bam files. Geofetch will not download sra files when - corresponding bam files already exist. [Default: $SRABAM:] [Works with raw data] - :param fq_folder: Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding - fastq files already exist. [Default: $SRAFQ:] [Works with raw data] - :param use_key_subset: Use just the keys defined in this module when writing out metadata. [Works with raw data] - :param sra_folder: Optional: Specify a location to store sra files - [Default: $SRARAW:" + safe_echo("SRARAW") + ] - :param bam_conversion: Optional: set True to convert bam files [Works with raw data] - :param picard_path: Specify a path to the picard jar, if you want to convert fastq to bam - [Default: $PICARD:" + safe_echo("PICARD") + "] [Works with raw data] - - :param skip: Skip some accessions. [Default: no skip]. 
- :param opts: opts object [Optional] - :param kwargs: other values """ def __init__( @@ -133,6 +88,66 @@ def __init__( opts=None, **kwargs, ): + """ + init function + :param input: GSEnumber or path to the input file + :param name: Specify a project name. Defaults to GSE number or name of accessions file name + :param metadata_root: Specify a parent folder location to store metadata. + The project name will be added as a subfolder [Default: $SRAMETA:] + :param metadata_folder: Specify an absolute folder location to store metadata. No subfolder will be added. + Overrides value of --metadata-root [Default: Not used (--metadata-root is used by default)] + :param just_metadata: If set, don't actually run downloads, just create metadata + :param refresh_metadata: If set, re-download metadata even if it exists. + :param config_template: Project config yaml file template. + :param pipeline_samples: Specify one or more filepaths to SAMPLES pipeline interface yaml files. + These will be added to the project config file to make it immediately compatible with looper. + [Default: null] + :param pipeline_project: Specify one or more filepaths to PROJECT pipeline interface yaml files. + These will be added to the project config file to make it immediately compatible with looper. + [Default: null] + :param acc_anno: Produce annotation sheets for each accession. + Project combined PEP for the whole project won't be produced. + :param discard_soft: Create project without downloading soft files on the disc + :param add_dotfile: Add .pep.yaml file that points .yaml PEP file + :param disable_progressbar: Set true to disable progressbar + + :param const_limit_project: Optional: Limit of the number of the constant sample characters + that should not be in project yaml. 
[Default: 50] + :param const_limit_discard: Optional: Limit of the number of the constant sample characters + that should not be discarded [Default: 250] + :param attr_limit_truncate: Optional: Limit of the number of sample characters. + Any attribute with more than X characters will truncate to the first X, where X is a number of characters + [Default: 500] + + :param processed: Download processed data [Default: download raw data]. + :param data_source: Specifies the source of data on the GEO record to retrieve processed data, + which may be attached to the collective series entity, or to individual samples. Allowable values are: + samples, series or both (all). Ignored unless 'processed' flag is set. [Default: samples] + :param filter: Filter regex for processed filenames [Default: None].Ignored unless 'processed' flag is set. + :param filter_size: Filter size for processed files that are stored as sample repository [Default: None]. + Works only for sample data. Supported input formats : 12B, 12KB, 12MB, 12GB. + Ignored unless 'processed' flag is set. + :param geo_folder: Specify a location to store processed GEO files. + Ignored unless 'processed' flag is set.[Default: $GEODATA:] + + :param split_experiments: Split SRR runs into individual samples. By default, SRX experiments with multiple SRR + Runs will have a single entry in the annotation table, with each run as a separate row in the + subannotation table. This setting instead treats each run as a separate sample [Works with raw data] + :param bam_folder: Optional: Specify folder of bam files. Geofetch will not download sra files when + corresponding bam files already exist. [Default: $SRABAM:] [Works with raw data] + :param fq_folder: Optional: Specify folder of fastq files. Geofetch will not download sra files when corresponding + fastq files already exist. [Default: $SRAFQ:] [Works with raw data] + :param use_key_subset: Use just the keys defined in this module when writing out metadata. 
[Works with raw data] + :param sra_folder: Optional: Specify a location to store sra files + [Default: $SRARAW:" + safe_echo("SRARAW") + ] + :param bam_conversion: Optional: set True to convert bam files [Works with raw data] + :param picard_path: Specify a path to the picard jar, if you want to convert fastq to bam + [Default: $PICARD:" + safe_echo("PICARD") + "] [Works with raw data] + + :param skip: Skip some accessions. [Default: no skip]. + :param opts: opts object [Optional] + :param kwargs: other values + """ if opts is not None: _LOGGER = logmuse.logger_via_cli(opts) @@ -233,7 +248,7 @@ def __init__( self._LOGGER.info(f"Metadata folder: {self.metadata_expanded}") # Some sanity checks before proceeding - if bam_conversion and not just_metadata and not self._which("samtools"): + if bam_conversion and not just_metadata and not _which("samtools"): raise SystemExit("For SAM/BAM processing, samtools should be on PATH.") self.just_object = False @@ -308,8 +323,13 @@ def get_projects( return new_pr_dict - def fetch_all(self, input: str, name: str = None): - """Main script driver/workflow""" + def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Project]: + """ + Main script driver/workflow + :param input: GSE or input file with gse's + :param name: Name of the project + :return: NoReturn or peppy Project + """ if name is not None: self.project_name = name @@ -530,12 +550,12 @@ def _process_sra_meta( if not sample_name or sample_name == "": temp = gsm_metadata[experiment]["Sample_title"] - sample_name = self._sanitize_name(temp) + sample_name = _sanitize_name(temp) # Otherwise, record that there's SRA data for this run. 
# And set a few columns that are used as input to the Looper # print("Updating columns for looper") - self._update_columns( + _update_columns( gsm_metadata, experiment, sample_name=sample_name, @@ -584,7 +604,12 @@ def _process_sra_meta( return gsm_multi_table, gsm_metadata, runs - def _download_raw_data(self, run_name): + def _download_raw_data(self, run_name: str) -> NoReturn: + """ + Downloading raw data from SRA by providing run name + :param run_name: Run name from SRA + :return: NoReturn + """ bam_file = ( "" if self.bam_folder == "" @@ -613,7 +638,7 @@ def _download_raw_data(self, run_name): # converting sra to bam using # TODO: sam-dump has a built-in prefetch. I don't have to do # any of this stuff... This also solves the bad sam-dump issues. - self._sra_bam_conversion(bam_file, run_name) + self._sra_bam_conversion1(bam_file, run_name) # checking if bam_file converted correctly, if not --> use fastq-dump st = os.stat(bam_file) @@ -635,7 +660,7 @@ def fetchone_processed( gsm_filter_list: dict, ) -> Tuple: """ - Fetching just one processed GSE project + Fetching one processed GSE project :param gsm_file_content: gse soft file content :param gse_file_content: gsm soft file content :param gsm_filter_list: list of gsm that have to be downloaded @@ -647,9 +672,7 @@ def fetchone_processed( ) = self._get_list_of_processed_files(gse_file_content, gsm_file_content) # taking into account list of GSM that is specified in the input file - meta_processed_samples = self._filter_gsm( - meta_processed_samples, gsm_filter_list - ) + meta_processed_samples = _filter_gsm(meta_processed_samples, gsm_filter_list) # samples meta_processed_samples = self._expand_metadata_list(meta_processed_samples) @@ -658,8 +681,8 @@ def fetchone_processed( meta_processed_series = self._expand_metadata_list(meta_processed_series) # convert column names to lowercase and underscore - meta_processed_samples = self._standardize_colnames(meta_processed_samples) - meta_processed_series = 
self._standardize_colnames(meta_processed_series) + meta_processed_samples = _standardize_colnames(meta_processed_samples) + meta_processed_series = _standardize_colnames(meta_processed_series) return meta_processed_samples, meta_processed_series @@ -773,9 +796,9 @@ def _expand_metadata_list_in_dict(self, metadata_dict: dict) -> dict: :param metadata_dict: metadata dict :return: expanded metadata dict """ - prj_list = self._dict_to_list_convector(proj_dict=metadata_dict) + prj_list = _dict_to_list_converter(proj_dict=metadata_dict) prj_list = self._expand_metadata_list(prj_list) - return self._dict_to_list_convector(proj_list=prj_list) + return _dict_to_list_converter(proj_list=prj_list) def _expand_metadata_list(self, metadata_list: list) -> list: """ @@ -785,7 +808,7 @@ def _expand_metadata_list(self, metadata_list: list) -> list: :return list: expanded metadata list """ self._LOGGER.info("Expanding metadata list...") - list_of_keys = self._get_list_of_keys(metadata_list) + list_of_keys = _get_list_of_keys(metadata_list) for key_in_list in list_of_keys: metadata_list = self._expand_metadata_list_item(metadata_list, key_in_list) return metadata_list @@ -867,77 +890,6 @@ def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): self._LOGGER.warning("expand_metadata_list: Value Error: {err}") return metadata_list - def _filter_gsm(self, meta_processed_samples: list, gsm_list: dict) -> list: - """ - Getting metadata list of all samples of one experiment and filtering it - by the list of GSM that was specified in the input files. - And then changing names of the sample names. - - :param meta_processed_samples: list of metadata dicts of samples - :param gsm_list: list of dicts where GSM (samples) are keys and - sample names are values. 
Where values can be empty string - """ - - if gsm_list.keys(): - new_gsm_list = [] - for gsm_sample in meta_processed_samples: - if gsm_sample["Sample_geo_accession"] in gsm_list.keys(): - gsm_sample_new = gsm_sample - if gsm_list[gsm_sample["Sample_geo_accession"]] != "": - gsm_sample_new["sample_name"] = gsm_list[ - gsm_sample["Sample_geo_accession"] - ] - new_gsm_list.append(gsm_sample_new) - return new_gsm_list - return meta_processed_samples - - @staticmethod - def _get_list_of_keys(list_of_dict: list): - """ - Getting list of all keys that are in the dictionaries in the list - - :param list list_of_dict: list of dicts with metadata - :return list: list of dictionary keys - """ - - list_of_keys = [] - for element in list_of_dict: - list_of_keys.extend(list(element.keys())) - return list(set(list_of_keys)) - - def _unify_list_keys(self, processed_meta_list: list) -> list: - """ - Unifying list of dicts with metadata, so every dict will have - same keys - - :param list processed_meta_list: list of dicts with metadata - :return list: list of unified dicts with metadata - """ - list_of_keys = self._get_list_of_keys(processed_meta_list) - for k in list_of_keys: - for list_elem in range(len(processed_meta_list)): - if k not in processed_meta_list[list_elem]: - processed_meta_list[list_elem][k] = "" - return processed_meta_list - - def _find_genome(self, metadata_list): - """ - Create new genome column by searching joining few columns - """ - list_keys = self._get_list_of_keys(metadata_list) - genome_keys = [ - "assembly", - "genome_build", - ] - proj_gen_keys = list(set(list_keys).intersection(genome_keys)) - - for sample in enumerate(metadata_list): - sample_genome = "" - for key in proj_gen_keys: - sample_genome = " ".join([sample_genome, sample[1][key]]) - metadata_list[sample[0]][NEW_GENOME_COL_NAME] = sample_genome - return metadata_list - def _write_gsm_annotation(self, gsm_metadata: dict, file_annotation: str) -> str: """ Write metadata sheet out as an 
annotation file. @@ -990,7 +942,7 @@ def _write_processed_annotation( os.makedirs(pep_file_folder) self._LOGGER.info("Unifying and saving of metadata... ") - processed_metadata = self._unify_list_keys(processed_metadata) + processed_metadata = _unify_list_keys(processed_metadata) # delete rare keys processed_metadata = self._find_genome(processed_metadata) @@ -1023,7 +975,7 @@ def _write_processed_annotation( # save .pep.yaml file if self.add_dotfile: dot_yaml_path = os.path.join(pep_file_folder, ".pep.yaml") - self._create_dot_yaml(dot_yaml_path, yaml_name) + _create_dot_yaml(dot_yaml_path, yaml_name) return None @@ -1034,6 +986,27 @@ def _write_processed_annotation( proj = peppy.Project().from_pandas(pd_value, config=conf) return proj + @staticmethod + def _find_genome(metadata_list: list) -> list: + """ + Create new genome column by searching joining few columns + :param metadata_list: list with metadata dict + :return: list with metadata dict where genome column was added + """ + list_keys = _get_list_of_keys(metadata_list) + genome_keys = [ + "assembly", + "genome_build", + ] + proj_gen_keys = list(set(list_keys).intersection(genome_keys)) + + for sample in enumerate(metadata_list): + sample_genome = "" + for key in proj_gen_keys: + sample_genome = " ".join([sample_genome, sample[1][key]]) + metadata_list[sample[0]][NEW_GENOME_COL_NAME] = sample_genome + return metadata_list + def _write_raw_annotation_new( self, name, metadata_dict: dict, subannot_dict: dict = None ) -> Union[None, peppy.Project]: @@ -1104,7 +1077,7 @@ def _write_raw_annotation_new( self._write(proj_root_yaml, template, msg_pre=" Config file: ") if self.add_dotfile: - self._create_dot_yaml(dot_yaml_path, yaml_name) + _create_dot_yaml(dot_yaml_path, yaml_name) else: meta_df = pd.DataFrame.from_dict(metadata_dict, orient="index") @@ -1141,7 +1114,7 @@ def _create_config_processed( with open(config_template, "r") as template_file: template = template_file.read() meta_list_str = [ - 
f'{list(i.keys())[0]}: "{self._sanitize_config_string(list(i.values())[0])}"' + f'{list(i.keys())[0]}: "{_sanitize_config_string(list(i.values())[0])}"' for i in proj_meta ] modifiers_str = "\n ".join(d for d in meta_list_str) @@ -1167,7 +1140,7 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): :return: generated, complete config file content """ meta_list_str = [ - f'{list(i.keys())[0]}: "{self._sanitize_config_string(list(i.values())[0])}"' + f'{list(i.keys())[0]}: "{_sanitize_config_string(list(i.values())[0])}"' for i in proj_meta ] modifiers_str = "\n ".join(d for d in meta_list_str) @@ -1203,51 +1176,15 @@ def _check_sample_name_standard(self, metadata_dict: dict) -> dict: if value_sample["sample_name"] == "" or value_sample["sample_name"] is None: fixed_dict[key_sample]["sample_name"] = value_sample["Sample_title"] # sanitize names - fixed_dict[key_sample]["sample_name"] = self._sanitize_name( + fixed_dict[key_sample]["sample_name"] = _sanitize_name( fixed_dict[key_sample]["sample_name"] ) metadata_dict = fixed_dict - metadata_dict = self._standardize_colnames(metadata_dict) + metadata_dict = _standardize_colnames(metadata_dict) return metadata_dict @staticmethod - def _sanitize_config_string(text: str) -> str: - """ - Function that sanitizes text in config file. - :param text: Any string that have to be sanitized - :return: sanitized strings - """ - new_str = text - new_str = new_str.replace('"', f'\\"') - new_str = new_str.replace("'", f"''") - return new_str - - @staticmethod - def _sanitize_name(name_str: str) -> str: - """ - Function that sanitizes strings. (Replace all odd characters) - :param str name_str: Any string value that has to be sanitized. 
- :return: sanitized strings - """ - new_str = name_str - punctuation1 = r"""!"#$%&'()*,./:;<=>?@[\]^_`{|}~""" - for odd_char in list(punctuation1): - new_str = new_str.replace(odd_char, "_") - new_str = new_str.replace(" ", "_").replace("__", "_") - return new_str - - @staticmethod - def _create_dot_yaml(file_path: str, yaml_path: str) -> NoReturn: - """ - Function that creates .pep.yaml file that points to actual yaml file - :param str file_path: Path to the .pep.yaml file that we want to create - :param str yaml_path: path or name of the actual yaml file - """ - with open(file_path, "w+") as file: - file.writelines(f"config_file: {yaml_path}") - def _separate_common_meta( - self, meta_list: Union[List, Dict], max_len: int = 50, del_limit: int = 250, @@ -1267,9 +1204,9 @@ def _separate_common_meta( input_is_dict = False if isinstance(meta_list, dict): input_is_dict = True - meta_list = self._dict_to_list_convector(proj_dict=meta_list) + meta_list = _dict_to_list_converter(proj_dict=meta_list) - list_of_keys = self._get_list_of_keys(meta_list) + list_of_keys = _get_list_of_keys(meta_list) list_keys_diff = [] # finding columns with common values for this_key in list_of_keys: @@ -1323,70 +1260,9 @@ def _separate_common_meta( meta_list = new_list if input_is_dict: - meta_list = self._dict_to_list_convector(proj_list=meta_list) + meta_list = _dict_to_list_converter(proj_list=meta_list) return meta_list, new_meta_project - def _standardize_colnames(self, meta_list: Union[list, dict]) -> Union[list, dict]: - """ - Standardize column names by lower-casing and underscore - :param list meta_list: list of dictionaries of samples - :return : list of dictionaries of samples with standard colnames - """ - # check if meta_list is dict and converting it to list - input_is_dict = False - if isinstance(meta_list, dict): - input_is_dict = True - meta_list = self._dict_to_list_convector(proj_dict=meta_list) - - new_metalist = [] - list_keys = self._get_list_of_keys(meta_list) - for 
item_nb, values in enumerate(meta_list): - new_metalist.append({}) - for key in list_keys: - try: - new_key_name = key.lower().strip() - new_key_name = self._sanitize_name(new_key_name) - - new_metalist[item_nb][new_key_name] = values[key] - - except KeyError: - pass - - if input_is_dict: - new_metalist = self._dict_to_list_convector(proj_list=new_metalist) - - return new_metalist - - @staticmethod - def _dict_to_list_convector( - proj_dict: Dict = None, proj_list: List = None - ) -> Union[Dict, List]: - """ - Convector project dict to list and vice versa - :param proj_dict: project dictionary - :param proj_list: project list - :return: converted values - """ - if proj_dict is not None: - new_meta_list = [] - for key in proj_dict: - new_dict = proj_dict[key] - new_dict["big_key"] = key - new_meta_list.append(new_dict) - - meta_list = new_meta_list - - elif proj_list is not None: - new_sample_dict = {} - for sample in proj_list: - new_sample_dict[sample["big_key"]] = sample - meta_list = new_sample_dict - - else: - raise ValueError - - return meta_list - def _download_SRA_file(self, run_name: str): """ Downloading SRA file by ising 'prefetch' utility from the SRA Toolkit @@ -1415,28 +1291,7 @@ def _download_SRA_file(self, run_name: str): ) time.sleep(t * 2) - @staticmethod - def _which(program: str): - """ - return str: the path to a program to make sure it exists - """ - import os - - def is_exe(fp): - return os.path.isfile(fp) and os.access(fp, os.X_OK) - - fpath, fname = os.path.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ["PATH"].split(os.pathsep): - path = path.strip('"') - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - - def _sra_bam_conversion(self, bam_file: str, run_name: str) -> NoReturn: + def _sra_bam_conversion1(self, bam_file: str, run_name: str) -> NoReturn: """ Converting of SRA file to BAM file by using samtools function "sam-dump" :param str bam_file: path to 
BAM file that has to be created @@ -1460,53 +1315,6 @@ def _sra_bam_conversion(self, bam_file: str, run_name: str) -> NoReturn: self._LOGGER.info(f"Conversion command: {cmd}") run_subprocess(cmd, shell=True) - @staticmethod - def _update_columns( - metadata: dict, experiment_name: str, sample_name: str, read_type: str - ) -> dict: - """ - Update the metadata associated with a particular experiment. - - For the experiment indicated, this function updates the value (mapping), - including new data and populating columns used by looper based on - existing values in the mapping. - - :param Mapping metadata: the key-value mapping to update - :param str experiment_name: name of the experiment from which these - data came and are associated; the key in the metadata mapping - for which the value is to be updated - :param str sample_name: name of the sample with which these data are - associated - :param str read_type: usually "single" or "paired," an indication of the - type of sequencing reads for this experiment - :return: updated metadata - """ - - exp = metadata[experiment_name] - - # Protocol-agnostic - exp["sample_name"] = sample_name - exp["protocol"] = exp["Sample_library_selection"] - exp["read_type"] = read_type - exp["organism"] = exp["Sample_organism_ch1"] - exp["data_source"] = "SRA" - exp["SRX"] = experiment_name - - # Protocol specified is lowercased prior to checking here to alleviate - # dependence on case for the value in the annotations file. - bisulfite_protocols = {"reduced representation": "RRBS", "random": "WGBS"} - - # Conditional on bisulfite sequencing - # print(":" + exp["Sample_library_strategy"] + ":") - # Try to be smart about some library methods, refining protocol if possible. 
- if exp["Sample_library_strategy"] == "Bisulfite-Seq": - # print("Parsing protocol") - proto = exp["Sample_library_selection"].lower() - if proto in bisulfite_protocols: - exp["protocol"] = bisulfite_protocols[proto] - - return exp - def _sra_bam_conversion2( self, bam_file: str, run_name: str, picard_path: str = None ) -> NoReturn: @@ -1623,7 +1431,7 @@ def _get_list_of_processed_files( for line in file_gse_content: if re.compile(r"!Series_geo_accession").search(line): - gse_numb = self._get_value(line) + gse_numb = _get_value(line) meta_processed_series["GSE"] = gse_numb found = re.findall(SER_SUPP_FILE_PATTERN, line) @@ -1670,7 +1478,7 @@ def _get_list_of_processed_files( nb = len(meta_processed_samples) - 1 for line_gsm in file_gsm_content: if line_gsm[0] == "^": - nb = len(self._check_file_existance(meta_processed_samples)) + nb = len(_check_file_existance(meta_processed_samples)) meta_processed_samples.append( {"files": [], "GSE": gse_numb} ) @@ -1714,13 +1522,11 @@ def _get_list_of_processed_files( if file_url_gsm != "NONE": meta_processed_samples[nb]["files"].append(file_url_gsm) - self._check_file_existance(meta_processed_samples) - meta_processed_samples = self._separate_list_of_files( - meta_processed_samples - ) - meta_processed_samples = self._separate_file_url( + _check_file_existance(meta_processed_samples) + meta_processed_samples = _separate_list_of_files( meta_processed_samples ) + meta_processed_samples = _separate_file_url(meta_processed_samples) self._LOGGER.info( f"\nTotal number of processed SAMPLES files found is: " @@ -1728,7 +1534,7 @@ def _get_list_of_processed_files( ) # expand meta_processed_samples with information about type and size - file_info_add = self._read_tar_filelist(filelist_raw_text) + file_info_add = _read_tar_filelist(filelist_raw_text) for index_nr in range(len(meta_processed_samples)): file_name = meta_processed_samples[index_nr]["file"] meta_processed_samples[index_nr].update( @@ -1767,8 +1573,8 @@ def 
_get_list_of_processed_files( f"IndexError in adding value to meta_processed_series: %s" % ind_err ) - meta_processed_series = self._separate_list_of_files(meta_processed_series) - meta_processed_series = self._separate_file_url(meta_processed_series) + meta_processed_series = _separate_list_of_files(meta_processed_series) + meta_processed_series = _separate_file_url(meta_processed_series) self._LOGGER.info( f"Total number of processed SERIES files found is: " f"%s" % str(len(meta_processed_series)) @@ -1778,66 +1584,6 @@ def _get_list_of_processed_files( return meta_processed_samples, meta_processed_series - @staticmethod - def _check_file_existance(meta_processed_sample: list): - """ - Checking if last element of the list has files. If list of files is empty deleting it - """ - nb = len(meta_processed_sample) - 1 - if nb > -1: - if len(meta_processed_sample[nb]["files"]) == 0: - del meta_processed_sample[nb] - nb -= 1 - return meta_processed_sample - - @staticmethod - def _separate_list_of_files(meta_list, col_name="files"): - """ - This method is separating list of files (dict value) or just simple dict - into two different dicts - """ - separated_list = [] - if isinstance(meta_list, list): - for meta_elem in meta_list: - for file_elem in meta_elem[col_name]: - new_dict = meta_elem.copy() - new_dict.pop(col_name, None) - new_dict["file"] = file_elem - separated_list.append(new_dict) - elif isinstance(meta_list, dict): - for file_elem in meta_list[col_name]: - new_dict = meta_list.copy() - new_dict.pop(col_name, None) - new_dict["file"] = file_elem - separated_list.append(new_dict) - else: - return TypeError("Incorrect type") - - return separated_list - - def _separate_file_url(self, meta_list): - """ - This method is adding dict key without file_name without path - """ - separated_list = [] - for meta_elem in meta_list: - new_dict = meta_elem.copy() - new_dict["file_url"] = meta_elem["file"] - new_dict["file"] = os.path.basename(meta_elem["file"]) - # 
new_dict["sample_name"] = os.path.basename(meta_elem["file"]) - try: - new_dict["sample_name"] = str(meta_elem["Sample_title"]) - if new_dict["sample_name"] == "" or new_dict["sample_name"] is None: - raise KeyError("sample_name Does not exist. Creating .. ") - except KeyError: - new_dict["sample_name"] = os.path.basename(meta_elem["file"]) - - # sanitize sample names - new_dict["sample_name"] = self._sanitize_name(new_dict["sample_name"]) - - separated_list.append(new_dict) - return separated_list - def _run_filter(self, meta_list, col_name="file"): """ If user specified filter it will filter all this files here by col_name @@ -1873,41 +1619,6 @@ def _run_size_filter(self, meta_list, col_name="file_size"): ) return filtered_list - @staticmethod - def _read_tar_filelist(raw_text: str): - """ - Creating list for supplementary files that are listed in "filelist.txt" - :param str raw_text: path to the file with information about files that are zipped ("filelist.txt") - :return dict: dict of supplementary file names and additional information - """ - f = StringIO(raw_text) - files_info = {} - csv_reader = csv.reader(f, delimiter="\t") - line_count = 0 - for row in csv_reader: - if line_count == 0: - name_index = row.index("Name") - size_index = row.index("Size") - type_index = row.index("Type") - - line_count += 1 - else: - files_info[row[name_index]] = { - "file_size": row[size_index], - "type": row[type_index], - } - - return files_info - - @staticmethod - def _get_value(all_line: str): - """ - :param all_line: string with key value. (e.g. '!Series_geo_accession = GSE188720') - :return: value (e.g. GSE188720) - """ - line_value = all_line.split("= ")[-1] - return line_value.split(": ")[-1].rstrip("\n") - def _download_processed_file(self, file_url: str, data_folder: str) -> bool: """ Given a url for a file, download it, and extract anything passing the filter. 
@@ -1993,7 +1704,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): try: # downloading metadata srp_list = self._get_SRP_list(acc_SRP) - srp_list = self._unify_list_keys(srp_list) + srp_list = _unify_list_keys(srp_list) if file_sra is not None and not self.discard_soft: with open(file_sra, "w") as m_file: dict_writer = csv.DictWriter(m_file, srp_list[0].keys()) diff --git a/geofetch/utils.py b/geofetch/utils.py index 7917c67..b8e4e08 100644 --- a/geofetch/utils.py +++ b/geofetch/utils.py @@ -6,6 +6,9 @@ import sys import re import requests +from io import StringIO +import csv +from typing import NoReturn, Dict, List, Union _LOGGER = logging.getLogger(__name__) @@ -19,7 +22,7 @@ } -def is_known_type(accn=None, typename=None): +def is_known_type(accn: str = None, typename: str = None): """ Determine if the given accession is of a known type. @@ -49,7 +52,7 @@ def parse_accessions(input_arg, metadata_folder, just_metadata=False): interested in from that GSE#. An empty sample list means we should get all samples from that GSE#. This loop will create this dict. - :param input_arg: + :param input_arg: Input argument (GSE, or file) :param str metadata_folder: path to folder for accession metadata :param bool just_metadata: whether to only process metadata, not the actual data associated with the accession @@ -119,13 +122,12 @@ def parse_accessions(input_arg, metadata_folder, just_metadata=False): return acc_GSE_list -def parse_SOFT_line(l): +def parse_SOFT_line(l: str) -> dict: """ Parse SOFT formatted line, returning a dictionary with the key-value pair. :param str l: A SOFT-formatted line to parse ( !key = value ) :return dict[str, str]: A python Dict object representing the key-value. 
- :raise InvalidSoftLineException: if given line can't be parsed as SOFT line """ elems = l[1:].split("=") return {elems[0].rstrip(): elems[1].lstrip()} @@ -134,7 +136,7 @@ def parse_SOFT_line(l): class AccessionException(Exception): """Exceptional condition(s) dealing with accession number(s).""" - def __init__(self, reason=""): + def __init__(self, reason: str = ""): """ Optionally provide explanation for exceptional condition. @@ -230,8 +232,12 @@ def fetch_metadata( return result_list @staticmethod - def _validate(accn): - """Determine if given value looks like an accession.""" + def _validate(accn: str): + """ + Determine if given value looks like an accession. + :param str accn: ordinary accession identifier. + :return: typename, number + """ typename, number = split_accn(accn) if len(typename) != 3: raise AccessionException( @@ -248,7 +254,7 @@ def _validate(accn): return typename, number @staticmethod - def accn_type_exception(accn, typename, include_known=True): + def accn_type_exception(accn: str, typename: str, include_known: bool = True): """ Create an exception instance based on an accession and a parsed unknown typename. @@ -266,7 +272,7 @@ def accn_type_exception(accn, typename, include_known=True): return AccessionException(message) -def split_accn(accn): +def split_accn(accn: str): """ Split accession into prefix and number, leaving suffix as text and converting the type prefix to uppercase. @@ -341,4 +347,324 @@ def run_subprocess(*args, **kwargs): sys.exit(1) -# file_gse_content +def _get_list_of_keys(list_of_dict: list): + """ + Getting list of all keys that are in the dictionaries in the list + + :param list list_of_dict: list of dicts with metadata + :return list: list of dictionary keys + """ + + list_of_keys = [] + for element in list_of_dict: + list_of_keys.extend(list(element.keys())) + return list(set(list_of_keys)) + + +def _get_value(all_line: str): + """ + :param all_line: string with key value. (e.g. 
'!Series_geo_accession = GSE188720') + :return: value (e.g. GSE188720) + """ + line_value = all_line.split("= ")[-1] + return line_value.split(": ")[-1].rstrip("\n") + + +def _read_tar_filelist(raw_text: str) -> dict: + """ + Creating list for supplementary files that are listed in "filelist.txt" + :param str raw_text: path to the file with information about files that are zipped ("filelist.txt") + :return dict: dict of supplementary file names and additional information + """ + f = StringIO(raw_text) + files_info = {} + csv_reader = csv.reader(f, delimiter="\t") + line_count = 0 + for row in csv_reader: + if line_count == 0: + name_index = row.index("Name") + size_index = row.index("Size") + type_index = row.index("Type") + + line_count += 1 + else: + files_info[row[name_index]] = { + "file_size": row[size_index], + "type": row[type_index], + } + + return files_info + + +def _check_file_existance(meta_processed_sample: list) -> list: + """ + Checking if last element of the list has files. 
If list of files is empty deleting it + :param: meta_processed_sample: list with metadata dictionary + :return: list with metadata dictionary after processing + """ + nb = len(meta_processed_sample) - 1 + if nb > -1: + if len(meta_processed_sample[nb]["files"]) == 0: + del meta_processed_sample[nb] + nb -= 1 + return meta_processed_sample + + +def _separate_list_of_files(meta_list: Union[list, dict], col_name: str = "files"): + """ + This method is separating list of files (dict value) or just simple dict + into two different dicts + :param col_name: column name that should be added with filenames + :param meta_list: list, or dict with metadata + """ + separated_list = [] + if isinstance(meta_list, list): + for meta_elem in meta_list: + for file_elem in meta_elem[col_name]: + new_dict = meta_elem.copy() + new_dict.pop(col_name, None) + new_dict["file"] = file_elem + separated_list.append(new_dict) + elif isinstance(meta_list, dict): + for file_elem in meta_list[col_name]: + new_dict = meta_list.copy() + new_dict.pop(col_name, None) + new_dict["file"] = file_elem + separated_list.append(new_dict) + else: + return TypeError("Incorrect type") + + return separated_list + + +def _update_columns( + metadata: dict, experiment_name: str, sample_name: str, read_type: str +) -> dict: + """ + Update the metadata associated with a particular experiment. + + For the experiment indicated, this function updates the value (mapping), + including new data and populating columns used by looper based on + existing values in the mapping. 
+ + :param Mapping metadata: the key-value mapping to update + :param str experiment_name: name of the experiment from which these + data came and are associated; the key in the metadata mapping + for which the value is to be updated + :param str sample_name: name of the sample with which these data are + associated + :param str read_type: usually "single" or "paired," an indication of the + type of sequencing reads for this experiment + :return: updated metadata + """ + + exp = metadata[experiment_name] + + # Protocol-agnostic + exp["sample_name"] = sample_name + exp["protocol"] = exp["Sample_library_selection"] + exp["read_type"] = read_type + exp["organism"] = exp["Sample_organism_ch1"] + exp["data_source"] = "SRA" + exp["SRX"] = experiment_name + + # Protocol specified is lowercased prior to checking here to alleviate + # dependence on case for the value in the annotations file. + bisulfite_protocols = {"reduced representation": "RRBS", "random": "WGBS"} + + # Conditional on bisulfite sequencing + # print(":" + exp["Sample_library_strategy"] + ":") + # Try to be smart about some library methods, refining protocol if possible. + if exp["Sample_library_strategy"] == "Bisulfite-Seq": + # print("Parsing protocol") + proto = exp["Sample_library_selection"].lower() + if proto in bisulfite_protocols: + exp["protocol"] = bisulfite_protocols[proto] + + return exp + + +def _sanitize_config_string(text: str) -> str: + """ + Function that sanitizes text in config file. + :param text: Any string that have to be sanitized + :return: sanitized strings + """ + new_str = text + new_str = new_str.replace('"', f'\\"') + new_str = new_str.replace("'", f"''") + return new_str + + +def _sanitize_name(name_str: str) -> str: + """ + Function that sanitizes strings. (Replace all odd characters) + :param str name_str: Any string value that has to be sanitized. 
+ :return: sanitized strings + """ + new_str = name_str + punctuation1 = r"""!"#$%&'()*,./:;<=>?@[\]^_`{|}~""" + for odd_char in list(punctuation1): + new_str = new_str.replace(odd_char, "_") + new_str = new_str.replace(" ", "_").replace("__", "_") + return new_str + + +def _create_dot_yaml(file_path: str, yaml_path: str) -> NoReturn: + """ + Function that creates .pep.yaml file that points to actual yaml file + :param str file_path: Path to the .pep.yaml file that we want to create + :param str yaml_path: path or name of the actual yaml file + """ + with open(file_path, "w+") as file: + file.writelines(f"config_file: {yaml_path}") + + +def _which(program: str): + """ + return str: the path to a program to make sure it exists + """ + import os + + def is_exe(fp): + return os.path.isfile(fp) and os.access(fp, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + path = path.strip('"') + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + +def _dict_to_list_converter( + proj_dict: Dict = None, proj_list: List = None +) -> Union[Dict, List]: + """ + Converter project dict to list and vice versa + dict -> list + list -> dict + :param proj_dict: project dictionary + :param proj_list: project list + :return: converted values + """ + if proj_dict is not None: + new_meta_list = [] + for key in proj_dict: + new_dict = proj_dict[key] + new_dict["big_key"] = key + new_meta_list.append(new_dict) + + meta_list = new_meta_list + + elif proj_list is not None: + new_sample_dict = {} + for sample in proj_list: + new_sample_dict[sample["big_key"]] = sample + meta_list = new_sample_dict + + else: + raise ValueError + + return meta_list + + +def _standardize_colnames(meta_list: Union[list, dict]) -> Union[list, dict]: + """ + Standardize column names by lower-casing and underscore + :param list meta_list: list of dictionaries of samples + 
:return : list of dictionaries of samples with standard colnames + """ + # check if meta_list is dict and converting it to list + input_is_dict = False + if isinstance(meta_list, dict): + input_is_dict = True + meta_list = _dict_to_list_converter(proj_dict=meta_list) + + new_metalist = [] + list_keys = _get_list_of_keys(meta_list) + for item_nb, values in enumerate(meta_list): + new_metalist.append({}) + for key in list_keys: + try: + new_key_name = key.lower().strip() + new_key_name = _sanitize_name(new_key_name) + + new_metalist[item_nb][new_key_name] = values[key] + + except KeyError: + pass + + if input_is_dict: + new_metalist = _dict_to_list_converter(proj_list=new_metalist) + + return new_metalist + + +def _separate_file_url(meta_list): + """ + This method is adding dict key without file_name without path + """ + separated_list = [] + for meta_elem in meta_list: + new_dict = meta_elem.copy() + new_dict["file_url"] = meta_elem["file"] + new_dict["file"] = os.path.basename(meta_elem["file"]) + # new_dict["sample_name"] = os.path.basename(meta_elem["file"]) + try: + new_dict["sample_name"] = str(meta_elem["Sample_title"]) + if new_dict["sample_name"] == "" or new_dict["sample_name"] is None: + raise KeyError("sample_name Does not exist. Creating .. ") + except KeyError: + new_dict["sample_name"] = os.path.basename(meta_elem["file"]) + + # sanitize sample names + new_dict["sample_name"] = _sanitize_name(new_dict["sample_name"]) + + separated_list.append(new_dict) + return separated_list + + +def _filter_gsm(meta_processed_samples: list, gsm_list: dict) -> list: + """ + Getting metadata list of all samples of one experiment and filtering it + by the list of GSM that was specified in the input files. + And then changing names of the sample names. + + :param meta_processed_samples: list of metadata dicts of samples + :param gsm_list: list of dicts where GSM (samples) are keys and + sample names are values. 
Where values can be empty string + """ + + if gsm_list.keys(): + new_gsm_list = [] + for gsm_sample in meta_processed_samples: + if gsm_sample["Sample_geo_accession"] in gsm_list.keys(): + gsm_sample_new = gsm_sample + if gsm_list[gsm_sample["Sample_geo_accession"]] != "": + gsm_sample_new["sample_name"] = gsm_list[ + gsm_sample["Sample_geo_accession"] + ] + new_gsm_list.append(gsm_sample_new) + return new_gsm_list + return meta_processed_samples + + +def _unify_list_keys(processed_meta_list: list) -> list: + """ + Unifying list of dicts with metadata, so every dict will have + same keys + + :param list processed_meta_list: list of dicts with metadata + :return list: list of unified dicts with metadata + """ + list_of_keys = _get_list_of_keys(processed_meta_list) + for k in list_of_keys: + for list_elem in range(len(processed_meta_list)): + if k not in processed_meta_list[list_elem]: + processed_meta_list[list_elem][k] = "" + return processed_meta_list From 924b5581586591e3ccac23e84fb0544298aa044b Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 25 Oct 2022 13:06:13 -0400 Subject: [PATCH 55/61] Adding comments to the config file --- geofetch/config_processed_template.yaml | 2 ++ geofetch/config_template.yaml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/geofetch/config_processed_template.yaml b/geofetch/config_processed_template.yaml index 9f8eb38..1198863 100644 --- a/geofetch/config_processed_template.yaml +++ b/geofetch/config_processed_template.yaml @@ -11,6 +11,8 @@ sample_modifiers: {additional_columns} # End of project metadata {pipeline_samples} + + # adding file paths to the project derive: attributes: [output_file_path] sources: diff --git a/geofetch/config_template.yaml b/geofetch/config_template.yaml index 59fcaa2..de5ccf5 100644 --- a/geofetch/config_template.yaml +++ b/geofetch/config_template.yaml @@ -12,6 +12,8 @@ sample_modifiers: # End of project metadata SRR_files: SRA {pipeline_samples} + + # Adding additional infromation to the 
project derive: attributes: [read1, read2, SRR_files] sources: From 88b251c4410797b67b0c5a8826d3f4e562ac62cc Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 25 Oct 2022 16:53:32 -0400 Subject: [PATCH 56/61] making SRA looper optional --- MANIFEST.in | 1 + geofetch/cli.py | 6 +++++ geofetch/config_template.yaml | 45 +------------------------------- geofetch/const.py | 1 + geofetch/geofetch.py | 24 ++++++++++++----- geofetch/looper_sra_convert.yaml | 45 ++++++++++++++++++++++++++++++++ 6 files changed, 71 insertions(+), 51 deletions(-) create mode 100644 geofetch/looper_sra_convert.yaml diff --git a/MANIFEST.in b/MANIFEST.in index e704c91..4f3018a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,3 +3,4 @@ include README.md include docs/img/geofetch_logo.svg include geofetch/config_template.yaml include geofetch/config_processed_template.yaml +include geofetch/looper_sra_convert.yaml diff --git a/geofetch/cli.py b/geofetch/cli.py index 54ce083..1d5bcce 100644 --- a/geofetch/cli.py +++ b/geofetch/cli.py @@ -276,5 +276,11 @@ def _parse_cmdl(cmdl): help="Use just the keys defined in this module when writing out metadata.", ) + raw_group.add_argument( + "--add-convert-modifier", + action="store_true", + help="Add looper SRA convert modifier to config file.", + ) + logmuse.add_logging_options(parser) return parser.parse_args(cmdl) diff --git a/geofetch/config_template.yaml b/geofetch/config_template.yaml index de5ccf5..9de5e97 100644 --- a/geofetch/config_template.yaml +++ b/geofetch/config_template.yaml @@ -10,51 +10,8 @@ sample_modifiers: # Project metadata: {additional_columns} # End of project metadata - SRR_files: SRA {pipeline_samples} - # Adding additional infromation to the project - derive: - attributes: [read1, read2, SRR_files] - sources: - SRA: "${SRABAM}/{SRR}.bam" - FQ: "${SRAFQ}/{SRR}.fastq.gz" - FQ1: "${SRAFQ}/{SRR}_1.fastq.gz" - FQ2: "${SRAFQ}/{SRR}_2.fastq.gz" - imply: - - if: - organism: "Mus musculus" - then: - genome: mm10 - - if: - organism: 
"Homo sapiens" - then: - genome: hg38 - - if: - read_type: "PAIRED" - then: - read1: FQ1 - read2: FQ2 - - if: - read_type: "SINGLE" - then: - read1: FQ1 - -project_modifiers: - amend: - sra_convert: - looper: - results_subdir: sra_convert_results - sample_modifiers: - append: - SRR_files: SRA - pipeline_interfaces: ${CODE}/geofetch/pipeline_interface_convert.yaml - derive: - attributes: [read1, read2, SRR_files] - sources: - SRA: "${SRARAW}/{SRR}.sra" - FQ: "${SRAFQ}/{SRR}.fastq.gz" - FQ1: "${SRAFQ}/{SRR}_1.fastq.gz" - FQ2: "${SRAFQ}/{SRR}_2.fastq.gz" +{sra_convert} {pipeline_project} diff --git a/geofetch/const.py b/geofetch/const.py index 91669c3..2267223 100644 --- a/geofetch/const.py +++ b/geofetch/const.py @@ -46,6 +46,7 @@ CONFIG_PROCESSED_TEMPLATE_NAME = "config_processed_template.yaml" CONFIG_RAW_TEMPLATE_NAME = "config_template.yaml" +CONFIG_SRA_TEMPLATE = "looper_sra_convert.yaml" # const for Finder: RETMAX = 10000000 # once it should be increased diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 50d0a58..6122944 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -85,6 +85,7 @@ def __init__( discard_soft: bool = False, add_dotfile: bool = False, disable_progressbar: bool = False, + add_convert_modifier: bool = False, opts=None, **kwargs, ): @@ -143,6 +144,7 @@ def __init__( :param bam_conversion: Optional: set True to convert bam files [Works with raw data] :param picard_path: Specify a path to the picard jar, if you want to convert fastq to bam [Default: $PICARD:" + safe_echo("PICARD") + "] [Works with raw data] + :param add_convert_modifier: Add looper SRA convert modifier to config file. :param skip: Skip some accessions. [Default: no skip]. 
:param opts: opts object [Optional] @@ -244,7 +246,7 @@ def __init__( self.discard_soft = discard_soft self.add_dotfile = add_dotfile self.disable_progressbar = disable_progressbar - + self.add_convert_modifier = add_convert_modifier self._LOGGER.info(f"Metadata folder: {self.metadata_expanded}") # Some sanity checks before proceeding @@ -638,7 +640,7 @@ def _download_raw_data(self, run_name: str) -> NoReturn: # converting sra to bam using # TODO: sam-dump has a built-in prefetch. I don't have to do # any of this stuff... This also solves the bad sam-dump issues. - self._sra_bam_conversion1(bam_file, run_name) + self._sra_to_bam_conversion_sam_dump(bam_file, run_name) # checking if bam_file converted correctly, if not --> use fastq-dump st = os.stat(bam_file) @@ -646,7 +648,7 @@ def _download_raw_data(self, run_name: str) -> NoReturn: self._LOGGER.warning( "Bam conversion failed with sam-dump. Trying fastq-dump..." ) - self._sra_bam_conversion2(bam_file, run_name, self.picard_path) + self._sra_to_bam_conversion_fastq_damp(bam_file, run_name, self.picard_path) except FileNotFoundError as err: self._LOGGER.info( @@ -1145,9 +1147,15 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): ] modifiers_str = "\n ".join(d for d in meta_list_str) # Write project config file + geofetchdir = os.path.dirname(__file__) if not self.config_template: - geofetchdir = os.path.dirname(__file__) self.config_template = os.path.join(geofetchdir, CONFIG_RAW_TEMPLATE_NAME) + if self.add_convert_modifier: + sra_convert_path = os.path.join(geofetchdir, CONFIG_SRA_TEMPLATE) + with open(sra_convert_path, "r") as template_file: + sra_convert_template = template_file.read() + else: + sra_convert_template = "" with open(self.config_template, "r") as template_file: template = template_file.read() template_values = { @@ -1157,13 +1165,15 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): "pipeline_samples": self.file_pipeline_samples, 
"pipeline_project": self.file_pipeline_project, "additional_columns": modifiers_str, + "sra_convert": sra_convert_template, } for k, v in template_values.items(): placeholder = "{" + str(k) + "}" template = template.replace(placeholder, str(v)) return template - def _check_sample_name_standard(self, metadata_dict: dict) -> dict: + @staticmethod + def _check_sample_name_standard(metadata_dict: dict) -> dict: """ Standardizing sample name and checking if it exists (This function is used for raw data) @@ -1291,7 +1301,7 @@ def _download_SRA_file(self, run_name: str): ) time.sleep(t * 2) - def _sra_bam_conversion1(self, bam_file: str, run_name: str) -> NoReturn: + def _sra_to_bam_conversion_sam_dump(self, bam_file: str, run_name: str) -> NoReturn: """ Converting of SRA file to BAM file by using samtools function "sam-dump" :param str bam_file: path to BAM file that has to be created @@ -1315,7 +1325,7 @@ def _sra_bam_conversion1(self, bam_file: str, run_name: str) -> NoReturn: self._LOGGER.info(f"Conversion command: {cmd}") run_subprocess(cmd, shell=True) - def _sra_bam_conversion2( + def _sra_to_bam_conversion_fastq_damp( self, bam_file: str, run_name: str, picard_path: str = None ) -> NoReturn: """ diff --git a/geofetch/looper_sra_convert.yaml b/geofetch/looper_sra_convert.yaml new file mode 100644 index 0000000..bf5905d --- /dev/null +++ b/geofetch/looper_sra_convert.yaml @@ -0,0 +1,45 @@ + # Adding sra convert looper pipeline + SRR_files: SRA + + derive: + attributes: [read1, read2, SRR_files] + sources: + SRA: "${SRABAM}/{SRR}.bam" + FQ: "${SRAFQ}/{SRR}.fastq.gz" + FQ1: "${SRAFQ}/{SRR}_1.fastq.gz" + FQ2: "${SRAFQ}/{SRR}_2.fastq.gz" + imply: + - if: + organism: "Mus musculus" + then: + genome: mm10 + - if: + organism: "Homo sapiens" + then: + genome: hg38 + - if: + read_type: "PAIRED" + then: + read1: FQ1 + read2: FQ2 + - if: + read_type: "SINGLE" + then: + read1: FQ1 + +project_modifiers: + amend: + sra_convert: + looper: + results_subdir: sra_convert_results + 
sample_modifiers: + append: + SRR_files: SRA + pipeline_interfaces: ${CODE}/geofetch/pipeline_interface_convert.yaml + derive: + attributes: [read1, read2, SRR_files] + sources: + SRA: "${SRARAW}/{SRR}.sra" + FQ: "${SRAFQ}/{SRR}.fastq.gz" + FQ1: "${SRAFQ}/{SRR}_1.fastq.gz" + FQ2: "${SRAFQ}/{SRR}_2.fastq.gz" From 360356a2c47fad08914815e150aad6e93c50b545 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 25 Oct 2022 21:52:01 -0400 Subject: [PATCH 57/61] docstring corrections --- geofetch/finder.py | 25 +++++++++---------------- geofetch/geofetch.py | 41 +++++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/geofetch/finder.py b/geofetch/finder.py index eeb105b..0cbd460 100644 --- a/geofetch/finder.py +++ b/geofetch/finder.py @@ -27,16 +27,17 @@ class Finder: """ - Class for finding GSE accessions in special period of time - Additionally user can add specific filters for the search. + Class for finding GSE accessions in special period of time. + Additionally, user can add specific filters for the search, + while initialization of the class """ def __init__(self, filters: str = None, retmax: int = RETMAX): """ - :param filters: filters that have to be added to query. + :param filters: filters that have to be added to the query. Filter Patterns can be found here: https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Using_the_Advanced_Search_Pag - :param retmax: maximum items should be retrieved + :param retmax: maximum number of retrieved accessions. 
""" self.query_customized_ending = ETOOLS_ENDING.format(retmax=retmax) self.query_filter_str = self._create_filter_str(filters) @@ -65,8 +66,8 @@ def get_gse_last_week(self) -> list: def get_gse_by_day_count(self, n_days: int = 1) -> list: """ - Get list of gse accession that were uploaded or updated in last specified number of days - :param n_days: number of days from now + Get list of gse accessions that were uploaded or updated in last X days + :param n_days: number of days from now [e.g. 5] :return: list of gse accession """ today = datetime.today() @@ -88,7 +89,7 @@ def get_gse_by_date(self, start_date: str, end_date: str = None) -> list: def get_gse_id_by_query(self, url: str) -> list: """ - Use esearch query to find uids and then convert them to gse ids + Run esearch (ncbi search tool) by specifying URL and retrieve gse list result :param url: url of the query :return: list of gse ids """ @@ -101,20 +102,12 @@ def get_gse_id_by_query(self, url: str) -> list: def uid_to_gse(uid: str) -> str: """ UID to GES accession converter - :param uid: uid string + :param uid: uid string (Unique Identifier Number in GEO) :return: GSE id string """ uid_regex = re.compile(r"[1-9]+0+([1-9]+[0-9]*)") return "GSE" + uid_regex.match(uid).group(1) - def read_file(self, file_path: str) -> list: - """ - Getting list of gse's from file by specifying a file path - :param file_path: path to the file - :return: list of gse's - """ - pass - @staticmethod def find_differences(old_list: list, new_list: list) -> list: """ diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 6122944..1c2e6cc 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -327,7 +327,8 @@ def get_projects( def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Project]: """ - Main script driver/workflow + Main function driver/workflow + Function that search, filters, downloads and save data and metadata from GEO and SRA :param input: GSE or input file with gse's :param name: 
Name of the project :return: NoReturn or peppy Project @@ -423,7 +424,7 @@ def fetch_all(self, input: str, name: str = None) -> Union[NoReturn, peppy.Proje ( meta_processed_samples, meta_processed_series, - ) = self.fetchone_processed( + ) = self.fetch_processed_one( gse_file_content=file_gse_content, gsm_file_content=file_gsm_content, gsm_filter_list=gsm_enter_dict, @@ -648,21 +649,23 @@ def _download_raw_data(self, run_name: str) -> NoReturn: self._LOGGER.warning( "Bam conversion failed with sam-dump. Trying fastq-dump..." ) - self._sra_to_bam_conversion_fastq_damp(bam_file, run_name, self.picard_path) + self._sra_to_bam_conversion_fastq_damp( + bam_file, run_name, self.picard_path + ) except FileNotFoundError as err: self._LOGGER.info( f"SRA file doesn't exist, please download it first: {err}" ) - def fetchone_processed( + def fetch_processed_one( self, gse_file_content: list, gsm_file_content: list, gsm_filter_list: dict, ) -> Tuple: """ - Fetching one processed GSE project + Fetching one processed GSE project and return its metadata :param gsm_file_content: gse soft file content :param gse_file_content: gsm soft file content :param gsm_filter_list: list of gsm that have to be downloaded @@ -755,7 +758,7 @@ def _download_processed_data( self, acc_gse: str, meta_processed_samples: list, meta_processed_series: list ) -> NoReturn: """ - Function that downloads processed data + Download processed data from GEO by providing project annotation list :param acc_gse: accession number of the project :param meta_processed_samples: list of annotation of samples :param meta_processed_series: list of annotation of series @@ -791,7 +794,7 @@ def _download_processed_data( for file_url in processed_series_files: self._download_processed_file(file_url, data_geo_folder) - def _expand_metadata_list_in_dict(self, metadata_dict: dict) -> dict: + def _expand_metadata_dict(self, metadata_dict: dict) -> dict: """ Expanding all lists of all items in the dict by creating new items or 
joining them @@ -1106,7 +1109,7 @@ def _create_config_processed( self, file_annotation_path: str, proj_meta: list ) -> str: """ - completing and generating config file content + Composing and generating config file content :param file_annotation_path: root to the annotation file :param proj_meta: common metadata that has to added to config file :return: generated, complete config file content @@ -1135,7 +1138,7 @@ def _create_config_processed( def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): """ - completing and generating config file content for raw data + Composing and generating config file content for raw data :param proj_meta: root to the annotation file :param proj_root_sample: path to sampletable file :param subanot_path_yaml: path to subannotation file @@ -1201,7 +1204,7 @@ def _separate_common_meta( attr_limit_truncate: int = 500, ) -> tuple: """ - This function is separating information for the experiment from a sample + This function is separating experiment(project) metadata from sample metadata :param list or dict meta_list: list of dictionaries of samples :param int max_len: threshold of the length of the common value that can be stored in the sample table :param int del_limit: threshold of the length of the common value that have to be deleted @@ -1399,7 +1402,7 @@ def _download_file( self, file_url: str, data_folder: str, new_name: str = None, sleep_after=0.5 ) -> NoReturn: """ - Given an url for a file, downloading to specified folder + Given an url for a file, downloading file to specified folder :param str file_url: the URL of the file to download :param str data_folder: path to the folder where data should be downloaded :param float sleep_after: time to sleep after downloading @@ -1594,9 +1597,12 @@ def _get_list_of_processed_files( return meta_processed_samples, meta_processed_series - def _run_filter(self, meta_list, col_name="file"): + def _run_filter(self, meta_list: list, col_name: str = "file") -> list: """ - 
If user specified filter it will filter all this files here by col_name + Filters files and metadata using Regular expression filter + :param meta_list: list of composed metadata + :param col_name: name of the column where file names are stored + :return: metadata list after file_name filter """ filtered_list = [] for meta_elem in meta_list: @@ -1611,7 +1617,10 @@ def _run_filter(self, meta_list, col_name="file"): def _run_size_filter(self, meta_list, col_name="file_size"): """ - function for filtering file size + Filters files and metadata by file size column specified in meta_list + :param meta_list: list of composed metadata + :param col_name: name of the column where is size information stored + :return: metadata list after size filter """ if self.filter_size is not None: filtered_list = [] @@ -1756,7 +1765,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): def _get_SRP_list(self, srp_number: str) -> list: """ - By using requests and xml searching and getting list of dicts of SRRs + Getting list of srp by using requests and xml searching and getting list of dicts of SRRs :param str srp_number: SRP number :return: list of dicts of SRRs """ @@ -1879,7 +1888,7 @@ def _read_gsm_metadata( current_sample_srx = True # GSM SOFT file parsed, save it in a list self._LOGGER.info(f"Processed {len(samples_list)} samples.") - gsm_metadata = self._expand_metadata_list_in_dict(gsm_metadata) + gsm_metadata = self._expand_metadata_dict(gsm_metadata) return gsm_metadata def _write( From 54107e327575e00e3c45d82b5195012cda0dd4c3 Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 25 Oct 2022 22:21:24 -0400 Subject: [PATCH 58/61] docstring corrections 2 --- geofetch/finder.py | 8 ++++---- geofetch/geofetch.py | 32 ++++++++++++++++---------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/geofetch/finder.py b/geofetch/finder.py index 0cbd460..1882e11 100644 --- a/geofetch/finder.py +++ b/geofetch/finder.py @@ -111,7 +111,7 @@ 
def uid_to_gse(uid: str) -> str: @staticmethod def find_differences(old_list: list, new_list: list) -> list: """ - Comparing 2 lists and searching for elements that are not in old list + Compare 2 lists and search for elements that are not in old list :param old_list: old list of elements :param new_list: new list of elements :return: list of elements that are not in old list but are in new_list @@ -141,7 +141,7 @@ def _run_search_query(url: str) -> list: @staticmethod def _create_filter_str(filters: str = None) -> str: """ - Tuning filter for url request + Tune filter for url request :param filters: filter should look like here: https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Using_the_Advanced_Search_Pag :return: tuned filter string """ @@ -151,7 +151,7 @@ def _create_filter_str(filters: str = None) -> str: def _compose_url(self, date_filter: str = None) -> str: """ - Composing final url by adding date filter + Compose final url by adding date filter :param date_filter: date filter that has to be used in the query :return: string of final url """ @@ -162,7 +162,7 @@ def _compose_url(self, date_filter: str = None) -> str: def generate_file(self, file_path: str, gse_list: list = None): """ - Saving list of gse numbers to the file + Save the list of GSE accessions stored in this Finder object to a given file :param file_path: root to the file where gse accessions have to be saved :param gse_list: list of gse accessions :return: NoReturn diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index 1c2e6cc..eb6c144 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -527,7 +527,7 @@ def _process_sra_meta( gsm_metadata: dict = None, ): """ - Creating srp multitable and updating gsm_metadata based on srp + Create srp multitable and update gsm_metadata based on srp :param srp_list_result: list of srp got from sra file :param gsm_enter_dict: gsm enter content :param gsm_metadata: dict of samples of gsm @@ -609,7 +609,7 @@ def _process_sra_meta( def 
_download_raw_data(self, run_name: str) -> NoReturn: """ - Downloading raw data from SRA by providing run name + Download raw data from SRA by providing run name :param run_name: Run name from SRA :return: NoReturn """ @@ -665,7 +665,7 @@ def fetch_processed_one( gsm_filter_list: dict, ) -> Tuple: """ - Fetching one processed GSE project and return its metadata + Fetch one processed GSE project and return its metadata :param gsm_file_content: gse soft file content :param gse_file_content: gsm soft file content :param gsm_filter_list: list of gsm that have to be downloaded @@ -796,7 +796,7 @@ def _download_processed_data( def _expand_metadata_dict(self, metadata_dict: dict) -> dict: """ - Expanding all lists of all items in the dict by creating new items or joining them + Expand all lists of all items in the dict by creating new items or joining them :param metadata_dict: metadata dict :return: expanded metadata dict @@ -820,7 +820,7 @@ def _expand_metadata_list(self, metadata_list: list) -> list: def _expand_metadata_list_item(self, metadata_list: list, dict_key: str): """ - Expanding list of one element (item) in the list by creating new items or joining them + Expand list of one element (item) in the list by creating new items or joining them ["first1: fff", ...] 
-> separate columns :param list metadata_list: list of dicts that store metadata @@ -928,7 +928,7 @@ def _write_processed_annotation( just_object: bool = False, ) -> Union[NoReturn, peppy.Project]: """ - Saving annotation file by providing list of dictionaries with files metadata + Save annotation file by providing list of dictionaries with files metadata :param list processed_metadata: list of dictionaries with files metadata :param str file_annotation_path: the path to the metadata file that has to be saved :type just_object: True, if you want to get peppy object without saving file @@ -1016,7 +1016,7 @@ def _write_raw_annotation_new( self, name, metadata_dict: dict, subannot_dict: dict = None ) -> Union[None, peppy.Project]: """ - Combining individual accessions into project-level annotations, and writing + Combine individual accessions into project-level annotations, and writing individual accession files (if requested) :param name: Name of the run, project, or acc --> will influence name of the folder where project will be created :param metadata_dict: dictionary of sample annotations @@ -1109,7 +1109,7 @@ def _create_config_processed( self, file_annotation_path: str, proj_meta: list ) -> str: """ - Composing and generating config file content + Compose and generate config file content :param file_annotation_path: root to the annotation file :param proj_meta: common metadata that has to added to config file :return: generated, complete config file content @@ -1138,7 +1138,7 @@ def _create_config_processed( def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): """ - Composing and generating config file content for raw data + Compose and generate config file content for raw data :param proj_meta: root to the annotation file :param proj_root_sample: path to sampletable file :param subanot_path_yaml: path to subannotation file @@ -1178,7 +1178,7 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): @staticmethod 
def _check_sample_name_standard(metadata_dict: dict) -> dict: """ - Standardizing sample name and checking if it exists + Standardize sample name and check if it exists (This function is used for raw data) :param metadata_dict: metadata dict :return: metadata dict with standardize sample names @@ -1204,7 +1204,7 @@ def _separate_common_meta( attr_limit_truncate: int = 500, ) -> tuple: """ - This function is separating experiment(project) metadata from sample metadata + Separate experiment(project) metadata from sample metadata :param list or dict meta_list: list of dictionaries of samples :param int max_len: threshold of the length of the common value that can be stored in the sample table :param int del_limit: threshold of the length of the common value that have to be deleted @@ -1278,7 +1278,7 @@ def _separate_common_meta( def _download_SRA_file(self, run_name: str): """ - Downloading SRA file by ising 'prefetch' utility from the SRA Toolkit + Download SRA file by using 'prefetch' utility from the SRA Toolkit more info: (http://www.ncbi.nlm.nih.gov/books/NBK242621/) :param str run_name: SRR number of the SRA file """ @@ -1306,7 +1306,7 @@ def _download_SRA_file(self, run_name: str): def _sra_to_bam_conversion_sam_dump(self, bam_file: str, run_name: str) -> NoReturn: """ - Converting of SRA file to BAM file by using samtools function "sam-dump" + Convert SRA file to BAM file by using samtools function "sam-dump" :param str bam_file: path to BAM file that has to be created :param str run_name: SRR number of the SRA file that has to be converted """ @@ -1332,7 +1332,7 @@ def _sra_to_bam_conversion_fastq_damp( self, bam_file: str, run_name: str, picard_path: str = None ) -> NoReturn: """ - Converting of SRA file to BAM file by using fastq-dump + Convert SRA file to BAM file by using fastq-dump (is used when sam-dump fails, yielding an empty bam file. 
Here fastq -> bam conversion is used) :param str bam_file: path to BAM file that has to be created :param str run_name: SRR number of the SRA file that has to be converted @@ -1373,7 +1373,7 @@ def _write_subannotation( self, tabular_data: dict, filepath: str, column_names: list = None ): """ - Writes one or more tables to a given CSV filepath. + Write one or more tables to a given CSV filepath. :param tabular_data: Mapping | Iterable[Mapping]: single KV pair collection, or collection of such collections, to write to disk as tabular data @@ -1765,7 +1765,7 @@ def _get_SRA_meta(self, file_gse_content: list, gsm_metadata, file_sra=None): def _get_SRP_list(self, srp_number: str) -> list: """ - Getting list of srp by using requests and xml searching and getting list of dicts of SRRs + Get a list of srp by using requests and xml searching and getting list of dicts of SRRs :param str srp_number: SRP number :return: list of dicts of SRRs """ From 6ef994660bec583cd90027eff0dfe38b74fb0bdc Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Oct 2022 16:00:13 -0400 Subject: [PATCH 59/61] raw config correction --- geofetch/config_template.yaml | 3 +-- geofetch/geofetch.py | 6 ++++++ mkdocs.yml | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/geofetch/config_template.yaml b/geofetch/config_template.yaml index 9de5e97..588d81a 100644 --- a/geofetch/config_template.yaml +++ b/geofetch/config_template.yaml @@ -5,8 +5,7 @@ pep_version: 2.1.0 sample_table: {annotation} {subannotation} -sample_modifiers: - append: +{sample_modifier_str} # Project metadata: {additional_columns} # End of project metadata diff --git a/geofetch/geofetch.py b/geofetch/geofetch.py index eb6c144..ecb0480 100755 --- a/geofetch/geofetch.py +++ b/geofetch/geofetch.py @@ -1151,6 +1151,11 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): modifiers_str = "\n ".join(d for d in meta_list_str) # Write project config file geofetchdir = 
os.path.dirname(__file__) + + if self.file_pipeline_samples or modifiers_str != "": + sample_modifier_str = "sample_modifiers:\n append:" + else: + sample_modifier_str = "" if not self.config_template: self.config_template = os.path.join(geofetchdir, CONFIG_RAW_TEMPLATE_NAME) if self.add_convert_modifier: @@ -1165,6 +1170,7 @@ def _create_config_raw(self, proj_meta, proj_root_sample, subanot_path_yaml): "project_name": self.project_name, "annotation": os.path.basename(proj_root_sample), "subannotation": subanot_path_yaml, + "sample_modifier_str": sample_modifier_str, "pipeline_samples": self.file_pipeline_samples, "pipeline_project": self.file_pipeline_project, "additional_columns": modifiers_str, diff --git a/mkdocs.yml b/mkdocs.yml index 69b5a07..e6bb189 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,8 +12,8 @@ nav: - Tutorials: - Tutorial for processed data: processed-data-downloading.md - Tutorial for raw data: raw-data-downloading.md - - Python geofetch tutorial: python-usage.md - - Use GSE Finder: gse_finder.md + - geofetch API tutorial: python-usage.md + - GSE Finder: gse_finder.md - How-to Guides: - Specifying samples to download: file-specification.md - Set SRA data download location: howto-location.md From 25fca873cb1a1e94ea7d544d94a807ab1bdc3fbb Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Oct 2022 17:23:07 -0400 Subject: [PATCH 60/61] Corrected readme files --- docs/README.md | 6 ++++-- docs/changelog.md | 12 +++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/docs/README.md b/docs/README.md index b2e711b..7b87b32 100644 --- a/docs/README.md +++ b/docs/README.md @@ -21,7 +21,9 @@ Key geofetch advantages: - Standardizes output metadata - Filters type and size of processed files (from GEO) before downloading them - Easy to use -- Fast execution +- Fast execution time +- Can search GEO to find relevant data +- Can be used either as a command-line tool or from within Python using an API ## Quick example @@ -54,7 +56,7 
@@ geofetch -i GSE95654 --processed --just-metadata ![](./img/arguments_outputs.svg) --- -### New features available in geofetch 0.11.0 : +### New features available in geofetch 0.11.0: 1) Now geofetch is available as Python API package. Geofetch can initialize [peppy](http://peppy.databio.org/) projects without downloading any soft files. Example: ```python diff --git a/docs/changelog.md b/docs/changelog.md index e5d3406..7f2ef68 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,15 +1,17 @@ # Changelog -## [0.11.0] -- 2022-09-06 -- Added initialization of peppy Project without saving any files +## [0.11.0] -- 2022-10-26 +- Added initialization of peppy Project without saving any files (from within Python using an API) - Added Finder (searching GSE tool) - Added progress bar -- Fixed None issue in config file - Switched way of saving soft files to request library -- Fixed saving raw peps bug -- Fixed config errors - Improved documentation - Refactored code +- Added `--add-convert-modifier` flag +- Fixed looper amendments in the config file +- Fixed special character bug in the config file +- Fixed None issue in config file +- Fixed saving raw peps bug ## [0.10.1] -- 2022-08-04 - Updated metadata fetching requests from SRA database From b91329be7f3a76aa5fd9d4699e145634d0b40bce Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Wed, 26 Oct 2022 17:30:36 -0400 Subject: [PATCH 61/61] Corrected readme files 2 --- mkdocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index e6bb189..dfc6c83 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,7 +12,7 @@ nav: - Tutorials: - Tutorial for processed data: processed-data-downloading.md - Tutorial for raw data: raw-data-downloading.md - - geofetch from within Python: python-usage.md - GSE Finder: gse_finder.md - How-to Guides: - Specifying samples to download: file-specification.md