diff --git a/relecov_tools/ena_upload.py b/relecov_tools/ena_upload.py
new file mode 100644
index 00000000..62b60c2a
--- /dev/null
+++ b/relecov_tools/ena_upload.py
@@ -0,0 +1,149 @@
import json as j
import logging
import os
from email import utils

try:
    import xml.etree.cElementTree as e
except ImportError:  # cElementTree was removed in Python 3.9
    import xml.etree.ElementTree as e

import rich.console

import relecov_tools.utils
+
# Logger named after this module's import path.
log = logging.getLogger(__name__)

# Console bound to standard error with dimmed, non-highlighted output.
# Whether terminal colours are forced is decided by the project helper.
_force_terminal = relecov_tools.utils.rich_force_colors()
stderr = rich.console.Console(
    force_terminal=_force_terminal,
    highlight=False,
    style="dim",
    stderr=True,
)
+
+
class XmlCreation:
    """Build the XML documents used for an ENA programmatic submission.

    Planned workflow (only the XML generation of steps 1 and 2 is
    implemented; every upload step is still pending):

    1. From validated json to xml study - submission.xml and project.xml
       1.1 Upload study info
    2. From validated json to xml samples - submission.xml and samples.xml
       2.2 Upload samples info
    3. From sftp upload runs (FASTQ files programmatic) - experiments.xml,
       runs.xml and submission.xml
    4. From sftp upload sequences (FASTA files programmatic) - json using
       webin-cli-rest

    The configuration and sample files are read through paths relative to
    the current working directory; the working directory itself is never
    changed by this class.
    """

    # Submission template file name for each supported (upper-cased) action.
    # NOTE(review): the templates presumably live in "xml_files/" - confirm.
    SUBMISSION_FILES = {
        "ADD": "submission_add.xml",
        "MODIFY": "submission_modify.xml",
    }
    # Project configuration providing "project_relecov_xml" and "fixed_data".
    CONF_FILE = os.path.join("conf", "configuration.json")
    # JSON document whose keys/values become the sample attributes.
    SAMPLES_FILE = os.path.join("schema", "to_ena.json")

    def __init__(self, source_json=None, output_path=None, action=None):
        """Store the submission settings, prompting for missing paths.

        Args:
            source_json: path to the validated JSON file; prompted for when
                None.
            output_path: directory receiving the generated XML files;
                prompted for when None.
            action: submission action, "ADD" or "MODIFY" in any letter case;
                defaults to "ADD".
        """
        # NOTE(review): the prompt helpers are assumed to be provided by
        # relecov_tools.utils (email.utils has no such functions) - confirm.
        if source_json is None:
            source_json = relecov_tools.utils.prompt_source_path()
        if output_path is None:
            output_path = relecov_tools.utils.prompt_destination_path()
        self.source_json = source_json
        self.output_path = output_path
        self.action = "ADD" if action is None else action

    @property
    def submission_file(self):
        """Return the submission template file name matching ``self.action``.

        Raises:
            ValueError: if the action is neither "ADD" nor "MODIFY".
        """
        action = str(self.action).upper()
        try:
            return self.SUBMISSION_FILES[action]
        except KeyError:
            raise ValueError(
                f"Unsupported ENA submission action: {self.action!r}"
            ) from None

    @staticmethod
    def _read_json(path):
        """Return the parsed content of the JSON file at *path*."""
        with open(path, encoding="utf-8") as handle:
            return j.load(handle)

    @staticmethod
    def _text(value):
        """Return *value* as element text; None stays None (empty element)."""
        # ElementTree can only serialize str text, JSON may hold numbers.
        return None if value is None else str(value)

    def _write_xml(self, root, section, file_name):
        """Write *root* to <output_path>/<section>/<file_name>; return the path."""
        target_dir = os.path.join(self.output_path, section)
        os.makedirs(target_dir, exist_ok=True)
        target = os.path.join(target_dir, file_name)
        e.ElementTree(root).write(target)
        return target

    def xml_study(
        self,
    ):
        """Create <output_path>/study/project_relecov.xml.

        The project alias, title and description are taken from the
        "project_relecov_xml" section of conf/configuration.json.

        Raises:
            OSError: if the source JSON or the configuration cannot be read.
            KeyError: if the configuration lacks an expected entry.
        """
        # Fail early when the validated JSON is missing or malformed; its
        # content is not needed to build the project XML.
        self._read_json(self.source_json)

        # Create output directory
        try:
            os.makedirs(self.output_path)
            print("Directory ", self.output_path, " Created ")
        except FileExistsError:
            print("Directory ", self.output_path, " already exists")

        # project_relecov.xml
        project_conf = self._read_json(self.CONF_FILE)["project_relecov_xml"]
        root = e.Element("PROJECT_SET")
        project = e.SubElement(root, "PROJECT")
        project.set("alias", str(project_conf["alias"]))
        e.SubElement(project, "TITLE").text = self._text(project_conf["TITLE"])
        e.SubElement(project, "DESCRIPTION").text = self._text(
            project_conf["DESCRIPTION"]
        )
        submission = e.SubElement(project, "SUBMISSION_PROJECT")
        e.SubElement(submission, "SEQUENCING_PROJECT")
        self._write_xml(root, "study", "project_relecov.xml")

        # TODO: 1.1 Upload study info (not implemented yet)

    def xml_samples(self):
        """Create <output_path>/samples/samples_relecov.xml.

        Every key/value pair of schema/to_ena.json becomes one
        SAMPLE_ATTRIBUTE; taxon id and scientific name come from the
        "fixed_data" section of conf/configuration.json.

        Raises:
            OSError: if one of the JSON files cannot be read.
            KeyError: if "sample_name" or a configuration entry is missing.
        """
        # NOTE(review): the sample data is read from schema/to_ena.json as in
        # the original code, not from self.source_json - confirm the source.
        sample_data = self._read_json(self.SAMPLES_FILE)
        fixed_data = self._read_json(self.CONF_FILE)["fixed_data"]
        sample_label = str(sample_data["sample_name"])

        # samples_relecov.xml
        root = e.Element("SAMPLE_SET")
        sample = e.SubElement(root, "SAMPLE")
        sample.set("alias", "Programmatic Test SARS-CoV-2 Sample" + sample_label)
        e.SubElement(sample, "TITLE").text = "SARS-CoV-2 Sample" + sample_label
        name_node = e.SubElement(sample, "SAMPLE_NAME")
        e.SubElement(name_node, "TAXON_ID").text = self._text(fixed_data["tax_id"])
        e.SubElement(name_node, "SCIENTIFIC_NAME").text = self._text(
            fixed_data["scientific_name"]
        )
        e.SubElement(sample, "DESCRIPTION").text = "SARS-CoV-2 Sample" + sample_label
        attributes = e.SubElement(sample, "SAMPLE_ATTRIBUTES")
        for tag, value in sample_data.items():
            attribute = e.SubElement(attributes, "SAMPLE_ATTRIBUTE")
            e.SubElement(attribute, "TAG").text = str(tag)
            e.SubElement(attribute, "VALUE").text = self._text(value)
        self._write_xml(root, "samples", "samples_relecov.xml")

        # TODO: 2.2 Upload samples info (not implemented yet)

    # TODO: 3. From sftp upload runs (FASTQ files programmatic) -
    #          experiments.xml, runs.xml and submission.xml
    # TODO: 4. From sftp upload sequences (FASTA files programmatic) - json
    #          using webin-cli-rest
+
+
# Adaptation to ena_upload
def load_ena_tables(json_path="../example_data/ena_upload.json"):
    """Load an ena_upload-style JSON file into one DataFrame per section.

    Args:
        json_path: path to a JSON file holding the "study", "samples",
            "runs" and "experiments" sections.

    Returns:
        dict mapping each section name to the pandas.DataFrame built from it
        with ``DataFrame.from_dict``.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if one of the four sections is missing.
    """
    # Imported here because pandas is only needed by this helper and was
    # never imported at module level.
    import pandas as pd

    with open(json_path, "r") as handle:
        data = j.load(handle)
    sections = ("study", "samples", "runs", "experiments")
    return {section: pd.DataFrame.from_dict(data[section]) for section in sections}


# Only read the example data when executed as a script, so importing the
# module performs no file I/O.
if __name__ == "__main__":
    _ena_tables = load_ena_tables()
    df_study = _ena_tables["study"]
    df_samples = _ena_tables["samples"]
    df_runs = _ena_tables["runs"]
    df_experiments = _ena_tables["experiments"]
\ No newline at end of file
diff --git a/relecov_tools/ena_upload_buisciii.py b/relecov_tools/ena_upload_buisciii.py
new file mode 100644
index 00000000..226f04a3
--- /dev/null
+++ b/relecov_tools/ena_upload_buisciii.py
@@ -0,0 +1,174 @@
+import os
+import logging
+import rich.console
+from email import utils
+import json as j
+import xml.etree.cElementTree as e
+
+import relecov_tools.utils
+
# Logger named after this module's import path.
log = logging.getLogger(__name__)

# Console bound to standard error with dimmed, non-highlighted output.
# Whether terminal colours are forced is decided by the project helper.
_force_terminal = relecov_tools.utils.rich_force_colors()
stderr = rich.console.Console(
    force_terminal=_force_terminal,
    highlight=False,
    style="dim",
    stderr=True,
)
+
+
class XmlCreation:
    """Build the XML documents used for an ENA programmatic submission.

    Planned workflow (only the XML generation of steps 1 and 2 is
    implemented; every upload step is still pending):

    1. From validated json to xml study - submission.xml and project.xml
       1.1 Upload study info
    2. From validated json to xml samples - submission.xml and samples.xml
       2.2 Upload samples info
    3. From sftp upload runs (FASTQ files programmatic) - experiments.xml,
       runs.xml and submission.xml
    4. From sftp upload sequences (FASTA files programmatic) - json using
       webin-cli-rest

    The configuration and sample files are read through paths relative to
    the current working directory; the working directory itself is never
    changed by this class.
    """

    # Submission template file name for each supported (upper-cased) action.
    # NOTE(review): the templates presumably live in "xml_files/" - confirm.
    SUBMISSION_FILES = {
        "ADD": "submission_add.xml",
        "MODIFY": "submission_modify.xml",
    }
    # Project configuration providing "project_relecov_xml" and "fixed_data".
    CONF_FILE = os.path.join("conf", "configuration.json")
    # JSON document whose keys/values become the sample attributes.
    SAMPLES_FILE = os.path.join("schema", "to_ena.json")

    def __init__(self, source_json=None, output_path=None, action=None):
        """Store the submission settings, prompting for missing paths.

        Args:
            source_json: path to the validated JSON file; prompted for when
                None.
            output_path: directory receiving the generated XML files;
                prompted for when None.
            action: submission action, "ADD" or "MODIFY" in any letter case;
                defaults to "ADD".
        """
        # NOTE(review): the prompt helpers are assumed to be provided by
        # relecov_tools.utils (email.utils has no such functions) - confirm.
        if source_json is None:
            source_json = relecov_tools.utils.prompt_source_path()
        if output_path is None:
            output_path = relecov_tools.utils.prompt_destination_path()
        self.source_json = source_json
        self.output_path = output_path
        self.action = "ADD" if action is None else action

    @property
    def submission_file(self):
        """Return the submission template file name matching ``self.action``.

        Raises:
            ValueError: if the action is neither "ADD" nor "MODIFY".
        """
        action = str(self.action).upper()
        try:
            return self.SUBMISSION_FILES[action]
        except KeyError:
            raise ValueError(
                f"Unsupported ENA submission action: {self.action!r}"
            ) from None

    @staticmethod
    def _read_json(path):
        """Return the parsed content of the JSON file at *path*."""
        with open(path, encoding="utf-8") as handle:
            return j.load(handle)

    @staticmethod
    def _text(value):
        """Return *value* as element text; None stays None (empty element)."""
        # ElementTree can only serialize str text, JSON may hold numbers.
        return None if value is None else str(value)

    def _write_xml(self, root, section, file_name):
        """Write *root* to <output_path>/<section>/<file_name>; return the path."""
        target_dir = os.path.join(self.output_path, section)
        os.makedirs(target_dir, exist_ok=True)
        target = os.path.join(target_dir, file_name)
        e.ElementTree(root).write(target)
        return target

    def xml_study(
        self,
    ):
        """Create <output_path>/study/project_relecov.xml.

        The project alias, title and description are taken from the
        "project_relecov_xml" section of conf/configuration.json.

        Raises:
            OSError: if the source JSON or the configuration cannot be read.
            KeyError: if the configuration lacks an expected entry.
        """
        # Fail early when the validated JSON is missing or malformed; its
        # content is not needed to build the project XML.
        self._read_json(self.source_json)

        # Create output directory
        try:
            os.makedirs(self.output_path)
            print("Directory ", self.output_path, " Created ")
        except FileExistsError:
            print("Directory ", self.output_path, " already exists")

        # project_relecov.xml
        project_conf = self._read_json(self.CONF_FILE)["project_relecov_xml"]
        root = e.Element("PROJECT_SET")
        project = e.SubElement(root, "PROJECT")
        project.set("alias", str(project_conf["alias"]))
        e.SubElement(project, "TITLE").text = self._text(project_conf["TITLE"])
        e.SubElement(project, "DESCRIPTION").text = self._text(
            project_conf["DESCRIPTION"]
        )
        submission = e.SubElement(project, "SUBMISSION_PROJECT")
        e.SubElement(submission, "SEQUENCING_PROJECT")
        self._write_xml(root, "study", "project_relecov.xml")

        # TODO: 1.1 Upload study info (not implemented yet; the earlier
        # placeholder was an HTTP POST sketch with dummy credentials).

    def xml_samples(self):
        """Create <output_path>/samples/samples_relecov.xml.

        Every key/value pair of schema/to_ena.json becomes one
        SAMPLE_ATTRIBUTE; taxon id and scientific name come from the
        "fixed_data" section of conf/configuration.json.

        Raises:
            OSError: if one of the JSON files cannot be read.
            KeyError: if "sample_name" or a configuration entry is missing.
        """
        # NOTE(review): the sample data is read from schema/to_ena.json as in
        # the original code, not from self.source_json - confirm the source.
        sample_data = self._read_json(self.SAMPLES_FILE)
        fixed_data = self._read_json(self.CONF_FILE)["fixed_data"]
        sample_label = str(sample_data["sample_name"])

        # samples_relecov.xml
        root = e.Element("SAMPLE_SET")
        sample = e.SubElement(root, "SAMPLE")
        sample.set("alias", "Programmatic Test SARS-CoV-2 Sample" + sample_label)
        e.SubElement(sample, "TITLE").text = "SARS-CoV-2 Sample" + sample_label
        name_node = e.SubElement(sample, "SAMPLE_NAME")
        e.SubElement(name_node, "TAXON_ID").text = self._text(fixed_data["tax_id"])
        e.SubElement(name_node, "SCIENTIFIC_NAME").text = self._text(
            fixed_data["scientific_name"]
        )
        e.SubElement(sample, "DESCRIPTION").text = "SARS-CoV-2 Sample" + sample_label
        attributes = e.SubElement(sample, "SAMPLE_ATTRIBUTES")
        for tag, value in sample_data.items():
            attribute = e.SubElement(attributes, "SAMPLE_ATTRIBUTE")
            e.SubElement(attribute, "TAG").text = str(tag)
            e.SubElement(attribute, "VALUE").text = self._text(value)
        self._write_xml(root, "samples", "samples_relecov.xml")

        # TODO: 2.2 Upload samples info (not implemented yet; the earlier
        # placeholder was an HTTP POST sketch with dummy credentials).

    # TODO: 3. From sftp upload runs (FASTQ files programmatic) -
    #          experiments.xml, runs.xml and submission.xml
    # TODO: 4. From sftp upload sequences (FASTA files programmatic) - json
    #          using webin-cli-rest
diff --git a/relecov_tools/example_data/ena_upload.json b/relecov_tools/example_data/ena_upload.json
index e6689ca8..92aaefb9 100644
--- a/relecov_tools/example_data/ena_upload.json
+++ b/relecov_tools/example_data/ena_upload.json
@@ -16,7 +16,7 @@
"study_abstract_1",
"study_abstract_2"
],
- "pubmed_id": [None,
+ "pubmed_id": ["None",
"pubmed_id_2"
]
},
@@ -157,7 +157,7 @@
"single"
],
"insert_size": [
- "250",None,None
+ "250","None","None"
],
"library_construction_protocol": [
"library_construction_protocol_1",
diff --git a/relecov_tools/test/try_01.ipynb b/relecov_tools/test/try_01.ipynb
new file mode 100644
index 00000000..df0f0e80
--- /dev/null
+++ b/relecov_tools/test/try_01.ipynb
@@ -0,0 +1,261 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "aef6b5c1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#json to dataframe\n",
+ "import pandas as pd\n",
+ "\n",
+ "df = pd.read_json('../example_data/ena_upload_study.json')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "8cf882d3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " study | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " alias | \n",
+ " [study_alias_4, study_alias_5] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " study\n",
+ "alias [study_alias_4, study_alias_5]"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "95cf4dd2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(2,)"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "lista = data[\"study\"][\"alias\"]\n",
+ "import numpy as np\n",
+ "a = np.array(lista)\n",
+ "a.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "f1a7dcf1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'study': {'alias': ['study_alias_4', 'study_alias_5']}}"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "6f6c7f87",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "# load data using Python JSON module\n",
+ "with open('../example_data/ena_upload.json','r') as f:\n",
+ " data = json.loads(f.read())\n",
+ "\n",
+ "lista = data[\"study\"]\n",
+ "df_study = pd.DataFrame(lista, columns=['alias',\"title\",\"study_type\",\"study_abstract\",\"pubmed_id\"])\n",
+ "\n",
+ "#study_df pd.DataFrame(columns=[\"alias\"], )\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "a8066308",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'alias': ['study_alias_4', 'study_alias_5'],\n",
+ " 'title': ['study_title_1', 'study_title_2'],\n",
+ " 'study_type': ['Transcriptome Analysis', 'RNASeq'],\n",
+ " 'study_abstract': ['study_abstract_1', 'study_abstract_2'],\n",
+ " 'pubmed_id': ['None', 'pubmed_id_2']}"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "lista"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "5dcb05c6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " alias | \n",
+ " title | \n",
+ " study_type | \n",
+ " study_abstract | \n",
+ " pubmed_id | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " study_alias_4 | \n",
+ " study_title_1 | \n",
+ " Transcriptome Analysis | \n",
+ " study_abstract_1 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " study_alias_5 | \n",
+ " study_title_2 | \n",
+ " RNASeq | \n",
+ " study_abstract_2 | \n",
+ " pubmed_id_2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " alias title study_type study_abstract \\\n",
+ "0 study_alias_4 study_title_1 Transcriptome Analysis study_abstract_1 \n",
+ "1 study_alias_5 study_title_2 RNASeq study_abstract_2 \n",
+ "\n",
+ " pubmed_id \n",
+ "0 None \n",
+ "1 pubmed_id_2 "
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_study"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "308e5c21",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}