🎨 format all code using autopep8

- both scripts (notebooks) - and library code
RasmussenLab · Sep 18, 2023 · 1269b5e · 1269b5e
1 parent a2b97b8
commit 1269b5e
Show file tree

Hide file tree

Showing 143 changed files with 3,228 additions and 2,397 deletions.
diff --git a/project/00_0_0_lftp_upload_commands.ipynb b/project/00_0_0_lftp_upload_commands.ipynb
@@ -56,11 +56,11 @@
    },
    "outputs": [],
    "source": [
-    "fn_rawfile_metadata: str = 'data/rawfile_metadata.csv' # Machine parsed metadata from rawfile workflow\n",
-    "fn_mq_summaries: str = 'data/samples_selected_summaries.csv' # MaxQuant summary files\n",
-    "fn_files_selected: str = 'data/samples_selected.yaml' # selected files based on threshold of identified peptides\n",
-    "out_folder: str = 'data/rename' # output folder\n",
-    "fn_server_log: str = 'data/rename/mq_out_server.log' # server log of all uploaded files"
+    "fn_rawfile_metadata: str = 'data/rawfile_metadata.csv'  # Machine parsed metadata from rawfile workflow\n",
+    "fn_mq_summaries: str = 'data/samples_selected_summaries.csv'  # MaxQuant summary files\n",
+    "fn_files_selected: str = 'data/samples_selected.yaml'  # selected files based on threshold of identified peptides\n",
+    "out_folder: str = 'data/rename'  # output folder\n",
+    "fn_server_log: str = 'data/rename/mq_out_server.log'  # server log of all uploaded files"
    ]
   },
   {
@@ -133,14 +133,14 @@
    "outputs": [],
    "source": [
     "cols_identifies = [('FileProperties', 'Pathname'),\n",
-    " ('FileProperties', 'Version'),\n",
-    " ('FileProperties', 'Content Creation Date'),\n",
-    " ('InstrumentProperties', 'Thermo Scientific instrument model'),\n",
-    " ('InstrumentProperties', 'instrument attribute'),\n",
-    " ('InstrumentProperties', 'instrument serial number'),\n",
-    " ('InstrumentProperties', 'Software Version'),\n",
-    " ('InstrumentProperties', 'firmware version'),\n",
-    "]\n",
+    "                   ('FileProperties', 'Version'),\n",
+    "                   ('FileProperties', 'Content Creation Date'),\n",
+    "                   ('InstrumentProperties', 'Thermo Scientific instrument model'),\n",
+    "                   ('InstrumentProperties', 'instrument attribute'),\n",
+    "                   ('InstrumentProperties', 'instrument serial number'),\n",
+    "                   ('InstrumentProperties', 'Software Version'),\n",
+    "                   ('InstrumentProperties', 'firmware version'),\n",
+    "                   ]\n",
     "\n",
     "df_meta = df_meta[cols_identifies]\n",
     "df_meta.columns = [t[-1] for t in cols_identifies]\n",
@@ -198,9 +198,9 @@
    "source": [
     "date_col = \"Content Creation Date\"\n",
     "idx_all = (pd.to_datetime(df_meta[date_col]).dt.strftime(\"%Y_%m_%d_%H_%M\")\n",
-    "        + '_'\n",
-    "        + df_meta[\"Instrument_name\"]\n",
-    ").str.replace(' ', '-')\n",
+    "           + '_'\n",
+    "           + df_meta[\"Instrument_name\"]\n",
+    "           ).str.replace(' ', '-')\n",
     "\n",
     "mask = idx_all.duplicated(keep=False)\n",
     "duplicated_sample_idx = idx_all.loc[mask].sort_values()  # duplicated dumps\n",
@@ -214,8 +214,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df_meta['new_sample_id'] =  idx_all\n",
-    "\n",
+    "df_meta['new_sample_id'] = idx_all\n",
     "\n",
     "\n",
     "_n = df_meta.groupby(\"new_sample_id\").cumcount().astype('string').str.replace('0', '')\n",
@@ -340,10 +339,10 @@
     "            if string_ not in used_before:\n",
     "                ret += f'_{string_}'\n",
     "        used_before |= set(strings_)\n",
-    "    ret = (ret[1:] # remove _ from start\n",
+    "    ret = (ret[1:]  # remove _ from start\n",
     "           .replace('Slot_#', '')\n",
     "           .replace('slot_#', '')\n",
-    "          )\n",
+    "           )\n",
     "    return ret\n",
     "\n",
     "\n",
@@ -353,7 +352,7 @@
     "            \"instrument attribute\",\n",
     "            \"instrument serial number\",\n",
     "        ]\n",
-    "    ]\n",
+    "]\n",
     "    .sample(20)\n",
     "    .apply(build_instrument_name, axis=1)\n",
     ")"
@@ -401,8 +400,8 @@
     " .loc[selected, \"Path_old\"]\n",
     " .iloc[:3]\n",
     " .to_csv(out_folder / 'rawfiles_to_checksum.txt',\n",
-    "          index=False,\n",
-    "            header=False)\n",
+    "         index=False,\n",
+    "         header=False)\n",
     " )"
    ]
   },
@@ -453,7 +452,7 @@
     "```\n",
     "to allow parallel commands, use the runtime setting\n",
     "```bash\n",
-    ">>> cat ~/.lftprc \n",
+    ">>> cat ~/.lftprc\n",
     "set cmd:parallel 2\n",
     "```"
    ]
@@ -501,11 +500,11 @@
    "source": [
     "commands = df_meta.loc[selected]\n",
     "commands = (\n",
-    "    'put ' \n",
+    "    'put '\n",
     "    + commands['Path_old'].astype('string')\n",
-    "    + ' -o ' \n",
-    "    + \"./raw_files/\" \n",
-    "    + commands[\"Instrument_name\"] \n",
+    "    + ' -o '\n",
+    "    + \"./raw_files/\"\n",
+    "    + commands[\"Instrument_name\"]\n",
     "    + '/'\n",
     "    + commands['new_sample_id'] + '.raw'\n",
     ")\n",
@@ -559,9 +558,9 @@
    "source": [
     "commands = df_meta.loc[selected]\n",
     "commands = (\n",
-    "    \"mirror -R --only-missing --log log_lftp_mirror.log --exclude-glob *.pdf \" # command\n",
-    "    + \"mq_out/\" + commands.index # source\n",
-    "    + \" ./MQ_tables/\" + commands[\"Instrument_name\"]+ \"/\" + commands[\"new_sample_id\"] # dest\n",
+    "    \"mirror -R --only-missing --log log_lftp_mirror.log --exclude-glob *.pdf \"  # command\n",
+    "    + \"mq_out/\" + commands.index  # source\n",
+    "    + \" ./MQ_tables/\" + commands[\"Instrument_name\"] + \"/\" + commands[\"new_sample_id\"]  # dest\n",
     ")\n",
     "\n",
     "print(commands.sample(10).to_csv(header=False, index=False))"
@@ -579,9 +578,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "83c04b90-0c4e-4fe7-88f6-ed02cef93a23",
-   "metadata": {
-    "lines_to_next_cell": 2
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "fname = out_folder / 'lftp_commands_mq_output.txt'\n",

diff --git a/project/00_0_0_lftp_upload_commands.py b/project/00_0_0_lftp_upload_commands.py
@@ -42,11 +42,11 @@ def rename(fname, new_sample_id, new_folder=None, ext=None):
 # ## Arguments
 
 # %% tags=["parameters"]
-fn_rawfile_metadata: str = 'data/rawfile_metadata.csv' # Machine parsed metadata from rawfile workflow
-fn_mq_summaries: str = 'data/samples_selected_summaries.csv' # MaxQuant summary files
-fn_files_selected: str = 'data/samples_selected.yaml' # selected files based on threshold of identified peptides
-out_folder: str = 'data/rename' # output folder
-fn_server_log: str = 'data/rename/mq_out_server.log' # server log of all uploaded files
+fn_rawfile_metadata: str = 'data/rawfile_metadata.csv'  # Machine parsed metadata from rawfile workflow
+fn_mq_summaries: str = 'data/samples_selected_summaries.csv'  # MaxQuant summary files
+fn_files_selected: str = 'data/samples_selected.yaml'  # selected files based on threshold of identified peptides
+out_folder: str = 'data/rename'  # output folder
+fn_server_log: str = 'data/rename/mq_out_server.log'  # server log of all uploaded files
 
 # %%
 out_folder = Path(out_folder)
@@ -79,14 +79,14 @@ def rename(fname, new_sample_id, new_folder=None, ext=None):
 
 # %%
 cols_identifies = [('FileProperties', 'Pathname'),
- ('FileProperties', 'Version'),
- ('FileProperties', 'Content Creation Date'),
- ('InstrumentProperties', 'Thermo Scientific instrument model'),
- ('InstrumentProperties', 'instrument attribute'),
- ('InstrumentProperties', 'instrument serial number'),
- ('InstrumentProperties', 'Software Version'),
- ('InstrumentProperties', 'firmware version'),
-]
+                   ('FileProperties', 'Version'),
+                   ('FileProperties', 'Content Creation Date'),
+                   ('InstrumentProperties', 'Thermo Scientific instrument model'),
+                   ('InstrumentProperties', 'instrument attribute'),
+                   ('InstrumentProperties', 'instrument serial number'),
+                   ('InstrumentProperties', 'Software Version'),
+                   ('InstrumentProperties', 'firmware version'),
+                   ]
 
 df_meta = df_meta[cols_identifies]
 df_meta.columns = [t[-1] for t in cols_identifies]
@@ -113,17 +113,16 @@ def rename(fname, new_sample_id, new_folder=None, ext=None):
 # %%
 date_col = "Content Creation Date"
 idx_all = (pd.to_datetime(df_meta[date_col]).dt.strftime("%Y_%m_%d_%H_%M")
-        + '_'
-        + df_meta["Instrument_name"]
-).str.replace(' ', '-')
+           + '_'
+           + df_meta["Instrument_name"]
+           ).str.replace(' ', '-')
 
 mask = idx_all.duplicated(keep=False)
 duplicated_sample_idx = idx_all.loc[mask].sort_values()  # duplicated dumps
 duplicated_sample_idx
 
 # %%
-df_meta['new_sample_id'] =  idx_all
-
+df_meta['new_sample_id'] = idx_all
 
 
 _n = df_meta.groupby("new_sample_id").cumcount().astype('string').str.replace('0', '')
@@ -182,10 +181,10 @@ def build_instrument_name(s):
             if string_ not in used_before:
                 ret += f'_{string_}'
         used_before |= set(strings_)
-    ret = (ret[1:] # remove _ from start
+    ret = (ret[1:]  # remove _ from start
            .replace('Slot_#', '')
            .replace('slot_#', '')
-          )
+           )
     return ret
 
 
@@ -195,7 +194,7 @@ def build_instrument_name(s):
             "instrument attribute",
             "instrument serial number",
         ]
-    ]
+]
     .sample(20)
     .apply(build_instrument_name, axis=1)
 )
@@ -217,8 +216,8 @@ def build_instrument_name(s):
  .loc[selected, "Path_old"]
  .iloc[:3]
  .to_csv(out_folder / 'rawfiles_to_checksum.txt',
-          index=False,
-            header=False)
+         index=False,
+         header=False)
  )
 
 # %% [markdown]
@@ -247,7 +246,7 @@ def build_instrument_name(s):
 # ```
 # to allow parallel commands, use the runtime setting
 # ```bash
-# >>> cat ~/.lftprc 
+# >>> cat ~/.lftprc
 # set cmd:parallel 2
 # ```
 
@@ -269,11 +268,11 @@ def build_instrument_name(s):
 # %%
 commands = df_meta.loc[selected]
 commands = (
-    'put ' 
+    'put '
     + commands['Path_old'].astype('string')
-    + ' -o ' 
-    + "./raw_files/" 
-    + commands["Instrument_name"] 
+    + ' -o '
+    + "./raw_files/"
+    + commands["Instrument_name"]
     + '/'
     + commands['new_sample_id'] + '.raw'
 )
@@ -299,9 +298,9 @@ def build_instrument_name(s):
 # %%
 commands = df_meta.loc[selected]
 commands = (
-    "mirror -R --only-missing --log log_lftp_mirror.log --exclude-glob *.pdf " # command
-    + "mq_out/" + commands.index # source
-    + " ./MQ_tables/" + commands["Instrument_name"]+ "/" + commands["new_sample_id"] # dest
+    "mirror -R --only-missing --log log_lftp_mirror.log --exclude-glob *.pdf "  # command
+    + "mq_out/" + commands.index  # source
+    + " ./MQ_tables/" + commands["Instrument_name"] + "/" + commands["new_sample_id"]  # dest
 )
 
 print(commands.sample(10).to_csv(header=False, index=False))
@@ -312,4 +311,3 @@ def build_instrument_name(s):
 # %%
 fname = out_folder / 'lftp_commands_mq_output.txt'
 commands.to_csv(fname, header=False, index=False)
-
diff --git a/project/00_0_1_check_filesizes.ipynb b/project/00_0_1_check_filesizes.ipynb
@@ -153,7 +153,7 @@
    "source": [
     "mask = (entries['size_pride'] - entries['size_erda']).abs() > 5\n",
     "to_redo = entries.loc[mask].reset_index()\n",
-    "to_redo "
+    "to_redo"
    ]
   },
   {
@@ -172,7 +172,7 @@
    "id": "b6087751",
    "metadata": {},
    "source": [
-    "## Check MaxQuant output filesizes "
+    "## Check MaxQuant output filesizes"
    ]
   },
   {
@@ -207,7 +207,7 @@
     "                files.append(entry)\n",
     "                if entry.id_old not in folder:\n",
     "                    folder.add(entry.id_old)\n",
-    "            \n",
+    "\n",
     "print(f\"{len(folder) =: }\")\n",
     "print(f\"{len(files) =: }\")\n",
     "files[:3]"
@@ -235,11 +235,11 @@
    "outputs": [],
    "source": [
     "files['path_pride'] = ('MQ_tables/'\n",
-    "                        + files['Instrument_name']\n",
-    "                          + '/' \n",
-    "                          + files[\"new_sample_id\"]\n",
-    "                          + '/'\n",
-    "                          + files[\"filename\"])\n",
+    "                       + files['Instrument_name']\n",
+    "                       + '/'\n",
+    "                       + files[\"new_sample_id\"]\n",
+    "                       + '/'\n",
+    "                       + files[\"filename\"])\n",
     "files['path_pride'].iloc[:4].to_list()"
    ]
   },
@@ -250,7 +250,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "files['filename'].value_counts() # except mqpar.xml all present on erda"
+    "files['filename'].value_counts()  # except mqpar.xml all present on erda"
    ]
   },
   {
@@ -359,9 +359,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "3fc22aef",
-   "metadata": {
-    "lines_to_next_cell": 2
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "to_do = pd.concat([missing_on_pride, files_redo])\n",