|
80 | 80 | "from bifrost_bridge.mlst import process_mlst_data\n",
|
81 | 81 | "from bifrost_bridge.fastp import process_fastp_data\n",
|
82 | 82 | "from bifrost_bridge.quast import process_quast_data\n",
|
| 83 | + "from bifrost_bridge.plasmidfinder import process_plasmidfinder_data\n", |
| 84 | + "from bifrost_bridge.bracken import process_bracken_data\n", |
| 85 | + "import pandas as pd\n", |
| 86 | + "#from bifrost_bridge.amrfinder import process_amrfinder_data\n", |
| 87 | + "#from bifrost_bridge.pmlst import process_pmlst_data\n", |
83 | 88 | "\n",
|
84 | 89 | "@call_parse\n",
|
85 | 90 | "def process_qc_data(\n",
|
86 | 91 | " mlst_path:str = None,\n",
|
87 | 92 | " fastp_path:str = None,\n",
|
88 | 93 | " quast_path:str = None,\n",
|
| 94 | + " plasmidfinder_path:str = None,\n", |
| 95 | + " bracken_path:str = None,\n", |
| 96 | + " amrfinder_path:str = None,\n", |
| 97 | + " pmlst_path:str = None,\n", |
| 98 | + " combine_output:bool = True,\n", |
89 | 99 | " output_path:str = './output.tsv'):\n",
|
90 | 100 | "\n",
|
91 | 101 | " \"\"\"\n",
|
92 | 102 | " Command-line interface for processing QC data.\n",
|
93 | 103 | "\n",
|
94 |
| - " This function processes MLST and FASTP data files based on the provided command-line arguments.\n", |
95 |
| - " It supports specifying input file paths for MLST and FASTP data, and outputs the processed data to specified paths.\n", |
| 104 | + " This function processes MLST, FASTP, QUAST, PlasmidFinder, and Bracken data files based on the provided command-line arguments.\n", |
| 105 | + " It takes an input file path for each tool, writes each parsed table to a fixed file under test_data/bifrost/, and, when combine_output is true, writes the column-wise combination of those tables to output_path.\n", |
96 | 106 | "\n",
|
97 | 107 | " Arguments:\n",
|
98 |
| - " mlst (str): Path to the MLST input file.\n", |
99 |
| - " fastp (str): Path to the FASTP input file.\n", |
100 |
| - " output (str): Path to the output file (default: './output.tsv').\n", |
| 108 | + " mlst_path (str): Path to the MLST input file.\n", |
| 109 | + " fastp_path (str): Path to the FASTP input file.\n", |
| 110 | + " quast_path (str): Path to the QUAST input file.\n", |
| 111 | + " plasmidfinder_path (str): Path to the PlasmidFinder input file.\n", |
| 112 | + " bracken_path (str): Path to the Bracken input file.\n", |
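| 113 | + " amrfinder_path (str): Path to the AMRFinder input file (currently only checked for existence; parsing is not yet enabled).\n", |
| 114 | + " pmlst_path (str): Path to the pMLST input file (currently only checked for existence; parsing is not yet enabled).\n", |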
| 115 | + " output_path (str): Path to the combined output file (default: './output.tsv'); written only when combine_output is true (the default).\n", |
101 | 116 | " \"\"\"\n",
|
102 | 117 | " if mlst_path is not None:\n",
|
103 | 118 | " if not os.path.exists(mlst_path):\n",
|
104 | 119 | " raise FileNotFoundError(f\"File not found: {mlst_path}\")\n",
|
105 | 120 | " process_mlst_data(\n",
|
106 | 121 | " input_path=str(mlst_path), \n",
|
107 |
| - " output_path='./parsed_mlst.tsv',\n", |
| 122 | + " output_path='test_data/bifrost/parsed_mlst.tsv',\n", |
108 | 123 | " replace_header=None, \n",
|
109 | 124 | " filter_columns=\"SampleID, Species, ST\",\n",
|
110 |
| - " header_exists=0,\n", |
111 | 125 | " add_header=\"SampleID, Species, ST, 1, 2, 3, 4, 5, 6, 7\"\n",
|
112 | 126 | " )\n",
|
113 | 127 | "\n",
|
|
116 | 130 | " raise FileNotFoundError(f\"File not found: {fastp_path}\")\n",
|
117 | 131 | " process_fastp_data(\n",
|
118 | 132 | " input_path=fastp_path, \n",
|
119 |
| - " output_path='./parsed_fastp.tsv',\n", |
| 133 | + " output_path='test_data/bifrost/parsed_fastp.tsv',\n", |
120 | 134 | " filter_columns=\"summary£fastp_version, summary£sequencing, summary£before_filtering£total_reads\",\n",
|
121 | 135 | " replace_header=\"fastp_version, sequencing, total_reads\"\n",
|
122 | 136 | " )\n",
|
|
126 | 140 | " raise FileNotFoundError(f\"File not found: {quast_path}\")\n",
|
127 | 141 | " process_quast_data(\n",
|
128 | 142 | " input_path=quast_path, \n",
|
129 |
| - " output_path='./parsed_quast.tsv',\n", |
| 143 | + " output_path='test_data/bifrost/parsed_quast.tsv',\n", |
130 | 144 | " filter_columns='Assembly,# contigs (>= 0 bp), N50',\n",
|
131 | 145 | " transpose=True\n",
|
132 |
| - " )" |
| 146 | + " )\n", |
| 147 | + " \n", |
| 148 | + " if plasmidfinder_path is not None:\n", |
| 149 | + " if not os.path.exists(plasmidfinder_path):\n", |
| 150 | + " raise FileNotFoundError(f\"File not found: {plasmidfinder_path}\")\n", |
| 151 | + " process_plasmidfinder_data(\n", |
| 152 | + " input_path=plasmidfinder_path, \n", |
| 153 | + " output_path='test_data/bifrost/parsed_plasmidfinder.tsv',\n", |
| 154 | + " )\n", |
| 155 | + "\n", |
| 156 | + " if bracken_path is not None:\n", |
| 157 | + " if not os.path.exists(bracken_path):\n", |
| 158 | + " raise FileNotFoundError(f\"File not found: {bracken_path}\")\n", |
| 159 | + " process_bracken_data(\n", |
| 160 | + " input_path=bracken_path, \n", |
| 161 | + " output_path='test_data/bifrost/parsed_bracken.tsv',\n", |
| 162 | + " )\n", |
| 163 | + "\n", |
| 164 | + " if amrfinder_path is not None:\n", |
| 165 | + " if not os.path.exists(amrfinder_path):\n", |
| 166 | + " raise FileNotFoundError(f\"File not found: {amrfinder_path}\")\n", |
| 167 | + " #process_amrfinder_data(\n", |
| 168 | + " # input_path=amrfinder_path, \n", |
| 169 | + " # output_path='test_data/bifrost/parsed_amrfinder.tsv',\n", |
| 170 | + " # filter_columns='Assembly,# contigs (>= 0 bp), N50',\n", |
| 171 | + " # transpose=True\n", |
| 172 | + " #)\n", |
| 173 | + "\n", |
| 174 | + " if pmlst_path is not None:\n", |
| 175 | + " if not os.path.exists(pmlst_path):\n", |
| 176 | + " raise FileNotFoundError(f\"File not found: {pmlst_path}\")\n", |
| 177 | + " #process_pmlst_data(\n", |
| 178 | + " # input_path=pmlst_path, \n", |
| 179 | + " # output_path='test_data/bifrost/parsed_pmlst.tsv',\n", |
| 180 | + " # filter_columns='Assembly,# contigs (>= 0 bp), N50',\n", |
| 181 | + " # transpose=True\n", |
| 182 | + " #)\n", |
| 183 | + " \n", |
| 184 | + " if combine_output:\n", |
| 185 | + " # List of output files that were actually created\n", |
| 186 | + " output_files = []\n", |
| 187 | + " if mlst_path is not None:\n", |
| 188 | + " output_files.append('test_data/bifrost/parsed_mlst.tsv')\n", |
| 189 | + " if fastp_path is not None:\n", |
| 190 | + " output_files.append('test_data/bifrost/parsed_fastp.tsv')\n", |
| 191 | + " if quast_path is not None:\n", |
| 192 | + " output_files.append('test_data/bifrost/parsed_quast.tsv')\n", |
| 193 | + " if plasmidfinder_path is not None:\n", |
| 194 | + " output_files.append('test_data/bifrost/parsed_plasmidfinder.tsv')\n", |
| 195 | + " if bracken_path is not None:\n", |
| 196 | + " output_files.append('test_data/bifrost/parsed_bracken.tsv')\n", |
| 197 | + "\n", |
| 198 | + " # Read and concatenate all output files\n", |
| 199 | + " combined_df = pd.concat([pd.read_csv(file, sep='\\t') for file in output_files], axis=1)\n", |
| 200 | + "\n", |
| 201 | + " # Save the combined dataframe to the specified output path\n", |
| 202 | + " combined_df.to_csv(output_path, sep='\\t', index=False)" |
133 | 203 | ]
|
134 | 204 | },
|
135 | 205 | {
|
|
144 | 214 | " SampleID Species ST\n",
|
145 | 215 | "0 TestSample2 campylobacter 22\n",
|
146 | 216 | " fastp_version sequencing total_reads\n",
|
147 |
| - "0 0.23.4 paired end (151 cycles + 151 cycles) 4369610\n" |
| 217 | + "0 0.23.4 paired end (151 cycles + 151 cycles) 4369610\n", |
| 218 | + "\n", |
| 219 | + " Assembly # contigs (>= 0 bp) N50\n", |
| 220 | + "values Shovill_on_data_39_and_data_38__Contigs 942 56837\n" |
148 | 221 | ]
|
149 | 222 | }
|
150 | 223 | ],
|
151 | 224 | "source": [
|
152 | 225 | "# |hide\n",
|
153 | 226 | "# Example usage of the function\n",
|
154 |
| - "#process_qc_data(\n", |
155 |
| - "# mlst_path='/Users/B246654/vscode_storage/ssi-dk/bifrost_bridge/test_data/mlst_report.tabular', \n", |
156 |
| - "# fastp_path='/Users/B246654/vscode_storage/ssi-dk/bifrost_bridge/test_data/TestSample2.json'\n", |
157 |
| - "#)" |
| 227 | + "process_qc_data(\n", |
| 228 | + " mlst_path='test_data/mlst_report.tabular', \n", |
| 229 | + " fastp_path='test_data/TestSample2.json',\n", |
| 230 | + " quast_path='test_data/quast.tsv',\n", |
| 231 | + " plasmidfinder_path='test_data/plasmidfinder.tsv',\n", |
| 232 | + " bracken_path='test_data/bracken_krakenreport.txt',\n", |
| 233 | + " amrfinder_path='test_data/TestSample2.json',\n", |
| 234 | + " pmlst_path='test_data/TestSample2.json',\n", |
| 235 | + " combine_output = True,\n", |
| 236 | + " output_path = 'test_data/bifrost/output.tsv'\n", |
| 237 | + ")" |
158 | 238 | ]
|
159 | 239 | },
|
160 | 240 | {
|
|
0 commit comments