Skip to content

Commit db68f5e

Browse files
committed
fix(starrynight): output index integration
1 parent 4a0d1e5 commit db68f5e

File tree

10 files changed

+506
-33
lines changed

10 files changed

+506
-33
lines changed

starrynight/src/starrynight/algorithms/analysis.py

Lines changed: 107 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@
160160
from mako.template import Template
161161

162162
from starrynight.algorithms.cp_plugin_callbarcodes import CallBarcodes
163-
from starrynight.algorithms.index import PCPIndex
163+
from starrynight.algorithms.index import OutputIndex, PCPIndex
164164
from starrynight.modules.cp_illum_apply.constants import (
165165
CP_ILLUM_APPLY_OUT_PATH_SUFFIX,
166166
)
@@ -179,6 +179,8 @@
179179
get_cycles_by_batch_plate,
180180
get_cycles_from_df,
181181
get_default_path_prefix,
182+
get_filenames_by_channel_id,
183+
get_filenames_by_channel_id_cycle_id,
182184
)
183185
from starrynight.utils.globbing import flatten_all, flatten_dict, get_files_by
184186
from starrynight.utils.misc import resolve_path_loaddata
@@ -188,6 +190,22 @@
188190
###############################
189191

190192

193+
def handle_sbs_index_query(
    sbs_comp_index_df: pl.LazyFrame,
    legacy_channel_map: dict,
    cycle: str,
    ch: str,
):
    """Look up SBS index entries for a channel/cycle, falling back to cycle "1".

    Parameters
    ----------
    sbs_comp_index_df : pl.LazyFrame
        Index frame passed through to the lookup helper.
    legacy_channel_map : dict
        Mapping from ``ch`` to the channel id used for the lookup.
    cycle : str
        Cycle id to query first.
    ch : str
        Key into ``legacy_channel_map``.

    Returns
    -------
    The result of ``get_filenames_by_channel_id_cycle_id`` for the requested
    cycle, or, when that result is empty, the result for cycle ``"1"``.

    """
    channel_id = legacy_channel_map[ch]
    matches = get_filenames_by_channel_id_cycle_id(
        sbs_comp_index_df, channel_id, cycle
    )
    if len(matches) != 0:
        return matches
    # Nothing indexed for this cycle: retry against cycle "1".
    # NOTE(review): presumably covers channels only present in the first
    # cycle -- confirm against the index producer.
    return get_filenames_by_channel_id_cycle_id(
        sbs_comp_index_df, channel_id, "1"
    )
207+
208+
191209
def get_header(
192210
header: str,
193211
cycle: int | None,
@@ -249,11 +267,23 @@ def write_loaddata(
249267
f: TextIOWrapper,
250268
use_legacy: bool = False,
251269
exp_config_path: Path | CloudPath | None = None,
270+
cp_corr_index_path: Path | CloudPath | None = None,
271+
sbs_comp_index_path: Path | CloudPath | None = None,
252272
) -> None:
253273
# setup csv headers and write the header first
254274
loaddata_writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
255275
legacy_channel_map = {}
256276

277+
# setup index dfs if available
278+
if cp_corr_index_path is not None:
279+
cp_corr_index_df = pl.scan_parquet(
280+
cp_corr_index_path.resolve().__str__()
281+
)
282+
if sbs_comp_index_path is not None:
283+
sbs_comp_index_df = pl.scan_parquet(
284+
sbs_comp_index_path.resolve().__str__()
285+
)
286+
257287
if use_legacy:
258288
# Load experiment config
259289
exp_config = json.loads(exp_config_path.read_text())
@@ -294,8 +324,13 @@ def write_loaddata(
294324
*sbs_pathname_heads,
295325
]
296326
)
297-
index = cp_images_df.first().collect().to_dicts()[0]
298-
index = PCPIndex(**index)
327+
328+
if cp_corr_index_path is not None:
329+
index = cp_corr_index_df.first().collect().to_dicts()[0]
330+
else:
331+
index = cp_images_df.first().collect().to_dicts()[0]
332+
333+
index = OutputIndex(**index)
299334
wells_sites = (
300335
cp_images_df.collect()
301336
.group_by("well_id")
@@ -306,26 +341,53 @@ def write_loaddata(
306341
index.well_id = well_sites["well_id"]
307342
for site_id in well_sites["site_id"]:
308343
index.site_id = site_id
309-
cp_filenames = [
310-
get_cp_filename_value(index, ch, use_legacy, legacy_channel_map)
311-
for ch in cp_plate_channel_list
312-
]
344+
if cp_corr_index_path is None:
345+
cp_filenames = [
346+
get_cp_filename_value(
347+
index, ch, use_legacy, legacy_channel_map
348+
)
349+
for ch in cp_plate_channel_list
350+
]
351+
cp_pathnames = [
352+
resolve_path_loaddata(
353+
AnyPath(path_mask), cp_corr_images_path
354+
)
355+
for _ in range(len(cp_pathname_heads))
356+
]
357+
else:
358+
cp_ch = [
359+
get_filenames_by_channel_id(
360+
cp_corr_index_df, legacy_channel_map[ch]
361+
)[0]
362+
for ch in cp_plate_channel_list
363+
]
364+
cp_filenames = [elem["filename"] for elem in cp_ch]
365+
cp_pathnames = [str(elem["pathname"]) for elem in cp_ch]
313366
# Match the order of iteration in sbs_filename_heads to ensure correct alignment
314-
sbs_filenames = [
315-
get_sbs_filename_value(
316-
index, cycle, ch, use_legacy, legacy_channel_map
317-
)
318-
for cycle in plate_cycles_list
319-
for ch in sbs_plate_channel_list
320-
]
321-
cp_pathnames = [
322-
resolve_path_loaddata(AnyPath(path_mask), cp_corr_images_path)
323-
for _ in range(len(cp_pathname_heads))
324-
]
325-
sbs_pathnames = [
326-
resolve_path_loaddata(AnyPath(path_mask), sbs_comp_images_path)
327-
for _ in range(len(sbs_pathname_heads))
328-
]
367+
if sbs_comp_index_path is None:
368+
sbs_filenames = [
369+
get_sbs_filename_value(
370+
index, cycle, ch, use_legacy, legacy_channel_map
371+
)
372+
for cycle in plate_cycles_list
373+
for ch in sbs_plate_channel_list
374+
]
375+
sbs_pathnames = [
376+
resolve_path_loaddata(
377+
AnyPath(path_mask), sbs_comp_images_path
378+
)
379+
for _ in range(len(sbs_pathname_heads))
380+
]
381+
else:
382+
sbs_ch_cycle = [
383+
handle_sbs_index_query(
384+
sbs_comp_index_df, legacy_channel_map, cycle, ch
385+
)[0]
386+
for cycle in plate_cycles_list
387+
for ch in sbs_plate_channel_list
388+
]
389+
sbs_filenames = [elem["filename"] for elem in sbs_ch_cycle]
390+
sbs_pathnames = [str(elem["pathname"]) for elem in sbs_ch_cycle]
329391

330392
well_value = (
331393
index.well_id[4:]
@@ -358,7 +420,9 @@ def gen_analysis_load_data(
358420
out_path: Path | CloudPath,
359421
path_mask: str | None,
360422
cp_corr_images_path: Path | CloudPath | None = None,
423+
cp_corr_index_dir: Path | CloudPath | None = None,
361424
sbs_comp_images_path: Path | CloudPath | None = None,
425+
sbs_comp_index_dir: Path | CloudPath | None = None,
362426
use_legacy: bool = False,
363427
exp_config_path: Path | CloudPath | None = None,
364428
uow_hierarchy: list[str] = None,
@@ -375,8 +439,12 @@ def gen_analysis_load_data(
375439
Path prefix mask to use.
376440
cp_corr_images_path : Path | CloudPath
377441
Path | CloudPath to cp corr images directory.
442+
cp_corr_index_dir : Path | CloudPath
443+
Path | CloudPath to cp corr index directory.
378444
sbs_comp_images_path : Path | CloudPath
379445
Path | CloudPath to sbs compensated images directory.
446+
sbs_comp_index_dir : Path | CloudPath
447+
Path | CloudPath to sbs compensated index directory.
380448
use_legacy : bool
381449
Use legacy cppipe and loaddata.
382450
exp_config_path : Path | CloudPath
@@ -444,6 +512,21 @@ def gen_analysis_load_data(
444512

445513
# Construct filename for the loaddata csv
446514
level_out_path = out_path.joinpath(f"{'^'.join(level)}#analysis.csv")
515+
516+
# Construct index paths
517+
if cp_corr_index_dir is not None:
518+
cp_corr_index_path = next(
519+
cp_corr_index_dir.glob(f"{'^'.join(level)}#*.parquet")
520+
)
521+
else:
522+
cp_corr_index_path = None
523+
524+
if sbs_comp_index_dir is not None:
525+
sbs_comp_index_path = next(
526+
sbs_comp_index_dir.glob(f"{'^'.join(level)}#*.parquet")
527+
)
528+
else:
529+
sbs_comp_index_path = None
447530
with level_out_path.open("w") as f:
448531
write_loaddata(
449532
cp_level_df,
@@ -456,6 +539,8 @@ def gen_analysis_load_data(
456539
f,
457540
use_legacy,
458541
exp_config_path,
542+
cp_corr_index_path,
543+
sbs_comp_index_path,
459544
)
460545

461546

starrynight/src/starrynight/algorithms/cp.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77
from tqdm import tqdm
88

99
from starrynight.utils.cellprofiler import CellProfilerContext
10+
from starrynight.utils.misc import clean_directory
1011

1112

1213
def run_cp(
1314
uow_list: list[tuple[Path, Path]],
1415
out_dir: Path,
1516
plugin_dir: Path | None = None,
17+
clean: bool = True,
1618
job_idx: int = 0,
1719
) -> None:
1820
"""Run cellprofiler for a list of unit-of-work (UOW) items.
@@ -25,6 +27,8 @@ def run_cp(
2527
Output directory path.
2628
plugin_dir : Path
2729
Path to cellprofiler plugin directory.
30+
clean : bool
31+
Clean output directory before the run.
2832
job_idx : int, optional
2933
Job index for tqdm progress bar (default is 0).
3034
@@ -43,6 +47,8 @@ def run_cp(
4347
local_out_dir = out_dir.joinpath(
4448
"-".join(load_data_path.name.split("#")[0].split("^"))
4549
)
50+
if clean:
51+
clean_directory(local_out_dir)
4652
print(local_out_dir)
4753
local_out_dir.mkdir(parents=True, exist_ok=True)
4854
with CellProfilerContext(

0 commit comments

Comments
 (0)