-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from DavidStirling/remote-tables
Add initial remote table registration implementation
- Loading branch information
Showing
5 changed files
with
138 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
# If the file is missing please request a copy by contacting | ||
# [email protected]. | ||
import collections | ||
from importlib.util import find_spec | ||
import logging | ||
import os | ||
import sys | ||
|
@@ -19,6 +20,10 @@ | |
|
||
from omero2pandas.connect import OMEROConnection | ||
from omero2pandas.upload import create_table | ||
if find_spec("tiledb"): | ||
from omero2pandas.remote import register_table | ||
else: | ||
register_table = None | ||
|
||
logging.basicConfig( | ||
format="%(asctime)s %(levelname)-7s [%(name)16s] %(message)s", | ||
|
@@ -185,7 +190,8 @@ def read_table(file_id=None, annotation_id=None, column_names=(), rows=None, | |
|
||
def upload_table(source, table_name, parent_id=None, parent_type='Image', | ||
links=None, chunk_size=None, omero_connector=None, | ||
server=None, port=4064, username=None, password=None): | ||
server=None, port=4064, username=None, password=None, | ||
local_path=None, remote_path=None): | ||
""" | ||
Upload a pandas dataframe to a new OMERO table. | ||
For the connection, supply either an active client object or server | ||
|
@@ -205,6 +211,10 @@ def upload_table(source, table_name, parent_id=None, parent_type='Image', | |
:param server: Address of the server | ||
:param port: Port the server runs on (default 4064) | ||
:param username: Username for server login | ||
:param local_path: [TileDB only], construct table at this file path and | ||
register remotely | ||
:param remote_path: [TileDB only], mapping for local_path on the server | ||
(if different from local system) | ||
:param password: Password for server login | ||
:return: File Annotation ID of the new table | ||
""" | ||
|
@@ -220,7 +230,7 @@ def upload_table(source, table_name, parent_id=None, parent_type='Image', | |
if parent_id is not None: | ||
if (parent_type, parent_id) not in links: | ||
links.append((parent_type, parent_id)) | ||
if not links: | ||
if not links and not local_path: | ||
raise ValueError("No OMERO objects to link the table to") | ||
elif not isinstance(links, Iterable): | ||
raise ValueError(f"Links should be an iterable list of " | ||
|
@@ -229,7 +239,14 @@ def upload_table(source, table_name, parent_id=None, parent_type='Image', | |
port=port, client=omero_connector) as connector: | ||
conn = connector.get_gateway() | ||
conn.SERVICE_OPTS.setOmeroGroup('-1') | ||
ann_id = create_table(source, table_name, links, conn, chunk_size) | ||
if local_path or remote_path: | ||
if not register_table: | ||
raise ValueError("Remote table support is not installed") | ||
ann_id = register_table(source, local_path, | ||
remote_path=remote_path, | ||
chunk_size=chunk_size) | ||
else: | ||
ann_id = create_table(source, table_name, links, conn, chunk_size) | ||
if ann_id is None: | ||
LOGGER.warning("Failed to create OMERO table") | ||
return ann_id | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# encoding: utf-8 | ||
# | ||
# Copyright (c) Glencoe Software, Inc. All rights reserved. | ||
# | ||
# This software is distributed under the terms described by the LICENCE file | ||
# you can find at the root of the distribution bundle. | ||
# If the file is missing please request a copy by contacting | ||
# [email protected]. | ||
import logging | ||
from pathlib import Path, PurePosixPath | ||
import time | ||
|
||
import pandas as pd | ||
import tiledb | ||
from tqdm.auto import tqdm | ||
|
||
LOGGER = logging.getLogger(__name__) | ||
|
||
OMERO_TILEDB_VERSION = '3' # Version of the omero table implementation | ||
|
||
|
||
def register_table(source, local_path, remote_path=None, chunk_size=1000):
    """
    Write a table to a new TileDB array so that it can be registered
    remotely.

    :param source: pandas dataframe, or path to a .csv file which will be
        read in chunks
    :param local_path: File path to write the TileDB array to. If not
        supplied, remote_path is used as the write location instead.
    :param remote_path: Path to the array as seen by the server (if
        different from local_path). Treated as a POSIX path.
    :param chunk_size: Number of rows to write per operation. Falls back
        to 1000 if a falsy value is supplied.
    :return: Path to the written TileDB array, as a string
    :raises ValueError: If neither path is supplied, if the target array
        already exists, or if the source provides no data to write
    """
    # Validate up front: Path(None) would otherwise fail with an opaque
    # TypeError further down.
    if not (local_path or remote_path):
        raise ValueError("Either local_path or remote_path is required")
    LOGGER.info("Registering remote table")
    write_path = Path(local_path or remote_path).with_suffix(".tiledb")
    # Assume the server will be running on Linux
    remote_path = PurePosixPath(
        remote_path or local_path).with_suffix(".tiledb")
    LOGGER.debug("Remote path would be %s", remote_path)
    if write_path.exists():
        raise ValueError(f"Table file {write_path} already exists")
    # path.as_uri() exists but mangles any spaces in the path!
    write_path = str(write_path)
    # Use a default chunk size if not set
    chunk_size = chunk_size or 1000
    LOGGER.info("Writing data to TileDB")
    # Export table
    if isinstance(source, (str, Path)):
        # Stream the CSV so that the whole file is never held in memory;
        # the row count is unknown until the file has been consumed.
        data_iterator = pd.read_csv(source, chunksize=chunk_size)
        total_rows = None
    else:
        data_iterator = (source.iloc[i:i + chunk_size]
                         for i in range(0, len(source), chunk_size))
        total_rows = len(source)
    row_idx = 0
    chunks_written = 0
    # The context manager guarantees the progress bar is closed even if a
    # write fails part-way through.
    with tqdm(
            desc="Generating TileDB file...", initial=1, dynamic_ncols=True,
            total=total_rows,
            bar_format='{desc}: {percentage:3.0f}%|{bar}| '
                       '{n_fmt}/{total_fmt} rows, {elapsed} {postfix}'
    ) as progress_monitor:
        for chunk in data_iterator:
            # The first write creates the array ("ingest"), subsequent
            # writes append rows starting at the running row offset.
            tiledb.from_pandas(
                write_path, chunk, sparse=True, full_domain=True,
                tile=10000, attr_filters=None,
                row_start_idx=row_idx, allows_duplicates=False,
                mode="append" if row_idx else "ingest")
            progress_monitor.update(len(chunk))
            row_idx += len(chunk)
            chunks_written += 1
    if not chunks_written:
        # Nothing was ingested, so no array exists to attach metadata to.
        raise ValueError("Source contains no data to write to a table")
    LOGGER.debug("Appending metadata to TileDB")
    # Append omero metadata
    with tiledb.open(write_path, mode="w") as array:
        array.meta['__version'] = OMERO_TILEDB_VERSION
        array.meta['__initialized'] = time.time()
    LOGGER.info("Table saved successfully")
    return write_path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,12 +8,6 @@ name = "omero2pandas" | |
description = "OMERO.tables to pandas bridge" | ||
readme = "README.md" | ||
license = {file = "LICENSE.txt"} | ||
dependencies = [ | ||
'omero-py>=5.19.5', | ||
'pandas>2', | ||
'tqdm', | ||
] | ||
requires-python = ">=3.9" | ||
authors = [ | ||
{name = "Glencoe Software, Inc.", email="[email protected]"}, | ||
] | ||
|
@@ -25,10 +19,19 @@ classifiers = [ | |
'Intended Audience :: End Users/Desktop', | ||
'Programming Language :: Python :: 3', | ||
] | ||
|
||
requires-python = ">=3.9" | ||
dependencies = [ | ||
'omero-py>=5.19.5', | ||
'pandas>2', | ||
'tqdm', | ||
] | ||
|
||
[project.optional-dependencies] | ||
token = ["omero-user-token>=0.3.0"] | ||
remote = [ | ||
"pyarrow>=19.0.0", | ||
"tiledb>=0.33.2", | ||
] | ||
|
||
[project.urls] | ||
github = "https://github.com/glencoesoftware/omero2pandas" | ||
|