Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Python Interface files #307

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
include README.md
include LICENSE
include pyensembl/*.pyi
20 changes: 20 additions & 0 deletions pyensembl/common.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List, Tuple, Union

# Stub: takes any object plus a destination path and returns nothing.
# NOTE(review): pickle serialization is implied by the name only — confirm
# against the implementation module.
def dump_pickle(
    obj: object,
    filepath: str,
) -> None: ...
# Stub: takes a path and yields an arbitrary object, so callers must narrow
# the result before using it.
def load_pickle(
    filepath: str,
) -> object: ...
# Private helper stub taking the positional and keyword arguments of a call.
# NOTE(review): the return type is not annotated; presumably a hashable cache
# key — confirm before adding an annotation.
def _memoize_cache_key(
    args: Union[List, Tuple],
    kwargs: Dict[str, Union[List, Tuple]],
): ...
# Imported here so this declaration is self-contained within the stub.
from typing import Any, Callable, TypeVar

# Bound to "any callable" so that ``memoize`` returns exactly the callable type
# it received; decorated functions keep their parameters and return type.
_F = TypeVar("_F", bound=Callable[..., Any])

# Decorator stub. The previous ``function -> function`` annotation used a name
# that is not defined at runtime and erased the decorated callable's signature.
def memoize(fn: _F) -> _F: ...
131 changes: 131 additions & 0 deletions pyensembl/database.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union
from .common import memoize

if TYPE_CHECKING:
import logging
import polars
from .locus import Locus
from sqlite3 import Connection

# Bump this number whenever the database schema is changed.
DATABASE_SCHEMA_VERSION: int = 3

# Module-level logger; its value is elided in this stub.
logger: logging.Logger = ...

class Database:
    # Type stub for ``Database``; every body is elided with ``...``.
    # NOTE(review): presumably a SQLite database built from a GTF file (the
    # constructor takes ``gtf_path`` and several methods return
    # ``sqlite3.Connection``) — confirm against the implementation module.

    def __init__(
        self,
        gtf_path: str,
        install_string: Optional[str] = None,
        cache_directory_path: Optional[str] = None,
        restrict_gtf_columns: Optional[List[str]] = None,
        restrict_gtf_features: Optional[List[str]] = None,
    ) -> None: ...
    # ``other`` is ``object`` so the override matches ``object.__eq__``.
    def __eq__(self, other: object) -> bool: ...
    def __str__(self) -> str: ...
    def __hash__(self) -> int: ...
    @property
    def local_db_filename(self) -> str: ...
    @property
    def local_db_path(self) -> str: ...
    # NOTE(review): ``column_names`` is annotated ``str`` although the name is
    # plural and the result is a list of lists — confirm it should not be a
    # sequence of strings.
    def _all_possible_indices(self, column_names: str) -> List[List[str]]: ...

    # Maps a feature name to the column used as its primary key.
    PRIMARY_KEY_COLUMNS = {"gene": "gene_id", "transcript": "transcript_id"}

    def _get_primary_key(
        self, feature_name: str, feature_df: polars.DataFrame
    ) -> str: ...
    def _feature_indices(
        self, all_index_groups: List, primary_key: str, feature_df: polars.DataFrame
    ) -> List: ...
    def create(self, overwrite: bool = False) -> Connection: ...
    def _get_connection(self) -> Connection: ...
    @property
    def connection(self) -> Connection: ...
    def connect_or_create(self, overwrite: bool = False) -> Connection: ...
    def columns(self, table_name: str) -> List[str]: ...
    def column_exists(self, table_name: str, column_name: str) -> bool: ...
    def column_values_at_locus(
        self,
        column_name: str,
        feature: str,
        contig: str,
        position: int,
        end: Optional[int] = None,
        # The default is ``None``, so the annotation must say so explicitly;
        # this also matches ``query_feature_values`` below.
        strand: Optional[Literal["+", "-"]] = None,
        distinct: bool = False,
        sorted: bool = False,
    ) -> List[Any]: ...
    def distinct_column_values_at_locus(
        self,
        column: str,
        feature: str,
        contig: str,
        position: int,
        end: Optional[int] = None,
        # Explicit Optional for the same reason as in ``column_values_at_locus``.
        strand: Optional[Literal["+", "-"]] = None,
    ) -> List[Any]: ...
    def run_sql_query(
        self, sql: str, required: bool = False, query_params: List[Union[str, int]] = []
    ) -> List[Any]: ...
    @memoize
    def query(
        self,
        select_column_names: List[str],
        filter_column: str,
        filter_value: str,
        feature: Literal["transcript", "gene", "exon", "CDS"],
        distinct: bool = False,
        required: bool = False,
    ) -> List[Any]: ...
    # NOTE(review): no return annotation is declared for ``query_one``; left
    # unspecified until the implementation's return shape is confirmed.
    def query_one(
        self,
        select_column_names: List[str],
        filter_column: str,
        filter_value: str,
        feature: Literal["transcript", "gene", "exon", "CDS"],
        distinct: bool = False,
        required: bool = False,
    ): ...
    @memoize
    def query_feature_values(
        self,
        column: str,
        feature: Literal["transcript", "gene", "exon", "CDS"],
        distinct: bool = True,
        contig: Optional[str] = None,
        strand: Optional[Literal["+", "-"]] = None,
    ) -> List[str]: ...
    def query_distinct_on_contig(
        self,
        column_name: str,
        feature: Literal["transcript", "gene", "exon", "CDS"],
        contig: str,
    ) -> List[str]: ...
    def query_loci(
        self,
        filter_column: str,
        filter_value: str,
        feature: Literal["transcript", "gene", "exon", "CDS"],
    ) -> List[Locus]: ...
    def query_locus(
        self,
        filter_column: str,
        filter_value: str,
        feature: Literal["transcript", "gene", "exon", "CDS"],
    ) -> Locus: ...
    def _load_gtf_as_dataframe(
        self, usecols: Optional[List[str]] = None, features: Optional[List[str]] = None
    ) -> polars.DataFrame: ...
78 changes: 78 additions & 0 deletions pyensembl/download_cache.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union

if TYPE_CHECKING:
import logging

# Module-level logger; its value is elided in this stub.
logger: logging.Logger = ...

# String constants exported by the module.
# NOTE(review): judging by the names, the second is an environment-variable
# key that overrides the cache location — confirm in the implementation.
CACHE_BASE_SUBDIR: str = "pyensembl"
CACHE_DIR_ENV_KEY: str = "PYENSEMBL_CACHE_DIR"

# Stub: every argument is optional and the result is a ``str``.
# ``annotation_version`` accepts either a string or an integer.
def cache_subdirectory(
    reference_name: Optional[str] = None,
    annotation_name: Optional[str] = None,
    annotation_version: Optional[Union[str, int]] = None,
) -> str:
    ...

class MissingRemoteFile(Exception):
    # Exception type constructed from a single URL string.
    def __init__(
        self,
        url: str,
    ) -> None: ...

class MissingLocalFile(Exception):
    # Exception type constructed from a single filesystem path string; it also
    # declares its own ``__str__``.
    def __init__(
        self,
        path: str,
    ) -> None: ...

    def __str__(self) -> str: ...

class DownloadCache:
    # Type stub for ``DownloadCache``; every body is elided with ``...``.

    def __init__(
        self,
        reference_name: str,
        annotation_name: str,
        # The default is ``None``, so the annotation admits it explicitly, as
        # ``cache_subdirectory`` in this module already does.
        annotation_version: Optional[Union[str, int]] = None,
        decompress_on_download: bool = False,
        copy_local_files_to_cache: bool = False,
        # ``function`` is not a resolvable type name; ``Callable`` is.
        # NOTE(review): the callback's parameters and return type are not
        # visible here, so they are left open — tighten once confirmed.
        install_string_function: Optional[Callable[..., object]] = None,
        cache_directory_path: Optional[str] = None,
    ) -> None: ...
    @property
    def cache_directory_path(self) -> str: ...
    # Variadic tuple of (name, value) pairs; ``Tuple[X]`` would mean a tuple of
    # exactly one pair.
    # NOTE(review): assumes more than one field is returned — confirm.
    def _fields(self) -> Tuple[Tuple[str, Union[str, int, bool]], ...]: ...
    # ``other`` is ``object`` so the override matches ``object.__eq__``.
    def __eq__(self, other: object) -> bool: ...
    def __hash__(self) -> int: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
    def is_url_format(self, path_or_url: str) -> bool: ...
    def _remove_compression_suffix_if_present(self, filename: str) -> str: ...
    def cached_path(self, path_or_url: str) -> str: ...
    def _download_if_necessary(
        self, url: str, download_if_missing: bool, overwrite: bool
    ) -> str: ...
    def _copy_if_necessary(self, local_path: str, overwrite: bool) -> str: ...
    def download_or_copy_if_necessary(
        self,
        path_or_url: str,
        download_if_missing: bool = False,
        overwrite: bool = False,
    ) -> str: ...
    def _raise_missing_file_error(self, missing_urls_dict: Dict) -> None: ...
    def local_path_or_install_error(
        self,
        field_name: str,
        path_or_url: str,
        download_if_missing: bool = False,
        overwrite: bool = False,
    ) -> str: ...
    def delete_cached_files(
        self, prefixes: List[str] = [], suffixes: List[str] = []
    ) -> None: ...
    def delete_cache_directory(self) -> None: ...
51 changes: 51 additions & 0 deletions pyensembl/ensembl_release.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, TYPE_CHECKING
from typing_extensions import deprecated

from .genome import Genome
from .ensembl_versions import MAX_ENSEMBL_RELEASE
from .species import human
from .ensembl_url_templates import ENSEMBL_FTP_SERVER

if TYPE_CHECKING:
from .species import Species

class EnsemblRelease(Genome):
    # Type stub for ``EnsemblRelease``, a ``Genome`` subclass; every body is
    # elided with ``...``.

    # NOTE(review): no return annotation is declared here; presumably the
    # normalized (release, species, server) values — confirm before typing it.
    @classmethod
    def normalize_init_values(
        cls, release: Union[int, str], species: Union[Species, str], server: str
    ): ...
    # Returns an ``EnsemblRelease``: the deprecated module-level
    # ``cached_release`` returns that type and names this method as its
    # replacement.
    @classmethod
    def cached(
        cls,
        release: int = MAX_ENSEMBL_RELEASE,
        species: Union[str, Species] = human,
        server: str = ENSEMBL_FTP_SERVER,
    ) -> "EnsemblRelease": ...
    def __init__(
        self,
        release: int = MAX_ENSEMBL_RELEASE,
        species: Union[str, Species] = human,
        server: str = ENSEMBL_FTP_SERVER,
    ) -> None: ...
    def install_string(self) -> str: ...
    def __str__(self) -> str: ...
    # ``other`` is ``object`` so the override matches ``object.__eq__``.
    def __eq__(self, other: object) -> bool: ...
    def __hash__(self) -> int: ...
    def to_dict(self) -> dict: ...
    @classmethod
    def from_dict(cls, state_dict: dict) -> "EnsemblRelease": ...

# Deprecated module-level accessor; the decorator message points callers to
# ``EnsemblRelease.cached``. Parameters are annotated to match the types that
# ``EnsemblRelease.normalize_init_values`` accepts for the same arguments.
# NOTE(review): confirm the implementation accepts string release numbers.
@deprecated("Use pyensembl.ensembl_release.EnsemblRelease.cached instead.")
def cached_release(
    release: Union[int, str],
    species: Union[str, Species] = "human",
) -> EnsemblRelease: ...
82 changes: 82 additions & 0 deletions pyensembl/ensembl_url_templates.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Templates for URLs and paths to specific release, species, and file type
on the Ensembl ftp server.

For example, the human chromosomal DNA sequences for release 78 are in:

https://ftp.ensembl.org/pub/release-78/fasta/homo_sapiens/dna/

"""

from typing import Literal, Tuple, Union, TYPE_CHECKING
if TYPE_CHECKING:
from .species import Species

# Base URLs of the FTP servers.
ENSEMBL_FTP_SERVER: str = "https://ftp.ensembl.org"
ENSEMBL_PLANTS_FTP_SERVER: str = "https://ftp.ensemblgenomes.ebi.ac.uk/"

# %-style sub-directory templates; the named fields are ``release`` (integer),
# ``species`` and, for FASTA, ``type``.
FASTA_SUBDIR_TEMPLATE: str = "/pub/release-%(release)d/fasta/%(species)s/%(type)s/"
PLANTS_FASTA_SUBDIR_TEMPLATE: str = (
    "/pub/release-%(release)d/plants/fasta/%(species)s/%(type)s/"
)
GTF_SUBDIR_TEMPLATE: str = "/pub/release-%(release)d/gtf/%(species)s/"
PLANTS_GTF_SUBDIR_TEMPLATE: str = "/pub/release-%(release)d/plants/gtf/%(species)s/"

# Variadic tuple of names: ``Tuple[str]`` describes a tuple of exactly one
# string, which the two-element value below does not satisfy.
lPlants: Tuple[str, ...] = ("arabidopsis_thaliana", "arabidopsis")

# Stub: accepts a release (string or integer) and a species (name or
# ``Species`` object) and returns an ``(int, str, str)`` triple.
# NOTE(review): the meaning of the two string elements is not visible here.
def normalize_release_properties(
    ensembl_release: Union[str, int],
    species: Union[str, Species],
) -> Tuple[int, str, str]:
    ...

# %-style template with the named fields ``Species``, ``reference`` and
# ``release`` (integer).
GTF_FILENAME_TEMPLATE: str = (
    "%(Species)s.%(reference)s.%(release)d.gtf.gz"
)

# Stub: takes a release (string or integer) and a species (name or ``Species``
# object) and returns a ``str``.
def make_gtf_filename(
    ensembl_release: Union[str, int],
    species: Union[str, Species],
) -> str:
    ...
# Stub: returns a ``str`` for the given release and species, with the server
# and sub-directory template overridable.
# ``gtf_subdir`` is annotated ``str`` because its default,
# ``GTF_SUBDIR_TEMPLATE``, is declared ``str`` in this module, and the sibling
# ``server`` parameter is already typed the same way.
def make_gtf_url(
    ensembl_release: Union[str, int],
    species: Union[str, Species],
    server: str = ENSEMBL_FTP_SERVER,
    gtf_subdir: str = GTF_SUBDIR_TEMPLATE,
) -> str: ...

# "OLD" templates embed the integer ``release`` field in the filename.
OLD_FASTA_FILENAME_TEMPLATE: str = (
    "%(Species)s.%(reference)s.%(release)d.%(sequence_type)s.all.fa.gz"
)
OLD_FASTA_FILENAME_TEMPLATE_NCRNA: str = (
    "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz"
)

# "NEW" templates have no ``release`` field.
NEW_FASTA_FILENAME_TEMPLATE: str = (
    "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz"
)
NEW_FASTA_FILENAME_TEMPLATE_NCRNA: str = (
    "%(Species)s.%(reference)s.ncrna.fa.gz"
)

# Stub: returns a ``str`` for the given release, species and sequence type.
# ``sequence_type`` is restricted to the literal values listed in the
# annotation; ``is_plant`` is a required boolean.
def make_fasta_filename(
    ensembl_release: Union[str, int],
    species: Union[str, Species],
    sequence_type: Literal["ncrna", "cdna", "cds", "pep", "dna", "dna_index"],
    is_plant: bool,
) -> str:
    ...
# Stub: returns a ``str`` for the given release, species and sequence type,
# with the server and sub-directory template overridable.
# ``fasta_subdir`` is annotated ``str`` because its default,
# ``FASTA_SUBDIR_TEMPLATE``, is declared ``str`` in this module, and the
# sibling ``server`` parameter is already typed the same way.
def make_fasta_url(
    ensembl_release: Union[str, int],
    species: Union[str, Species],
    sequence_type: Literal["ncrna", "cdna", "cds", "pep", "dna", "dna_index"],
    is_plant: bool,
    server: str = ENSEMBL_FTP_SERVER,
    fasta_subdir: str = FASTA_SUBDIR_TEMPLATE,
) -> str: ...
19 changes: 19 additions & 0 deletions pyensembl/ensembl_versions.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union

# Integer bounds on release numbers declared by this stub.
# NOTE(review): by their names these are the supported minimum and maximum —
# confirm the values track the implementation module.
MIN_ENSEMBL_RELEASE: int = 40
MAX_ENSEMBL_RELEASE: int = 111

# Separate upper bound carrying the "PLANTS" name.
MAX_PLANTS_ENSEMBL_RELEASE: int = 58

# Stub: accepts a release given as a string or an integer and returns an
# ``int``.
# NOTE(review): the effect of ``squeeze`` is not visible in this stub.
def check_release_number(
    release: Union[str, int],
    squeeze: bool = False,
) -> int:
    ...
Loading