Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# CI pipeline: lint and test on every push / pull request targeting main,
# and publish to PyPI only when a version tag is pushed.
name: build

on:
  push:
    branches:
      - main
    # Version tags also trigger the workflow so the publish job can run for
    # releases. NOTE(review): assumes release tags are prefixed with "v"
    # (e.g. v1.2.3) - adjust the pattern if the project uses another scheme.
    tags:
      - 'v*'
  pull_request:
    branches:
      - main

jobs:
  # Lint and run the unit tests on every supported Python version.
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
        run: uv sync

      - name: Run pre-commit
        run: |
          uv run pre-commit run --all-files

      - name: Run tests
        run: |
          uv run python -m unittest discover -s tests -p "*.py"

  # Build the distribution and upload it to PyPI via trusted publishing.
  publish:
    needs: test
    runs-on: ubuntu-latest
    # Publish only for pushed version tags, never for ordinary commits to
    # main: a plain merge would otherwise attempt an upload on every commit.
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    permissions:
      # Needed to mint the OIDC token used by PyPI trusted publishing.
      id-token: write
      # Setting `permissions` drops every unlisted scope, so grant the read
      # access that actions/checkout relies on explicitly.
      contents: read
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Install uv
        uses: astral-sh/setup-uv@v4

      - name: Install dependencies
        run: uv sync

      - name: Build package
        run: uv build

      - name: Publish to PyPI (production)
        run: |
          uv publish --trusted-publishing always
Comment on lines +62 to +64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make the publish job run when a release tag is added instead (same as molviewspec)? I don't want to publish on each commit to main.

3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
**/tests/**/*.bcif
tests/*.bcif

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't BCIF files be ignored altogether?


### VisualStudioCode template
.vscode/*
Expand Down Expand Up @@ -227,4 +227,3 @@ dmypy.json

# Cython debug symbols
cython_debug/

15 changes: 15 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.2.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.3
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ A library for reading and writing (Binary)CIF files in Python.
## Linting

```
autoflake --remove-all-unused-imports --remove-unused-variables --ignore-init-module-imports -ir . && isort . && black .
```
pre-commit run -a
```
1 change: 1 addition & 0 deletions ciftools/binary/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any, Dict, List, Optional, Union

import numpy as np

from ciftools.binary.decoder import decode_cif_data
from ciftools.binary.encoded_data import EncodedCIFCategory, EncodedCIFColumn, EncodedCIFFile
from ciftools.models.data import CIFCategory, CIFColumn, CIFDataBlock, CIFFile, CIFValuePresenceEnum
Expand Down
9 changes: 5 additions & 4 deletions ciftools/binary/decoder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np

from ciftools.binary.data_types import DataType
from ciftools.binary.encoded_data import EncodedCIFData
from ciftools.binary.encoding_types import (
Expand Down Expand Up @@ -59,10 +60,10 @@ def _decode_integer_packing_signed(data: np.ndarray, encoding: IntegerPackingEnc
value = 0
t = data[i]
while t == upper_limit or t == lower_limit:
value += t
value += int(t)
i += 1
t = data[i]
value += t
value += int(t)
output[j] = value
i += 1
j += 1
Expand All @@ -80,10 +81,10 @@ def _decode_integer_packing_unsigned(data: np.ndarray, encoding: IntegerPackingE
value = 0
t = data[i]
while t == upper_limit:
value += t
value += int(t)
i += 1
t = data[i]
value += t
value += int(t)
output[j] = value
i += 1
j += 1
Expand Down
1 change: 1 addition & 0 deletions ciftools/binary/encoded_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Optional, TypedDict, Union

import numpy as np

from ciftools.binary.encoding_types import EncodingBase


Expand Down
13 changes: 6 additions & 7 deletions ciftools/binary/encoder.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import math
import sys
from typing import Any, Dict, List, Protocol, Tuple, Union
from typing import Any, List, Protocol, Tuple, Union

import numpy as np
from numba import jit

from ciftools.binary.data_types import DataType, DataTypeEnum
from ciftools.binary.encoded_data import EncodedCIFData
from ciftools.binary.encoding_types import (
Expand All @@ -15,12 +17,10 @@
RunLengthEncoding,
StringArrayEncoding,
)
from numba import jit


class BinaryCIFEncoder(Protocol):
def encode(self, data: Any) -> EncodedCIFData:
...
def encode(self, data: Any) -> EncodedCIFData: ...


class ComposeEncoders(BinaryCIFEncoder):
Expand Down Expand Up @@ -109,7 +109,6 @@ def encode(self, data: np.ndarray, *args, **kwargs) -> EncodedCIFData:

class IntegerPacking(BinaryCIFEncoder):
def encode(self, data: np.ndarray) -> EncodedCIFData:

# TODO: must be 32bit integer?
packing = _determine_packing(data)
if packing.bytesPerElement == 4:
Expand Down Expand Up @@ -325,8 +324,8 @@ def _pack_strings(data: List[str]) -> Tuple[str, np.ndarray, np.ndarray]:
str_map = {s: i for i, s in enumerate(strings)}
string_data = "".join(strings)

indices = np.array([str_map[s] for s in data], dtype='<i4')
offset_data = np.empty(len(strings) + 1, dtype='<i4')
indices = np.array([str_map[s] for s in data], dtype="<i4")
offset_data = np.empty(len(strings) + 1, dtype="<i4")
offset_data[0] = 0
np.cumsum([len(s) for s in strings], out=offset_data[1:])

Expand Down
5 changes: 3 additions & 2 deletions ciftools/binary/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import msgpack
import numpy as np

from ciftools.binary.encoded_data import (
EncodedCIFCategory,
EncodedCIFColumn,
Expand Down Expand Up @@ -86,14 +87,14 @@ def _encode_field(field: CIFFieldDesc, data: List[_DataWrapper], total_count: in
category_array = field.value_array and field.value_array(d)
if category_array is not None:
if len(category_array) != category.count:
raise ValueError(f"provided values array must have the same length as the category count field")
raise ValueError("provided values array must have the same length as the category count field")

array[offset : offset + category.count] = category_array # type: ignore

category_mask = field.presence_array and field.presence_array(d)
if category_mask is not None:
if len(category_mask) != category.count:
raise ValueError(f"provided mask array must have the same length as the category count field")
raise ValueError("provided mask array must have the same length as the category count field")
mask[offset : offset + category.count] = category_mask

offset += category.count
Expand Down
60 changes: 20 additions & 40 deletions ciftools/models/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,17 @@ class CIFValuePresenceEnum(IntEnum):


class CIFColumn(Protocol):
def get_string(self, row: int) -> Optional[str]:
...
def get_string(self, row: int) -> Optional[str]: ...

def get_integer(self, row: int) -> int:
...
def get_integer(self, row: int) -> int: ...

def get_float(self, row: int) -> float:
...
def get_float(self, row: int) -> float: ...

def get_value_presence(self, row: int) -> CIFValuePresenceEnum:
...
def get_value_presence(self, row: int) -> CIFValuePresenceEnum: ...

def are_values_equal(self, row_a: int, row_b: int) -> bool:
...
def are_values_equal(self, row_a: int, row_b: int) -> bool: ...

def string_equals(self, row: int, value: str) -> bool:
...
def string_equals(self, row: int, value: str) -> bool: ...

def as_ndarray(
self, *, dtype: Optional[Union[np.dtype, str]] = None, start: Optional[int] = None, end: Optional[int] = None
Expand All @@ -40,11 +34,9 @@ def as_ndarray(
"""
...

def __getitem__(self, idx: Any) -> Any:
...
def __getitem__(self, idx: Any) -> Any: ...

def __len__(self) -> int:
...
def __len__(self) -> int: ...

@property
def value_presences(self) -> Optional[np.ndarray]:
Expand All @@ -56,29 +48,23 @@ def value_presences(self) -> Optional[np.ndarray]:

class CIFCategory(Protocol):
@property
def name(self) -> str:
...
def name(self) -> str: ...

@property
def n_rows(self) -> int:
...
def n_rows(self) -> int: ...

@property
def n_columns(self) -> int:
...
def n_columns(self) -> int: ...

@property
def field_names(self) -> List[str]:
...
def field_names(self) -> List[str]: ...

def __getattr__(self, name: str) -> CIFColumn:
return self[name]

def __getitem__(self, name: str) -> CIFColumn:
...
def __getitem__(self, name: str) -> CIFColumn: ...

def __contains__(self, key: str) -> bool:
...
def __contains__(self, key: str) -> bool: ...

# Category Helpers
def get_matrix(self, field: str, rows: int, cols: int, row_index: int) -> np.ndarray:
Expand Down Expand Up @@ -120,19 +106,15 @@ class CIFDataBlock(Protocol):
def __getattr__(self, name: str) -> CIFCategory:
return self[name]

def __getitem__(self, name: str) -> CIFCategory:
...
def __getitem__(self, name: str) -> CIFCategory: ...

def __contains__(self, key: str):
...
def __contains__(self, key: str): ...

@property
def header(self) -> str:
...
def header(self) -> str: ...

@property
def categories(self) -> Dict[str, CIFCategory]:
...
def categories(self) -> Dict[str, CIFCategory]: ...


class CIFFile(Protocol):
Expand All @@ -145,12 +127,10 @@ def __getitem__(self, index_or_name: Union[int, str]) -> CIFDataBlock:
def __getattr__(self, name: str) -> CIFDataBlock:
return self[name]

def __len__(self) -> int:
...
def __len__(self) -> int: ...

def __contains__(self, key: str) -> bool:
return key in self._block_map

@property
def data_blocks(self) -> List[CIFDataBlock]:
...
def data_blocks(self) -> List[CIFDataBlock]: ...
19 changes: 7 additions & 12 deletions ciftools/models/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any, Callable, Collection, Generic, List, Optional, Protocol, TypeVar, Union

import numpy as np

from ciftools.binary.encoder import BYTE_ARRAY, STRING_ARRAY, BinaryCIFEncoder
from ciftools.models.data import CIFValuePresenceEnum

Expand Down Expand Up @@ -87,24 +88,18 @@ def string_array(

class CIFCategoryDesc(Protocol):
@property
def name(self) -> str:
...
def name(self) -> str: ...

@staticmethod
def get_row_count(data: Any) -> int:
...
def get_row_count(data: Any) -> int: ...

@staticmethod
def get_field_descriptors(data: Any) -> Collection[CIFFieldDesc]:
...
def get_field_descriptors(data: Any) -> Collection[CIFFieldDesc]: ...


class CIFWriter(Protocol):
def start_data_block(self, header: str) -> None:
...
def start_data_block(self, header: str) -> None: ...

def write_category(self, category: CIFCategoryDesc, data: List[Any]) -> None:
...
def write_category(self, category: CIFCategoryDesc, data: List[Any]) -> None: ...

def encode(self) -> Union[str, bytes]:
...
def encode(self) -> Union[str, bytes]: ...
1 change: 1 addition & 0 deletions ciftools/serialization.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import msgpack

from ciftools.binary.data import BinaryCIFFile
from ciftools.binary.writer import BinaryCIFWriter
from ciftools.models.data import CIFFile
Expand Down
2 changes: 1 addition & 1 deletion mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ ignore_missing_imports = True
ignore_missing_imports = True

[mypy-msgpack.*]
ignore_missing_imports = True
ignore_missing_imports = True
Loading