diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml
index 445388c..2fa2de4 100644
--- a/.github/workflows/test_and_deploy.yml
+++ b/.github/workflows/test_and_deploy.yml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       matrix:
         platform: [ ubuntu-latest ]
-        python-version: [ "3.8", "3.9", "3.10" ]
+        python-version: [ "3.9", "3.10", "3.11" ]
 
     steps:
       - uses: actions/checkout@v2
diff --git a/mdocfile/data_models.py b/mdocfile/data_models.py
new file mode 100644
index 0000000..5d8817a
--- /dev/null
+++ b/mdocfile/data_models.py
@@ -0,0 +1,213 @@
+import math
+from pathlib import Path
+from typing import List, Optional, Tuple, Union, Sequence
+
+from pydantic import field_validator, BaseModel
+
+from mdocfile.utils import find_section_entries, find_title_entries
+
+
+def _format_value(value) -> str:
+    """Render a field value the way it is written in an mdoc file."""
+    if isinstance(value, (tuple, list)):
+        # multi-number entries are written as space separated values
+        return ' '.join(_format_value(el) for el in value)
+    if isinstance(value, float) and math.isnan(value):
+        # str(float('nan')) is 'nan' but mdoc files spell it 'NaN'
+        return 'NaN'
+    return str(value)
+
+
+class MdocGlobalData(BaseModel):
+    """Data model for global data in a SerialEM mdoc file.
+
+    https://bio3d.colorado.edu/SerialEM/hlp/html/about_formats.htm
+    """
+    DataMode: Optional[int] = None
+    ImageSize: Optional[Tuple[int, int]] = None
+    Montage: Optional[bool] = None
+    ImageSeries: Optional[int] = None
+    ImageFile: Optional[Path] = None
+    PixelSpacing: Optional[float] = None
+    Voltage: Optional[float] = None
+
+    @field_validator('ImageSize', mode="before")
+    @classmethod
+    def multi_number_string_to_tuple(cls, value):
+        # only raw mdoc text needs splitting, pass parsed values through
+        if isinstance(value, str):
+            return tuple(value.split())
+        return value
+
+    @classmethod
+    def from_lines(cls, lines: List[str]):
+        lines = [
+            line for line in lines
+            if len(line) > 0
+        ]
+        key_value_pairs = [
+            # split on the first '=' only, values may contain '='
+            line.split('=', 1) for line in lines
+            if not line.startswith('[T =')
+        ]
+        key_value_pairs = [
+            (k.strip(), v.strip()) for k, v in key_value_pairs
+        ]
+        data = {k: v for k, v in key_value_pairs}
+        return cls(**data)
+
+    def to_string(self):
+        lines = []
+        for k, v in self.model_dump().items():
+            if v is None:
+                continue
+            lines.append(f'{k} = {_format_value(v)}')
+        return '\n'.join(lines)
+
+
+class MdocSectionData(BaseModel):
+    """Data model for section data in a SerialEM mdoc file.
+
+    https://bio3d.colorado.edu/SerialEM/hlp/html/about_formats.htm
+    """
+    # headers
+    ZValue: Optional[int] = None
+    MontSection: Optional[int] = None
+    FrameSet: Optional[int] = None
+
+    # section data
+    TiltAngle: Optional[float] = None
+    PieceCoordinates: Optional[Tuple[float, float, int]] = None
+    StagePosition: Optional[Tuple[float, float]] = None
+    StageZ: Optional[float] = None
+    Magnification: Optional[float] = None
+    CameraLength: Optional[float] = None
+    MagIndex: Optional[int] = None
+    Intensity: Optional[float] = None
+    SuperMontCoords: Optional[Tuple[float, float]] = None
+    PixelSpacing: Optional[float] = None
+    ExposureDose: Optional[float] = None
+    DoseRate: Optional[float] = None
+    SpotSize: Optional[float] = None
+    Defocus: Optional[float] = None
+    TargetDefocus: Optional[float] = None
+    ImageShift: Optional[Tuple[float, float]] = None
+    RotationAngle: Optional[float] = None
+    ExposureTime: Optional[float] = None
+    Binning: Optional[float] = None
+    UsingCDS: Optional[bool] = None
+    CameraIndex: Optional[int] = None
+    DividedBy2: Optional[bool] = None
+    LowDoseConSet: Optional[int] = None
+    MinMaxMean: Optional[Tuple[float, float, float]] = None
+    PriorRecordDose: Optional[float] = None
+    XedgeDxy: Optional[Tuple[float, float]] = None
+    YedgeDxy: Optional[Tuple[float, float]] = None
+    XedgeDxyVS: Optional[Union[Tuple[float, float], Tuple[float, float, float]]] = None
+    YedgeDxyVS: Optional[Union[Tuple[float, float], Tuple[float, float, float]]] = None
+    StageOffsets: Optional[Tuple[float, float]] = None
+    AlignedPieceCoords: Optional[Union[Tuple[float, float], Tuple[float, float, float]]] = None
+    AlignedPieceCoordsVS: Optional[
+        Union[Tuple[float, float], Tuple[float, float, float]]] = None
+    SubFramePath: Optional[Path] = None
+    NumSubFrames: Optional[int] = None
+    FrameDosesAndNumbers: Optional[Sequence[Tuple[float, int]]] = None
+    DateTime: Optional[str] = None
+    NavigatorLabel: Optional[str] = None
+    FilterSlitAndLoss: Optional[Tuple[float, float]] = None
+    ChannelName: Optional[str] = None
+    MultiShotHoleAndPosition: Optional[Union[Tuple[int, int], Tuple[int, int, int]]] = None
+    CameraPixelSize: Optional[float] = None
+    Voltage: Optional[float] = None
+
+    @field_validator(
+        'PieceCoordinates',
+        'SuperMontCoords',
+        'ImageShift',
+        'MinMaxMean',
+        'StagePosition',
+        'XedgeDxy',
+        'YedgeDxy',
+        'XedgeDxyVS',
+        'YedgeDxyVS',
+        'StageOffsets',
+        'AlignedPieceCoords',
+        'AlignedPieceCoordsVS',
+        'FrameDosesAndNumbers',
+        'FilterSlitAndLoss',
+        'MultiShotHoleAndPosition',
+        mode="before")
+    @classmethod
+    def multi_number_string_to_tuple(cls, value):
+        # only raw mdoc text needs splitting, pass parsed values through
+        if isinstance(value, str):
+            return tuple(value.split())
+        return value
+
+    @classmethod
+    def from_lines(cls, lines: List[str]):
+        lines = [line.strip('[]')
+                 for line
+                 in lines
+                 if len(line) > 0]
+        # split on the first '=' only, values may contain '='
+        key_value_pairs = [line.split('=', 1) for line in lines]
+        key_value_pairs = [
+            (k.strip(), v.strip())
+            for k, v
+            in key_value_pairs
+        ]
+        lines = {k: v for k, v in key_value_pairs}
+        return cls(**lines)
+
+    def to_string(self):
+        data = self.model_dump()
+        # a section starts with whichever header key is set, fall back to
+        # ZValue when none is
+        header_key = next(
+            (k for k in ('ZValue', 'MontSection', 'FrameSet')
+             if data[k] is not None),
+            'ZValue'
+        )
+        header_value = data.pop(header_key)
+        lines = [f'[{header_key} = {header_value}]']
+        for k, v in data.items():
+            if v is None:
+                continue
+            lines.append(f'{k} = {_format_value(v)}')
+        return '\n'.join(lines)
+
+
+class Mdoc(BaseModel):
+    titles: List[str]
+    global_data: MdocGlobalData
+    section_data: List[MdocSectionData]
+
+    @classmethod
+    def from_file(cls, filename: str):
+        with open(filename) as file:
+            lines = [line.strip() for line in file.readlines()]
+        split_idxs = find_section_entries(lines)
+        split_idxs.append(len(lines))
+
+        header_lines = lines[0:split_idxs[0]]
+        title_idxs = find_title_entries(header_lines)
+
+        titles = [header_lines[idx] for idx in title_idxs]
+        global_data = MdocGlobalData.from_lines(header_lines)
+        section_data = [
+            MdocSectionData.from_lines(lines[start_idx:end_idx])
+            for start_idx, end_idx
+            in zip(split_idxs, split_idxs[1:])
+        ]
+        return cls(titles=titles, global_data=global_data, section_data=section_data)
+
+    def to_string(self):
+        """
+        Generate the string representation of the Mdoc data
+        """
+        return '\n\n'.join([
+            self.global_data.to_string(),
+            '\n\n'.join(self.titles),
+            '\n\n'.join(section.to_string() for section in self.section_data),
+        ])
diff --git a/mdocfile/functions.py b/mdocfile/functions.py
index 6c5d126..78036c7 100644
--- a/mdocfile/functions.py
+++ b/mdocfile/functions.py
@@ -2,7 +2,7 @@
 
 import pandas as pd
 
-from .mdoc import Mdoc
+from .data_models import Mdoc
 
 
 def read(filename: PathLike) -> pd.DataFrame:
@@ -19,11 +19,11 @@ def read(filename: PathLike) -> pd.DataFrame:
         dataframe containing info from mdoc file
     """
     mdoc = Mdoc.from_file(filename)
-    global_data = mdoc.global_data.dict()
+    global_data = mdoc.global_data.model_dump()
     section_data = {
-        k: [section.dict()[k] for section in mdoc.section_data]
+        k: [section.model_dump()[k] for section in mdoc.section_data]
         for k
-        in mdoc.section_data[0].dict().keys()
+        in mdoc.section_data[0].model_dump().keys()
     }
 
     df = pd.DataFrame(data=section_data)
diff --git a/mdocfile/global_data.py b/mdocfile/global_data.py
deleted file mode 100644
index 95e92e0..0000000
--- a/mdocfile/global_data.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from pathlib import Path
-from typing import Optional, Tuple, List
-
-from pydantic import BaseModel, validator
-
-
-class MdocGlobalData(BaseModel):
-    """Data model for global data in a SerialEM mdoc file.
-
-    https://bio3d.colorado.edu/SerialEM/hlp/html/about_formats.htm
-    """
-    DataMode: Optional[int]
-    ImageSize: Optional[Tuple[int, int]]
-    Montage: Optional[bool]
-    ImageSeries: Optional[int]
-    ImageFile: Optional[Path]
-    PixelSpacing: Optional[float]
-    Voltage: Optional[float]
-
-    @validator('ImageSize', pre=True)
-    def multi_number_string_to_tuple(cls, value: str):
-        return tuple(value.split())
-
-    @classmethod
-    def from_lines(cls, lines: List[str]):
-        lines = [
-            line for line in lines
-            if len(line) > 0
-        ]
-        key_value_pairs = [
-            line.split('=') for line in lines
-            if not line.startswith('[T =')
-        ]
-        key_value_pairs = [
-            (k.strip(), v.strip()) for k, v in key_value_pairs
-        ]
-        data = {k: v for k, v in key_value_pairs}
-        return cls(**data)
-
-    def to_string(self):
-        lines = []
-        for k, v in self.dict().items():
-            if v is None:
-                continue
-            if isinstance(v, tuple):
-                v = ' '.join(str(el) for el in v)
-            if v == 'nan':
-                v = 'NaN'
-            lines.append(f'{k} = {v}')
-        return '\n'.join(lines)
diff --git a/mdocfile/mdoc.py b/mdocfile/mdoc.py
deleted file mode 100644
index 942f5c4..0000000
--- a/mdocfile/mdoc.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from typing import List
-
-from pydantic import BaseModel
-from .global_data import MdocGlobalData
-from .section_data import MdocSectionData
-from .utils import find_section_entries, find_title_entries
-
-
-class Mdoc(BaseModel):
-    titles: List[str]
-    global_data: MdocGlobalData
-    section_data: List[MdocSectionData]
-
-    @classmethod
-    def from_file(cls, filename: str):
-        with open(filename) as file:
-            lines = [line.strip() for line in file.readlines()]
-        split_idxs = find_section_entries(lines)
-        split_idxs.append(len(lines))
-
-        header_lines = lines[0:split_idxs[0]]
-        title_idxs = find_title_entries(header_lines)
-
-        titles = [header_lines[idx] for idx in title_idxs]
-        global_data = MdocGlobalData.from_lines(header_lines)
-        section_data = [
-            MdocSectionData.from_lines(lines[start_idx:end_idx])
-            for start_idx, end_idx
-            in zip(split_idxs, split_idxs[1:])
-        ]
-        return cls(titles=titles, global_data=global_data, section_data=section_data)
-
-    def to_string(self):
-        """
-        Generate the string representation of the Mdoc data
-        """
-        return '\n\n'.join([
-            self.global_data.to_string(),
-            '\n\n'.join(self.titles),
-            '\n\n'.join(section.to_string() for section in self.section_data),
-        ])
diff --git a/mdocfile/section_data.py b/mdocfile/section_data.py
deleted file mode 100644
index a57e741..0000000
--- a/mdocfile/section_data.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from pathlib import Path
-from typing import Optional, Tuple, Union, Sequence, List
-
-from pydantic import BaseModel, validator
-
-
-class MdocSectionData(BaseModel):
-    """Data model for section data in a SerialEM mdoc file.
-
-    https://bio3d.colorado.edu/SerialEM/hlp/html/about_formats.htm
-    """
-    # headers
-    ZValue: Optional[int]
-    MontSection: Optional[int]
-    FrameSet: Optional[int]
-
-    # section data
-    TiltAngle: Optional[float]
-    PieceCoordinates: Optional[Tuple[float, float, int]]
-    StagePosition: Optional[Tuple[float, float]]
-    StageZ: Optional[float]
-    Magnification: Optional[float]
-    CameraLength: Optional[float]
-    MagIndex: Optional[int]
-    Intensity: Optional[float]
-    SuperMontCoords: Optional[Tuple[float, float]]
-    PixelSpacing: Optional[float]
-    ExposureDose: Optional[float]
-    DoseRate: Optional[float]
-    SpotSize: Optional[float]
-    Defocus: Optional[float]
-    TargetDefocus: Optional[float]
-    ImageShift: Optional[Tuple[float, float]]
-    RotationAngle: Optional[float]
-    ExposureTime: Optional[float]
-    Binning: Optional[float]
-    UsingCDS: Optional[bool]
-    CameraIndex: Optional[int]
-    DividedBy2: Optional[bool]
-    LowDoseConSet: Optional[int]
-    MinMaxMean: Optional[Tuple[float, float, float]]
-    PriorRecordDose: Optional[float]
-    XedgeDxy: Optional[Tuple[float, float]]
-    YedgeDxy: Optional[Tuple[float, float]]
-    XedgeDxyVS: Optional[Union[Tuple[float, float], Tuple[float, float, float]]]
-    YedgeDxyVS: Optional[Union[Tuple[float, float], Tuple[float, float, float]]]
-    StageOffsets: Optional[Tuple[float, float]]
-    AlignedPieceCoords: Optional[Union[Tuple[float, float], Tuple[float, float, float]]]
-    AlignedPieceCoordsVS: Optional[Union[Tuple[float, float], Tuple[float, float, float]]]
-    SubFramePath: Optional[Path]
-    NumSubFrames: Optional[int]
-    FrameDosesAndNumbers: Optional[Sequence[Tuple[float, int]]]
-    DateTime: Optional[str]
-    NavigatorLabel: Optional[str]
-    FilterSlitAndLoss: Optional[Tuple[float, float]]
-    ChannelName: Optional[str]
-    MultiShotHoleAndPosition: Optional[Union[Tuple[int, int], Tuple[int, int, int]]]
-    CameraPixelSize: Optional[float]
-    Voltage: Optional[float]
-
-    @validator(
-        'PieceCoordinates',
-        'SuperMontCoords',
-        'ImageShift',
-        'MinMaxMean',
-        'StagePosition',
-        'XedgeDxy',
-        'YedgeDxy',
-        'XedgeDxyVS',
-        'YedgeDxyVS',
-        'StageOffsets',
-        'AlignedPieceCoords',
-        'AlignedPieceCoordsVS',
-        'FrameDosesAndNumbers',
-        'FilterSlitAndLoss',
-        'MultiShotHoleAndPosition',
-        pre=True)
-    def multi_number_string_to_tuple(cls, value: str):
-        return tuple(value.split())
-
-    @classmethod
-    def from_lines(cls, lines: List[str]):
-        lines = [line.strip('[]')
-                 for line
-                 in lines
-                 if len(line) > 0]
-        key_value_pairs = [line.split('=') for line in lines]
-        key_value_pairs = [
-            (k.strip(), v.strip())
-            for k, v
-            in key_value_pairs
-        ]
-        lines = {k: v for k, v in key_value_pairs}
-        return cls(**lines)
-
-    def to_string(self):
-        data = self.dict()
-        z_value = data.pop('ZValue')
-        lines = [f'[ZValue = {z_value}]']
-        for k, v in data.items():
-            if v is None:
-                continue
-            elif isinstance(v, tuple):
-                v = ' '.join(str(el) for el in v)
-            elif v == 'nan':
-                v = 'NaN'
-            lines.append(f'{k} = {v}')
-        return '\n'.join(lines)
diff --git a/setup.cfg b/setup.cfg
index 71f22d1..b7c6c1a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,20 +13,21 @@ classifiers =
     License :: OSI Approved :: BSD License
     Natural Language :: English
     Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
 project_urls =
     Source Code =https://github.com/alisterburt/mdocfile
 
 
 [options]
 zip_safe = False
 packages = find:
-python_requires = >=3.8
+python_requires = >=3.9
 setup_requires = setuptools_scm
 install_requires =
     pandas
-    pydantic
+    pydantic>=2
 
 [options.extras_require]
 testing =
diff --git a/tests/test_section_data.py b/tests/test_data_models.py
similarity index 56%
rename from tests/test_section_data.py
rename to tests/test_data_models.py
index 8139b91..cbe0d5d 100644
--- a/tests/test_section_data.py
+++ b/tests/test_data_models.py
@@ -1,6 +1,23 @@
 from pathlib import Path
+from tempfile import NamedTemporaryFile
+
+from mdocfile.data_models import MdocGlobalData, MdocSectionData, Mdoc
+
+GLOBAL_DATA_EXAMPLE = r"""PixelSpacing = 5.4
+ImageFile = TS_01.mrc
+ImageSize = 924 958
+DataMode = 1
+"""
+
+
+def test_global_data_from_lines():
+    lines = GLOBAL_DATA_EXAMPLE.split('\n')
+    data = MdocGlobalData.from_lines(lines)
+    assert isinstance(data, MdocGlobalData)
+    assert data.PixelSpacing == 5.4
+    assert data.ImageFile == Path('TS_01.mrc')
+    assert data.DataMode == 1
 
-from mdocfile.section_data import MdocSectionData
 
 SECTION_DATA_EXAMPLE = r"""[ZValue = 0]
 TiltAngle = 0.000999877
@@ -52,3 +69,24 @@ def test_section_data_from_lines():
     assert data.SubFramePath == Path(r'D:\DATA\Flo\HGK149_20151130\frames\TS_01_000_0.0.mrc')
     assert data.NumSubFrames == 8
     assert data.DateTime == '30-Nov-15 15:21:38'
+
+
+def test_mdoc_from_tilt_series_mdoc_file(tilt_series_mdoc_file):
+    mdoc = Mdoc.from_file(tilt_series_mdoc_file)
+    assert isinstance(mdoc, Mdoc)
+    assert len(mdoc.titles) == 2
+    assert mdoc.global_data.PixelSpacing == 5.4
+    assert len(mdoc.section_data) == 41
+
+
+def test_to_string_is_valid_mdoc(tilt_series_mdoc_file):
+    mdoc = Mdoc.from_file(tilt_series_mdoc_file)
+    with NamedTemporaryFile() as tmp:
+        tmp.write(mdoc.to_string().encode())
+        tmp.flush()  # make sure the data is on disk before re-opening by name
+        mdoc2 = Mdoc.from_file(tmp.name)
+    mdoc_dict = mdoc.section_data[0].model_dump()
+    mdoc2_dict = mdoc2.section_data[0].model_dump()
+    for (k1, v1), (k2, v2) in zip(mdoc_dict.items(), mdoc2_dict.items()):
+        assert v1 == v2
+        assert k1 == k2
diff --git a/tests/test_global_data.py b/tests/test_global_data.py
deleted file mode 100644
index 67e6e9f..0000000
--- a/tests/test_global_data.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from mdocfile.global_data import MdocGlobalData
-from pathlib import Path
-
-GLOBAL_DATA_EXAMPLE = r"""PixelSpacing = 5.4
-ImageFile = TS_01.mrc
-ImageSize = 924 958
-DataMode = 1
-"""
-
-
-def test_global_data_from_lines():
-    lines = GLOBAL_DATA_EXAMPLE.split('\n')
-    data = MdocGlobalData.from_lines(lines)
-    assert isinstance(data, MdocGlobalData)
-    assert data.PixelSpacing == 5.4
-    assert data.ImageFile == Path('TS_01.mrc')
-    assert data.DataMode == 1
diff --git a/tests/test_mdoc.py b/tests/test_mdoc.py
deleted file mode 100644
index e986019..0000000
--- a/tests/test_mdoc.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from mdocfile.mdoc import Mdoc
-from tempfile import NamedTemporaryFile
-
-
-def test_mdoc_from_tilt_series_mdoc_file(tilt_series_mdoc_file):
-    mdoc = Mdoc.from_file(tilt_series_mdoc_file)
-    assert isinstance(mdoc, Mdoc)
-    assert len(mdoc.titles) == 2
-    assert mdoc.global_data.PixelSpacing == 5.4
-    assert len(mdoc.section_data) == 41
-
-
-def test_to_string_is_valid_mdoc(tilt_series_mdoc_file):
-    mdoc = Mdoc.from_file(tilt_series_mdoc_file)
-    with NamedTemporaryFile() as tmp:
-        tmp.write(mdoc.to_string().encode())
-        mdoc2 = Mdoc.from_file(tmp.name)
-    for (k1, v1), (k2, v2) in zip(mdoc.section_data[0].dict().items(), mdoc2.section_data[0].dict().items()):
-        assert(v1 == v2), k1