Skip to content

Commit

Permalink
minor refactor and bump pydantic (#17)
Browse files Browse the repository at this point in the history
* bump pydantic

* bump pydantic

* move to py3.9+ (NEP29)
  • Loading branch information
alisterburt authored Jul 5, 2023
1 parent 3781d5a commit a7015de
Show file tree
Hide file tree
Showing 10 changed files with 240 additions and 244 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_and_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
matrix:
platform: [ ubuntu-latest ]
python-version: [ "3.8", "3.9", "3.10" ]
python-version: [ "3.9", "3.10", "3.11" ]

steps:
- uses: actions/checkout@v2
Expand Down
193 changes: 193 additions & 0 deletions mdocfile/data_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
from pydantic import field_validator, BaseModel
from pathlib import Path
from typing import List, Optional, Tuple, Union, Sequence

from mdocfile.utils import find_section_entries, find_title_entries


class MdocGlobalData(BaseModel):
    """Data model for global data in a SerialEM mdoc file.

    Every field is optional and defaults to ``None``; fields left as
    ``None`` are omitted from the output of :meth:`to_string`.

    https://bio3d.colorado.edu/SerialEM/hlp/html/about_formats.htm
    """
    DataMode: Optional[int] = None
    ImageSize: Optional[Tuple[int, int]] = None
    Montage: Optional[bool] = None
    ImageSeries: Optional[int] = None
    ImageFile: Optional[Path] = None
    PixelSpacing: Optional[float] = None
    Voltage: Optional[float] = None

    @field_validator('ImageSize', mode="before")
    @classmethod
    def multi_number_string_to_tuple(cls, value: Union[str, Sequence, None]):
        """Split a whitespace-separated string into a tuple of tokens.

        Runs before pydantic's own validation, which then converts the
        tokens to the element types declared on the field.

        Non-string input (for example a tuple or ``None`` passed directly
        to the constructor) is returned unchanged so that pydantic can
        validate it, instead of failing on a missing ``split`` method.
        """
        if isinstance(value, str):
            return tuple(value.split())
        return value

    @classmethod
    def from_lines(cls, lines: List[str]):
        """Build the model from the header lines of an mdoc file.

        Empty lines and title lines (those starting with ``[T =``) are
        skipped. Every remaining line must look like ``key = value``.

        Raises
        ------
        ValueError
            If a remaining line contains no ``=``.
        """
        data = {}
        for line in lines:
            if len(line) == 0 or line.startswith('[T ='):
                continue
            # Split on the first '=' only, so values which themselves
            # contain '=' (e.g. file paths) are kept intact.
            key, value = line.split('=', 1)
            data[key.strip()] = value.strip()
        return cls(**data)

    def to_string(self):
        """Return the set fields as newline-separated ``key = value`` lines."""
        lines = []
        for key, value in self.model_dump().items():
            if value is None:
                continue
            if isinstance(value, tuple):
                value = ' '.join(str(el) for el in value)
            # Compare the text form: a float NaN never compares equal to
            # the string 'nan', so comparing the raw value would not match.
            if str(value) == 'nan':
                value = 'NaN'
            lines.append(f'{key} = {value}')
        return '\n'.join(lines)


class MdocSectionData(BaseModel):
    """Data model for section data in a SerialEM mdoc file.

    Every field is optional and defaults to ``None``; fields left as
    ``None`` are omitted from the output of :meth:`to_string`.

    https://bio3d.colorado.edu/SerialEM/hlp/html/about_formats.htm
    """
    # headers
    ZValue: Optional[int] = None
    MontSection: Optional[int] = None
    FrameSet: Optional[int] = None

    # section data
    TiltAngle: Optional[float] = None
    PieceCoordinates: Optional[Tuple[float, float, int]] = None
    StagePosition: Optional[Tuple[float, float]] = None
    StageZ: Optional[float] = None
    Magnification: Optional[float] = None
    CameraLength: Optional[float] = None
    MagIndex: Optional[int] = None
    Intensity: Optional[float] = None
    SuperMontCoords: Optional[Tuple[float, float]] = None
    PixelSpacing: Optional[float] = None
    ExposureDose: Optional[float] = None
    DoseRate: Optional[float] = None
    SpotSize: Optional[float] = None
    Defocus: Optional[float] = None
    TargetDefocus: Optional[float] = None
    ImageShift: Optional[Tuple[float, float]] = None
    RotationAngle: Optional[float] = None
    ExposureTime: Optional[float] = None
    Binning: Optional[float] = None
    UsingCDS: Optional[bool] = None
    CameraIndex: Optional[int] = None
    DividedBy2: Optional[bool] = None
    LowDoseConSet: Optional[int] = None
    MinMaxMean: Optional[Tuple[float, float, float]] = None
    PriorRecordDose: Optional[float] = None
    XedgeDxy: Optional[Tuple[float, float]] = None
    YedgeDxy: Optional[Tuple[float, float]] = None
    XedgeDxyVS: Optional[Union[Tuple[float, float], Tuple[float, float, float]]] = None
    YedgeDxyVS: Optional[Union[Tuple[float, float], Tuple[float, float, float]]] = None
    StageOffsets: Optional[Tuple[float, float]] = None
    AlignedPieceCoords: Optional[Union[Tuple[float, float], Tuple[float, float, float]]] = None
    AlignedPieceCoordsVS: Optional[
        Union[Tuple[float, float], Tuple[float, float, float]]] = None
    SubFramePath: Optional[Path] = None
    NumSubFrames: Optional[int] = None
    FrameDosesAndNumbers: Optional[Sequence[Tuple[float, int]]] = None
    DateTime: Optional[str] = None
    NavigatorLabel: Optional[str] = None
    FilterSlitAndLoss: Optional[Tuple[float, float]] = None
    ChannelName: Optional[str] = None
    MultiShotHoleAndPosition: Optional[Union[Tuple[int, int], Tuple[int, int, int]]] = None
    CameraPixelSize: Optional[float] = None
    Voltage: Optional[float] = None

    @field_validator(
        'PieceCoordinates',
        'SuperMontCoords',
        'ImageShift',
        'MinMaxMean',
        'StagePosition',
        'XedgeDxy',
        'YedgeDxy',
        'XedgeDxyVS',
        'YedgeDxyVS',
        'StageOffsets',
        'AlignedPieceCoords',
        'AlignedPieceCoordsVS',
        'FilterSlitAndLoss',
        'MultiShotHoleAndPosition',
        mode="before")
    @classmethod
    def multi_number_string_to_tuple(cls, value: Union[str, Sequence, None]):
        """Split a whitespace-separated string into a tuple of tokens.

        Runs before pydantic's own validation, which then converts the
        tokens to the element types declared on the field.

        Non-string input (for example a tuple or ``None`` passed directly
        to the constructor) is returned unchanged so that pydantic can
        validate it, instead of failing on a missing ``split`` method.
        """
        if isinstance(value, str):
            return tuple(value.split())
        return value

    @field_validator('FrameDosesAndNumbers', mode="before")
    @classmethod
    def dose_number_string_to_pairs(cls, value: Union[str, Sequence, None]):
        """Group a whitespace-separated string into consecutive pairs.

        The field is declared as a sequence of ``(float, int)`` tuples, so
        a flat tuple of tokens cannot be validated against it; the tokens
        are paired up here first. Non-string input is returned unchanged.

        Raises
        ------
        ValueError
            If the string holds an odd number of tokens.
        """
        if not isinstance(value, str):
            return value
        tokens = value.split()
        if len(tokens) % 2 != 0:
            raise ValueError(
                'FrameDosesAndNumbers must contain an even number of values'
            )
        return tuple(zip(tokens[0::2], tokens[1::2]))

    @classmethod
    def from_lines(cls, lines: List[str]):
        """Build the model from the lines of one section of an mdoc file.

        Empty lines are skipped. Leading and trailing square brackets are
        stripped from every line, so a bracketed header line is parsed
        like any other ``key = value`` line.

        Raises
        ------
        ValueError
            If a non-empty line contains no ``=``.
        """
        data = {}
        for line in lines:
            if len(line) == 0:
                continue
            # Split on the first '=' only, so values which themselves
            # contain '=' (e.g. file paths or labels) are kept intact.
            key, value = line.strip('[]').split('=', 1)
            data[key.strip()] = value.strip()
        return cls(**data)

    def to_string(self):
        """Return the section as a bracketed header line plus data lines.

        The header is the first of ``ZValue``, ``MontSection`` and
        ``FrameSet`` which is set. If none is set, ``ZValue`` is used and
        the header reads ``[ZValue = None]``. Every other field which is
        not ``None`` follows as a ``key = value`` line; multi-valued
        fields are written as space-separated numbers.
        """
        def as_text(scalar) -> str:
            # A float NaN never compares equal to the string 'nan', so the
            # comparison is made on the text form instead.
            text = str(scalar)
            return 'NaN' if text == 'nan' else text

        data = self.model_dump()
        header_key = next(
            (
                key for key in ('ZValue', 'MontSection', 'FrameSet')
                if data[key] is not None
            ),
            'ZValue',
        )
        lines = [f'[{header_key} = {data.pop(header_key)}]']
        for key, value in data.items():
            if value is None:
                continue
            if isinstance(value, (tuple, list)):
                # Flatten one level so that a sequence of pairs is written
                # as plain numbers rather than as tuple reprs.
                flat = []
                for element in value:
                    if isinstance(element, (tuple, list)):
                        flat.extend(element)
                    else:
                        flat.append(element)
                value = ' '.join(as_text(element) for element in flat)
            else:
                value = as_text(value)
            lines.append(f'{key} = {value}')
        return '\n'.join(lines)


class Mdoc(BaseModel):
    """Data model for a whole mdoc file: titles, global data and sections."""
    titles: List[str]
    global_data: MdocGlobalData
    section_data: List[MdocSectionData]

    @classmethod
    def from_file(cls, filename: Union[str, Path]):
        """Read and parse an mdoc file.

        Every line is stripped of surrounding whitespace. The lines before
        the first section entry form the header, from which the titles and
        the global data are taken; each span between consecutive section
        entries becomes one ``MdocSectionData``.

        NOTE(review): ``find_section_entries`` and ``find_title_entries``
        are defined in ``mdocfile.utils`` and are assumed to return line
        indices in ascending order - confirm against that module.
        """
        with open(filename) as file:
            lines = [line.strip() for line in file]

        # Append the end of the file as a final boundary so that the last
        # section also has an end index. A new list is built so the value
        # returned by the helper is not modified.
        boundaries = [*find_section_entries(lines), len(lines)]

        header_lines = lines[0:boundaries[0]]
        titles = [header_lines[idx] for idx in find_title_entries(header_lines)]
        global_data = MdocGlobalData.from_lines(header_lines)
        section_data = [
            MdocSectionData.from_lines(lines[start_idx:end_idx])
            for start_idx, end_idx
            in zip(boundaries, boundaries[1:])
        ]
        return cls(titles=titles, global_data=global_data, section_data=section_data)

    def to_string(self):
        """
        Generate the string representation of the Mdoc data

        Global data, titles and sections are written in that order,
        separated by blank lines. Parts which are empty (for example when
        there are no titles) are left out, so they do not produce runs of
        extra blank lines.
        """
        parts = [
            self.global_data.to_string(),
            '\n\n'.join(self.titles),
            '\n\n'.join(section.to_string() for section in self.section_data),
        ]
        return '\n\n'.join(part for part in parts if part)
8 changes: 4 additions & 4 deletions mdocfile/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pandas as pd

from .mdoc import Mdoc
from .data_models import Mdoc


def read(filename: PathLike) -> pd.DataFrame:
Expand All @@ -19,11 +19,11 @@ def read(filename: PathLike) -> pd.DataFrame:
dataframe containing info from mdoc file
"""
mdoc = Mdoc.from_file(filename)
global_data = mdoc.global_data.dict()
global_data = mdoc.global_data.model_dump()
section_data = {
k: [section.dict()[k] for section in mdoc.section_data]
k: [section.model_dump()[k] for section in mdoc.section_data]
for k
in mdoc.section_data[0].dict().keys()
in mdoc.section_data[0].model_dump().keys()
}
df = pd.DataFrame(data=section_data)

Expand Down
50 changes: 0 additions & 50 deletions mdocfile/global_data.py

This file was deleted.

41 changes: 0 additions & 41 deletions mdocfile/mdoc.py

This file was deleted.

Loading

0 comments on commit a7015de

Please sign in to comment.