Skip to content

Commit

Permalink
indent multiline docstring :param, :returns
Browse files Browse the repository at this point in the history
  • Loading branch information
brambg committed Mar 1, 2023
1 parent 4a190d1 commit a8059b7
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 10 deletions.
3 changes: 2 additions & 1 deletion pagexml/analysis/layout_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def interpolate_points(p1: Tuple[int, int], p2: Tuple[int, int],
step: int = 50) -> Generator[Dict[int, int], None, None]:
"""Determine the x coordinates between a pair of points on a baseline
and calculate their corresponding y coordinates.
:param p1: a 2D point
:type p1: Tuple[int, int]
:param p2: a 2D point
Expand Down Expand Up @@ -429,7 +430,7 @@ def find_line_width_boundary_points(line_widths: List[int], line_bin_size: int =
:param line_bin_size: the bin size for grouping lines to establish the line width distribution (default 50 pixels)
:type line_bin_size: int
:param min_ratio: the minimum ratio between a peak frequency and its neighbouring minimum to determine
if the minimum is a category boundary
if the minimum is a category boundary
:type min_ratio: float
:return: A list of category boundary points
:rtype: List[int]
Expand Down
2 changes: 1 addition & 1 deletion pagexml/analysis/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def get_doc_stats(pagexml_docs: Union[pdm.PageXMLTextRegion, List[pdm.PageXMLTex
:param pagexml_docs: a PageXML document object or a list of PageXML document objects
:type pagexml_docs: PageXMLTextRegion
:param line_width_boundary_points: a list of points indicating boundaries between categories of
line widths
line widths
:type line_width_boundary_points: List[int]
:param stop_words: a list of stopwords used to count the number of stopwords in the scan statistics
:type stop_words: List[str]
Expand Down
9 changes: 5 additions & 4 deletions pagexml/analysis/text_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,10 @@ def compute_keyness(target_counter: Counter, reference_counter: Counter,
are more common in the target counter than in the reference counter.
:param target_counter: the counter used for token frequencies of the target
corpus (possible values: 'all', 'start', 'mid' or 'end')
corpus (possible values: 'all', 'start', 'mid' or 'end')
:type target_counter: Counter
:param reference_counter: the counter used for token frequencies of the
reference corpus (possible values: 'all', 'start', 'mid' or 'end')
reference corpus (possible values: 'all', 'start', 'mid' or 'end')
:param vocab: an optional vocabulary for which to compute keyness values.
:type vocab: Iterable[str]
"""
Expand Down Expand Up @@ -139,7 +139,8 @@ def compute_complement_keyness(target_analyser: LineAnalyser,
:param target_analyser: the target LineAnalyser
:type target_analyser: LineAnalyser
:param target_counter: the counter used for token frequencies of the target
corpus (possible values: 'all', 'start', 'mid' or 'end')
corpus (possible values: 'all', 'start', 'mid' or 'end')
:type target_counter: str
"""
target_counter = target_analyser.freq[target_counter]
Expand Down Expand Up @@ -623,7 +624,7 @@ def determine_line_break(lbd: LineBreakDetector, curr_words: List[str],
:param prev_words: a list of words for the previous line to be merged with the current line
:type prev_words: List[str]
:return: a flag whether the previous line ends in a line break and the merged word composed of
the previous line's last word and current line's first word (or None if the words should not be merged)
the previous line's last word and current line's first word (or None if the words should not be merged)
:rtype: Union[str, None]
:param debug: print debugging information
"""
Expand Down
2 changes: 1 addition & 1 deletion pagexml/helper/pagexml_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def make_text_region_text(lines: List[pdm.PageXMLTextLine],
:param lbd: a line break detector object
:type lbd: LineBreakDetector
:return: a paragraph of text and a list of line ranges that indicates how the text of each line
corresponds to character offsets in the paragraph.
corresponds to character offsets in the paragraph.
:rtype: Tuple[str, List[Dict[str, any]]]
"""
text = ''
Expand Down
4 changes: 2 additions & 2 deletions pagexml/helper/text_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def __init__(self, pagexml_files: Union[str, List[str]] = None,
:param has_header: whether the pagexml_line_files have a header line
:type has_header: bool
:param use_outer_textregions: use ID of outer text regions (when True) otherwise ID of inner
text regions
text regions
:type use_outer_textregions: bool
:param groupby: group lines by 'doc_id' or 'textregion_id'
:type groupby: str
Expand Down Expand Up @@ -148,7 +148,7 @@ def _iter_from_line_file(self):

def make_page_extractor(archive_file: str,
show_progress: bool = False) -> Generator[pdm.PageXMLScan, None, None]:
"""Convenience function to return a generator that yield a PageXMLScan object per PageXML file
"""Convenience function to return a generator that yields a PageXMLScan object per PageXML file
in a zip/tar archive file."""
for page_fileinfo, page_data in file_helper.read_page_archive_file(archive_file,
show_progress=show_progress):
Expand Down
2 changes: 1 addition & 1 deletion pagexml/transform/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def is_above_point(page_doc: pdm.PageXMLDoc, point: Tuple[int, int]):
return True


def split_horizonally(page_doc: pdm.PageXMLTextRegion, point: Tuple[int, int]):
def split_horizontally(page_doc: pdm.PageXMLTextRegion, point: Tuple[int, int]):
above = []
below = []
for tr in page_doc.text_regions:
Expand Down

0 comments on commit a8059b7

Please sign in to comment.