Skip to content

Commit

Permalink
organize imports
Browse files: browse the repository at this point in the history
  • Loading branch information
brambg committed Jul 6, 2022
1 parent 7b8549d commit 1181cfc
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 21 deletions.
2 changes: 1 addition & 1 deletion pagexml/analysis/layout_stats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, Generator, List, Tuple, Union
from collections import Counter
from collections import defaultdict
from typing import Dict, Generator, List, Tuple, Union

import numpy as np

Expand Down
4 changes: 2 additions & 2 deletions pagexml/analysis/text_stats.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List, Set, Tuple, Union
import re
from collections import Counter
from collections import defaultdict
import re
from typing import List, Set, Tuple, Union

import pagexml.helper.text_helper as text_helper

Expand Down
10 changes: 5 additions & 5 deletions pagexml/helper/pagexml_helper.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from typing import Dict, Generator, List, Tuple, Union
from collections import Counter
import copy
import re
import gzip
import re
import string
from collections import Counter
from typing import Dict, Generator, List, Tuple, Union

import numpy as np

import pagexml.model.physical_document_model as pdm
import pagexml.analysis.layout_stats as summarise
import pagexml.analysis.text_stats as text_stats
import pagexml.helper.text_helper as text_helper
import pagexml.model.physical_document_model as pdm


def sort_regions_in_reading_order(doc: pdm.PageXMLDoc) -> List[pdm.PageXMLTextRegion]:
Expand Down Expand Up @@ -319,7 +319,7 @@ def read_line_format_file(line_format_files: Union[str, List[str]]) -> Generator
def make_line_text(line: pdm.PageXMLTextLine, do_merge: bool,
end_word: str, merge_word: str, line_break_char: str = '-') -> str:
line_text = line.text
if len(line_text) >= 2 and line_text.endswith(line_break_char*2):
if len(line_text) >= 2 and line_text.endswith(line_break_char * 2):
# remove the redundant line break char
line_text = line_text[:-1]
if do_merge:
Expand Down
14 changes: 7 additions & 7 deletions pagexml/helper/text_helper.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Dict, List, Tuple, Union, Generator, Iterable
import re
import gzip
import math
import re
from collections import Counter, defaultdict
from itertools import combinations
from typing import Dict, List, Tuple, Union, Generator, Iterable

import pagexml.parser as parser
import pagexml.model.physical_document_model as pdm
import pagexml.parser as parser


class SkipGram:
Expand Down Expand Up @@ -42,10 +42,10 @@ def text2skipgrams(text: str, ngram_size: int = 2, skip_size: int = 2) -> Genera
:rtype: Generator[tuple]"""
if ngram_size <= 0 or skip_size < 0:
raise ValueError('ngram_size must be a positive integer, skip_size must be a positive integer or zero')
indexes = [i for i in range(0, ngram_size+skip_size)]
skipgram_combinations = [combination for combination in combinations(indexes[1:], ngram_size-1)]
for offset in range(0, len(text)-1):
window = text[offset:offset+ngram_size+skip_size]
indexes = [i for i in range(0, ngram_size + skip_size)]
skipgram_combinations = [combination for combination in combinations(indexes[1:], ngram_size - 1)]
for offset in range(0, len(text) - 1):
window = text[offset:offset + ngram_size + skip_size]
for skipgram, skipgram_length in insert_skips(window, skipgram_combinations):
yield SkipGram(skipgram, offset, skipgram_length)

Expand Down
3 changes: 2 additions & 1 deletion pagexml/model/physical_document_model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations
from typing import Dict, List, Set, Tuple, Union

from collections import defaultdict
from typing import Dict, List, Set, Tuple, Union

import numpy as np
from scipy.spatial import ConvexHull
Expand Down
4 changes: 2 additions & 2 deletions pagexml/parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Generator, List, Dict, Union
from datetime import datetime
import glob
import re
from datetime import datetime
from typing import Generator, List, Dict, Union

import xmltodict
from dateutil.parser import parse as date_parse
Expand Down
6 changes: 3 additions & 3 deletions pagexml/plotting/plot_dist.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Dict
from collections import Counter
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="darkgrid")
Expand All @@ -26,5 +26,5 @@ def plot_dist_stats(stats: Dict[str, Dict[str, Counter]]) -> None:
df = pd.DataFrame(data={field: points})
x, y = zip(*sorted(stats[doc_type][field].items()))
sns.histplot(df, ax=axes[fi], x=field, binwidth=binwidth)
#sns.kdeplot(ax=axes[fi], x=x, y=y, cut=0)
# sns.kdeplot(ax=axes[fi], x=x, y=y, cut=0)
axes[fi].set_title(field)

0 comments on commit 1181cfc

Please sign in to comment.