diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..52132bb --- /dev/null +++ b/.gitignore @@ -0,0 +1,158 @@ +# Data/plots +*.csv +*.png +*.svg +*.pdf + +# Work in progress +notebooks/ + +# Mac +.DS_Store + +# PyCharm +.idea/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# static files generated from Django application using `collectstatic` +media +static \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1a78229 --- /dev/null +++ b/LICENSE @@ -0,0 +1,18 @@ +Copyright 2020 Office for National Statistics + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..717666e --- /dev/null +++ b/README.md @@ -0,0 +1,112 @@ +# Mobility Report graph extractor + +

+ Logo +

+ +For extracting graphs from COVID-19 Community Mobility Report PDF into +comma separated value (CSV) files for each graph. + +Scripts `getgraphs.py` and `createcsvs.py` are able to extract all graphs from a Scalable Vector Graphics (SVG) document (converted as a single document from the original PDF) in one go, including those with gaps in the data. + +**Developed and tested with:** +- MacOS 10.15.4 +- PyCharm Community Edition 2018.3.2 +- Affinity Designer 1.6.5 +- Python 3 + +This code is developed at *speed* on the COVID-19 Community Mobility Report PDF documents published on Friday 3rd of April 2020. Changes may be made. + +## Installation + +We provide the python `requirements.txt` file as well as a `poetry` setup for +dependency management. + +We recommend using a virtual environment before installing dependencies. + +To install with `pip`: + +```shell +pip install -r requirements.txt +``` + +To install with `poetry` + +```shell +poetry install +``` + +## Usage + +1. Either create, or download a pre-made, SVG document file (this file is the + entire PDF document in Scalable Vector Graphics (SVG) format). + + **Create**: + + 1. Load in PDF document to Affinity Designer. + 1. Click Load all pages. + 1. `File > Export > SVG (for print)` + 1. Select Area: Whole Document. + 1. Save the SVG file to (`./svgs`). + + **Download**: + + 1. The Data Science Campus has created [SVGs for all G20 countries](https://drive.google.com/open?id=1n5XXYbJtf-wNWEkDPe7N8iNfjzJXwZ_d) (no PDFs available for China or Russia). + 1. Save the SVG file to (`./svgs`). + +2. **Run the `getgraphs.py` Python script.** + +```text +Usage: getgraphs.py [OPTIONS] INPUT_LOCATION OUTPUT_FOLDER + +Options: + -m, --multiple If the input location is a folder with multiple SVGs, + this has to be set to true. + -f, --folder TEXT If provided will overwrite the output folder name (can + not be used with the `--multiple` flag) + --help Show this message and exit. 
+``` + +Specify either the location of the single SVG file (e.g. `./svgs/UK`) you wish to process or the folder of svg files (you will have to provide the `-m` flag in that case). You will also have to provide the output folder, e.g. `./graphs/UK`. + +3. **Run the `createcsvs.py` Python script.** + +```text +Usage: createcsvs.py [OPTIONS] INPUT_FOLDER OUTPUT_FOLDER DATES_FILE + +Options: + -p, --plots Enables creation and saving of additional PNG plots + --help Show this message and exit. +``` + +Specify the input folder where the individual SVG files are (e.g. `./graphs/UK`), +the output folder where you want the CSV files to be saved to (e.g. `./output/UK`), +and the dates lookup file (e.g. `./config/dates_lookup.csv`) - used to convert +coordinates to dates. + +If you want simple matplotlib PNG plots to save as well as CSV files, use the `-p` flag. + +## Data format + +Each CSV will be saved to (`./output/subfolder`), starting at `1.csv`. As of the **COVID-19 Community Mobility Reports** released on Friday 3rd April 2020, CSV files `1.csv` to `6.csv` relate to the country-level graphs in +the original PDF (pages one and two). Then each set of 6 CSV files (e.g., `7.csv` to `12.csv`) will relate to a regional area. + +Each set of 6 files follows the order: + +1. Retail & recreation +2. Grocery & pharmacy +3. Parks +4. Transit stations +5. Workplaces +6. Residential + +## United Kingdom Dataset + +A pre-made dataset for the United Kingdom can be found at the [Data Science Campus' Google Mobility Reports Data repository](https://github.com/datasciencecampus/google-mobility-reports-data). + + +## Contributing + +Any suggestions or issues, please use the Issues template. We welcome +collaborators. To help us with this work, fork the repository and issue a Pull +Request when you have added a feature, or fixed a bug. Thanks!
diff --git a/config/.gitkeep b/config/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/config/README.md b/config/README.md new file mode 100644 index 0000000..b87827b --- /dev/null +++ b/config/README.md @@ -0,0 +1 @@ +This file changes the raw vertices (1-43) to real dates. These are related to the COVID-19 Community Mobility Reports released on Friday 3rd April 2020 and should be updated for future releases. \ No newline at end of file diff --git a/config/dates_lookup.csv b/config/dates_lookup.csv new file mode 100644 index 0000000..f4c1f96 --- /dev/null +++ b/config/dates_lookup.csv @@ -0,0 +1,44 @@ +index,date +1,2020-02-16 +2,2020-02-17 +3,2020-02-18 +4,2020-02-19 +5,2020-02-20 +6,2020-02-21 +7,2020-02-22 +8,2020-02-23 +9,2020-02-24 +10,2020-02-25 +11,2020-02-26 +12,2020-02-27 +13,2020-02-28 +14,2020-02-29 +15,2020-03-01 +16,2020-03-02 +17,2020-03-03 +18,2020-03-04 +19,2020-03-05 +20,2020-03-06 +21,2020-03-07 +22,2020-03-08 +23,2020-03-09 +24,2020-03-10 +25,2020-03-11 +26,2020-03-12 +27,2020-03-13 +28,2020-03-14 +29,2020-03-15 +30,2020-03-16 +31,2020-03-17 +32,2020-03-18 +33,2020-03-19 +34,2020-03-20 +35,2020-03-21 +36,2020-03-22 +37,2020-03-23 +38,2020-03-24 +39,2020-03-25 +40,2020-03-26 +41,2020-03-27 +42,2020-03-28 +43,2020-03-29 diff --git a/createcsvs.py b/createcsvs.py new file mode 100644 index 0000000..0b027da --- /dev/null +++ b/createcsvs.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +import os + +import click +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from svgpathtools import svg2paths + + +@click.command() +@click.argument("INPUT_FOLDER") +@click.argument("OUTPUT_FOLDER") +@click.argument("DATES_FILE") +@click.option( + "-p", + "--plots", + is_flag=True, + default=False, + help="Enables creation and saving of additional PNG plots", +) +def main(input_folder, output_folder, dates_file, plots): + """Turn SVG graphs into CSVs.
+ + Given an input folder of single plot SVGs convert them into CSV files. + + Args: + input_folder: Location of SVG files + output_folder: Location to store the CSVs + dates_file: Lookup from x axis steps to date + plots: Boolean flag + Set to true to create png plots from the extracted data + (used for manual inspection checks against source plots) + """ + # Get date lookup file + date_df = pd.read_csv(dates_file) + + # Set location (normpath drops a trailing slash so "graphs/UK/" still gives "UK") + location = os.path.basename(os.path.normpath(input_folder)) + + print(f"Loading data from location: {location}") + + try: + os.makedirs(output_folder) # also creates missing parent folders + except FileExistsError: + print(f"Output Folder: {output_folder} exists, skipping creation") + + for file in os.listdir(input_folder): + + try: + + print(f"Getting paths from: {file}") + + paths, _ = svg2paths(os.path.join(input_folder, file)) + + # Gets paths from file + xlim, y_lines, trend = categorise_paths(paths) + + # Sort largest SVG y first: index 0 is the -80% line, 1 the baseline, 2 the +80% line + y_lines.sort(reverse=True) + + trend_converted = convert_units(trend, y_lines, xlim, yspan=80, xspan=42) + + filename = ( + f"{output_folder}/{location}-{file.split('.')[0]}" + ) + + xs, ys = tuple(zip(*trend_converted)) + df = pd.DataFrame(data={"value": ys, "rel_day": xs}) + + result_df = pd.merge( + date_df, df, left_on="index", right_on="rel_day", how="left" + ) + + result_df = result_df[["value", "date"]] + result_df["origin"] = location + result_df["graph_num"] = file.split(".")[0] + + result_df.to_csv( + f"{filename}.csv", sep=",", index=False, float_format="%.3f" + ) + + if plots: + plt.plot(result_df.date, result_df.value) + plt.ylim(-80, 80) + plt.savefig(f"{filename}.png") + + plt.clf() + + except (ValueError, AssertionError) as err: # an unexpected layout skips this file only, not the whole run + print(f"ERROR for {file}, skipping") + print(err) + + +def categorise_paths(paths): + """Categorise paths into background lines and the trend line.
+ + Args: + paths: Paths extracted from single plot SVG + + Returns: + (xlim, y_lines, points) + xlim: Limits of the x axis (in SVG coordinates) + y_lines: [Bottom, Middle, Top] + i.e. (-80%, baseline, +80%) (in SVG coordinates) + points: Points on the trend line (in SVG coordinates) + + Raises: + ValueError: If the grid lines or the trend line do not match the expected layout + """ + y_lines = sorted([path.start.imag for path in paths if len(path) == 1]) + + if len(y_lines) == 5: + y_lines = [y_lines[0], y_lines[2], y_lines[-1]] + + if len(y_lines) == 3: + # Normal case + xlim = [(path.start.real, path.end.real) for path in paths if len(path) == 1][1] + + trends = [path for path in paths if len(path) > 1] + + if len(trends) != 1: + raise ValueError(f"Expected exactly one trend path, found {len(trends)}") + + trend = trends[0] + + mid_points = [] + for end_seg, next_start_seg in zip(trend[:-1], trend[1:]): + if not np.isclose(end_seg.end, next_start_seg.start): + mid_points.append(end_seg.end) + + mid_points.append(next_start_seg.start) + + points = [trend[0].start] + mid_points + [trend[-1].end] + + return xlim, sorted(y_lines, reverse=True), points + + else: + raise ValueError("Assuming single segment trend line, not yet handled") + + +def convert_units(trend, line_y, xlim, yspan, xspan): + """Switch from SVG coordinates to plot coordinates. + + Args: + trend: points on the trend line + line_y: y SVG coords of (-80%, baseline, 80%) lines + xlim: Limits of the x axis (SVG coordinates) + yspan: Coordinate distance from baseline to outer y_lines + xspan: Distance in whole days from start to end of the plot + + Returns: + trend_plot_coords: List of (x, y) tuples + Points on the trend line in plot coordinates + """ + xmin, xmax = xlim + x_scale = xmax - xmin + + ymax, ymid, ymin = tuple(line_y) + y_scale = (abs(ymax - ymid) + abs(ymid - ymin)) / 2 + + trend_plot_coords = [] + for point in trend: + x = point.real + y = point.imag + + x_out = 1 + round(xspan * ((x - xmin) / x_scale)) + y_out = yspan * ((ymid - y) / y_scale) + +
trend_plot_coords.append((x_out, y_out)) + return trend_plot_coords + + +if __name__ == "__main__": + main() diff --git a/getgraphs.py b/getgraphs.py new file mode 100644 index 0000000..f789469 --- /dev/null +++ b/getgraphs.py @@ -0,0 +1,128 @@ +# stdlib +import os +import logging + +# third party +import click +from svgpathtools import svg2paths2, wsvg + + +@click.command() +@click.argument("INPUT_LOCATION") +@click.argument("OUTPUT_FOLDER") +@click.option( + "-m", + "--multiple", + is_flag=True, + help="If the input location is a folder with multiple SVGs, this has to be set to true.", +) +@click.option( + "-f", + "--folder", + help="If provided will overwrite the output folder name (can not be used with the `--multiple` flag)", +) +def main(input_location, output_folder, multiple, folder): + if not multiple: + process(input_location, output_folder, folder) + else: + # Keep only *.svg inputs; this also skips .DS_Store and other stray files. + stems = [ + f[: -len(".svg")] for f in os.listdir(input_location) if f.endswith(".svg") + ] + + # Numeric names first, in numeric order; other names (e.g. UK.svg) follow + # alphabetically instead of crashing the int() parse. + stems.sort(key=lambda s: (0, int(s), "") if s.isdecimal() else (1, 0, s)) + files = [f"{stem}.svg" for stem in stems] + + print("Converting:", *[" - " + file for file in files], sep="\n") + for input_file in [os.path.join(input_location, file) for file in files]: + try: + process(input_file, output_folder) + except Exception as e: + print("Could not process file ", input_file, f"\nError: {e}", end="\n") + + +def process(input_file, output_folder, overwrite_name=None): + hour_lines = [] + trends = [] + line_y = [] + trend_y = [] + trend_y_end = [] + paths_new = [] + attributes_new = [] + + paths, attributes, svg_attributes = svg2paths2(input_file) + + # filter only relevant elements of the svg + for k, v in enumerate(attributes): + + path = paths[k] + if path._end is None: + continue + if v.get("style") is None: + continue + + else: + + if "stroke:#dadce0" in v.get("style").split(";"): + if "stroke-width:1.19px" in v.get("style").split(";"): + hour_lines.append(k) + paths_new.append(paths[k]) +
attributes_new.append(attributes[k]) + if "stroke:#4285f4" in v.get("style").split(";"): + trends.append(k) + paths_new.append(paths[k]) + attributes_new.append(attributes[k]) + + # prep output folder; default name is the SVG file name without directories or extension + output_folder = ( + os.path.join(output_folder, overwrite_name) + if overwrite_name + else os.path.join(output_folder, os.path.splitext(os.path.basename(input_file))[0]) + ) + os.makedirs(output_folder, exist_ok=True) + + # FIX: missing one graph + + paths_save = [] + attributes_save = [] + + count = 0 + + for k, v in enumerate(attributes_new): + count = count + 1 + if ( + count % 6 == 0 + and count != 0 + and "stroke-width:0.5" in attributes_new[k].get("style") + ): + attributes_new.append(attributes_new[len(attributes_new) - 1]) + attributes_new[k + 1 :] = attributes_new[k:-1] + paths_new.append(paths_new[len(paths_new) - 1]) + paths_new[k + 1 :] = paths_new[k:-1] + + num = 1 + count = 0 + for k, v in enumerate(attributes_new): + + paths_save.append(paths_new[k]) + attributes_save.append(attributes_new[k]) + + count = count + 1 + + if count % 6 == 0: + + wsvg( + paths_save, + filename=os.path.join(output_folder, f"{num}.svg"), + attributes=attributes_save, + ) + paths_save = [] + attributes_save = [] + count = 0 + num += 1 + + +if __name__ == "__main__": + main() diff --git a/graphs/.gitkeep b/graphs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/meta/logo.png b/meta/logo.png new file mode 100644 index 0000000..a589f8d Binary files /dev/null and b/meta/logo.png differ diff --git a/output/.gitkeep b/output/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/pdfs/.gitkeep b/pdfs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..50fa922 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,233 @@ +[[package]] +category = "main" +description = "Composable command line interface toolkit" +name = "click" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+version = "7.1.1" + +[[package]] +category = "main" +description = "Composable style cycles" +name = "cycler" +optional = false +python-versions = "*" +version = "0.10.0" + +[package.dependencies] +six = "*" + +[[package]] +category = "main" +description = "A fast implementation of the Cassowary constraint solver" +name = "kiwisolver" +optional = false +python-versions = ">=3.6" +version = "1.2.0" + +[[package]] +category = "main" +description = "Python plotting package" +name = "matplotlib" +optional = false +python-versions = ">=3.6" +version = "3.1.3" + +[package.dependencies] +cycler = ">=0.10" +kiwisolver = ">=1.0.1" +numpy = ">=1.11" +pyparsing = ">=2.0.1,<2.0.4 || >2.0.4,<2.1.2 || >2.1.2,<2.1.6 || >2.1.6" +python-dateutil = ">=2.1" + +[[package]] +category = "main" +description = "NumPy is the fundamental package for array computing with Python." +name = "numpy" +optional = false +python-versions = ">=3.5" +version = "1.18.1" + +[[package]] +category = "main" +description = "Powerful data structures for data analysis, time series, and statistics" +name = "pandas" +optional = false +python-versions = ">=3.6.1" +version = "1.0.3" + +[package.dependencies] +numpy = ">=1.13.3" +python-dateutil = ">=2.6.1" +pytz = ">=2017.2" + +[package.extras] +test = ["pytest (>=4.0.2)", "pytest-xdist", "hypothesis (>=3.58)"] + +[[package]] +category = "main" +description = "Python parsing module" +name = "pyparsing" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +version = "2.4.6" + +[[package]] +category = "main" +description = "Extensions to the standard Python datetime module" +name = "python-dateutil" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +version = "2.8.1" + +[package.dependencies] +six = ">=1.5" + +[[package]] +category = "main" +description = "World timezone definitions, modern and historical" +name = "pytz" +optional = false +python-versions = "*" +version = "2019.3" + +[[package]] +category = "main" +description 
= "Python 2 and 3 compatibility utilities" +name = "six" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +version = "1.14.0" + +[[package]] +category = "main" +description = "A collection of tools for manipulating and analyzing SVG Path objects and Bezier curves." +name = "svgpathtools" +optional = false +python-versions = "*" +version = "1.3.3" + +[package.dependencies] +numpy = "*" +svgwrite = "*" + +[[package]] +category = "main" +description = "A Python library to create SVG drawings." +name = "svgwrite" +optional = false +python-versions = ">=3.6" +version = "1.4" + +[metadata] +content-hash = "73e7c4cebea093b703e2b0e41a39be25d5ded6cf6df7714c492a2a7342893800" +python-versions = "^3.8" + +[metadata.files] +click = [ + {file = "click-7.1.1-py2.py3-none-any.whl", hash = "sha256:e345d143d80bf5ee7534056164e5e112ea5e22716bbb1ce727941f4c8b471b9a"}, + {file = "click-7.1.1.tar.gz", hash = "sha256:8a18b4ea89d8820c5d0c7da8a64b2c324b4dabb695804dbfea19b9be9d88c0cc"}, +] +cycler = [ + {file = "cycler-0.10.0-py2.py3-none-any.whl", hash = "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d"}, + {file = "cycler-0.10.0.tar.gz", hash = "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"}, +] +kiwisolver = [ + {file = "kiwisolver-1.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74"}, + {file = "kiwisolver-1.2.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:efcf3397ae1e3c3a4a0a0636542bcad5adad3b1dd3e8e629d0b6e201347176c8"}, + {file = "kiwisolver-1.2.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fccefc0d36a38c57b7bd233a9b485e2f1eb71903ca7ad7adacad6c28a56d62d2"}, + {file = "kiwisolver-1.2.0-cp36-none-win32.whl", hash = "sha256:60a78858580761fe611d22127868f3dc9f98871e6fdf0a15cc4203ed9ba6179b"}, + {file = "kiwisolver-1.2.0-cp36-none-win_amd64.whl", hash = "sha256:556da0a5f60f6486ec4969abbc1dd83cf9b5c2deadc8288508e55c0f5f87d29c"}, + {file = 
"kiwisolver-1.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7cc095a4661bdd8a5742aaf7c10ea9fac142d76ff1770a0f84394038126d8fc7"}, + {file = "kiwisolver-1.2.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c955791d80e464da3b471ab41eb65cf5a40c15ce9b001fdc5bbc241170de58ec"}, + {file = "kiwisolver-1.2.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:603162139684ee56bcd57acc74035fceed7dd8d732f38c0959c8bd157f913fec"}, + {file = "kiwisolver-1.2.0-cp37-none-win32.whl", hash = "sha256:03662cbd3e6729f341a97dd2690b271e51a67a68322affab12a5b011344b973c"}, + {file = "kiwisolver-1.2.0-cp37-none-win_amd64.whl", hash = "sha256:4eadb361baf3069f278b055e3bb53fa189cea2fd02cb2c353b7a99ebb4477ef1"}, + {file = "kiwisolver-1.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c31bc3c8e903d60a1ea31a754c72559398d91b5929fcb329b1c3a3d3f6e72113"}, + {file = "kiwisolver-1.2.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:d52b989dc23cdaa92582ceb4af8d5bcc94d74b2c3e64cd6785558ec6a879793e"}, + {file = "kiwisolver-1.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:e586b28354d7b6584d8973656a7954b1c69c93f708c0c07b77884f91640b7657"}, + {file = "kiwisolver-1.2.0-cp38-none-win32.whl", hash = "sha256:d069ef4b20b1e6b19f790d00097a5d5d2c50871b66d10075dab78938dc2ee2cf"}, + {file = "kiwisolver-1.2.0-cp38-none-win_amd64.whl", hash = "sha256:18d749f3e56c0480dccd1714230da0f328e6e4accf188dd4e6884bdd06bf02dd"}, + {file = "kiwisolver-1.2.0.tar.gz", hash = "sha256:247800260cd38160c362d211dcaf4ed0f7816afb5efe56544748b21d6ad6d17f"}, +] +matplotlib = [ + {file = "matplotlib-3.1.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6a0031774c6c68298183438edf2e738856d63a4c4797876fa81d0ee337f5361c"}, + {file = "matplotlib-3.1.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b4c0010eff09ab65c77ad1a0eec6c7cccb9f6838c3c77dc5b4002fe0cf2912fd"}, + {file = "matplotlib-3.1.3-cp36-cp36m-win32.whl", hash = "sha256:78d0772412c0653aa3e860c52ff08d1f5ba64334e2b86b09dc2d502657d8ca73"}, + {file = 
"matplotlib-3.1.3-cp36-cp36m-win_amd64.whl", hash = "sha256:97f04d29a358826f205320fbc88d46ce5c5ff6fb54ae050042ff396beda52ca4"}, + {file = "matplotlib-3.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4164265ca573481ce61c83322e6b33628203afeabeb3e22c50376f5d3ee0f9be"}, + {file = "matplotlib-3.1.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b5ace0531255932ad19fe64c116ada2713f7b38381db8f68df0fa694409e67d1"}, + {file = "matplotlib-3.1.3-cp37-cp37m-win32.whl", hash = "sha256:c7bb7ed3e011324b56462391ec3f4bbb7c8c6af5892ebfb45d312b15b4cdfc8d"}, + {file = "matplotlib-3.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:f0023322c99328c40ce22678ab0ab5adfc27e338419966539398239996f63e8d"}, + {file = "matplotlib-3.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:db8bbba9284845034a2f0e1add91dc5e89db8c996359bdcf677a8d6f88875cf1"}, + {file = "matplotlib-3.1.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:635ded7834f43c8d999076236f7e90074d77f7b8345e5e82cd95af053cc29df1"}, + {file = "matplotlib-3.1.3-cp38-cp38-win32.whl", hash = "sha256:8efff896c49676700dc6adace6137a854ff64a4d44ca057ff726960ffdaa47bf"}, + {file = "matplotlib-3.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:470eed601ff5132364e0121a20d7c3d43fab969c8c333422c1b6b72fde2ed3c1"}, + {file = "matplotlib-3.1.3-pp373-pypy36_pp73-win32.whl", hash = "sha256:23b71560c721109954c0215ffc81f4c80ce8528749d534a01a61e8ab737c5bce"}, + {file = "matplotlib-3.1.3.tar.gz", hash = "sha256:db3121f12fb9b99f105d1413aebaeb3d943f269f3d262b45586d12765866f0c6"}, +] +numpy = [ + {file = "numpy-1.18.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:20b26aaa5b3da029942cdcce719b363dbe58696ad182aff0e5dcb1687ec946dc"}, + {file = "numpy-1.18.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:70a840a26f4e61defa7bdf811d7498a284ced303dfbc35acb7be12a39b2aa121"}, + {file = "numpy-1.18.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:17aa7a81fe7599a10f2b7d95856dc5cf84a4eefa45bc96123cbbc3ebc568994e"}, + {file = "numpy-1.18.1-cp35-cp35m-win32.whl", 
hash = "sha256:f3d0a94ad151870978fb93538e95411c83899c9dc63e6fb65542f769568ecfa5"}, + {file = "numpy-1.18.1-cp35-cp35m-win_amd64.whl", hash = "sha256:1786a08236f2c92ae0e70423c45e1e62788ed33028f94ca99c4df03f5be6b3c6"}, + {file = "numpy-1.18.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ae0975f42ab1f28364dcda3dde3cf6c1ddab3e1d4b2909da0cb0191fa9ca0480"}, + {file = "numpy-1.18.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:cf7eb6b1025d3e169989416b1adcd676624c2dbed9e3bcb7137f51bfc8cc2572"}, + {file = "numpy-1.18.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b765ed3930b92812aa698a455847141869ef755a87e099fddd4ccf9d81fffb57"}, + {file = "numpy-1.18.1-cp36-cp36m-win32.whl", hash = "sha256:2d75908ab3ced4223ccba595b48e538afa5ecc37405923d1fea6906d7c3a50bc"}, + {file = "numpy-1.18.1-cp36-cp36m-win_amd64.whl", hash = "sha256:9acdf933c1fd263c513a2df3dceecea6f3ff4419d80bf238510976bf9bcb26cd"}, + {file = "numpy-1.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:56bc8ded6fcd9adea90f65377438f9fea8c05fcf7c5ba766bef258d0da1554aa"}, + {file = "numpy-1.18.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:e422c3152921cece8b6a2fb6b0b4d73b6579bd20ae075e7d15143e711f3ca2ca"}, + {file = "numpy-1.18.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b3af02ecc999c8003e538e60c89a2b37646b39b688d4e44d7373e11c2debabec"}, + {file = "numpy-1.18.1-cp37-cp37m-win32.whl", hash = "sha256:d92350c22b150c1cae7ebb0ee8b5670cc84848f6359cf6b5d8f86617098a9b73"}, + {file = "numpy-1.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:77c3bfe65d8560487052ad55c6998a04b654c2fbc36d546aef2b2e511e760971"}, + {file = "numpy-1.18.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c98c5ffd7d41611407a1103ae11c8b634ad6a43606eca3e2a5a269e5d6e8eb07"}, + {file = "numpy-1.18.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9537eecf179f566fd1c160a2e912ca0b8e02d773af0a7a1120ad4f7507cd0d26"}, + {file = "numpy-1.18.1-cp38-cp38-manylinux1_x86_64.whl", hash = 
"sha256:e840f552a509e3380b0f0ec977e8124d0dc34dc0e68289ca28f4d7c1d0d79474"}, + {file = "numpy-1.18.1-cp38-cp38-win32.whl", hash = "sha256:590355aeade1a2eaba17617c19edccb7db8d78760175256e3cf94590a1a964f3"}, + {file = "numpy-1.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:39d2c685af15d3ce682c99ce5925cc66efc824652e10990d2462dfe9b8918c6a"}, + {file = "numpy-1.18.1.zip", hash = "sha256:b6ff59cee96b454516e47e7721098e6ceebef435e3e21ac2d6c3b8b02628eb77"}, +] +pandas = [ + {file = "pandas-1.0.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d234bcf669e8b4d6cbcd99e3ce7a8918414520aeb113e2a81aeb02d0a533d7f7"}, + {file = "pandas-1.0.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:ca84a44cf727f211752e91eab2d1c6c1ab0f0540d5636a8382a3af428542826e"}, + {file = "pandas-1.0.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1fa4bae1a6784aa550a1c9e168422798104a85bf9c77a1063ea77ee6f8452e3a"}, + {file = "pandas-1.0.3-cp36-cp36m-win32.whl", hash = "sha256:863c3e4b7ae550749a0bb77fa22e601a36df9d2905afef34a6965bed092ba9e5"}, + {file = "pandas-1.0.3-cp36-cp36m-win_amd64.whl", hash = "sha256:a210c91a02ec5ff05617a298ad6f137b9f6f5771bf31f2d6b6367d7f71486639"}, + {file = "pandas-1.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11c7cb654cd3a0e9c54d81761b5920cdc86b373510d829461d8f2ed6d5905266"}, + {file = "pandas-1.0.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6597df07ea361231e60c00692d8a8099b519ed741c04e65821e632bc9ccb924c"}, + {file = "pandas-1.0.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:743bba36e99d4440403beb45a6f4f3a667c090c00394c176092b0b910666189b"}, + {file = "pandas-1.0.3-cp37-cp37m-win32.whl", hash = "sha256:07c1b58936b80eafdfe694ce964ac21567b80a48d972879a359b3ebb2ea76835"}, + {file = "pandas-1.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:12f492dd840e9db1688126216706aa2d1fcd3f4df68a195f9479272d50054645"}, + {file = "pandas-1.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0ebe327fb088df4d06145227a4aa0998e4f80a9e6aed4b61c1f303bdfdf7c722"}, + {file = 
"pandas-1.0.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:858a0d890d957ae62338624e4aeaf1de436dba2c2c0772570a686eaca8b4fc85"}, + {file = "pandas-1.0.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:387dc7b3c0424327fe3218f81e05fc27832772a5dffbed385013161be58df90b"}, + {file = "pandas-1.0.3-cp38-cp38-win32.whl", hash = "sha256:167a1315367cea6ec6a5e11e791d9604f8e03f95b57ad227409de35cf850c9c5"}, + {file = "pandas-1.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:1a7c56f1df8d5ad8571fa251b864231f26b47b59cbe41aa5c0983d17dbb7a8e4"}, + {file = "pandas-1.0.3.tar.gz", hash = "sha256:32f42e322fb903d0e189a4c10b75ba70d90958cc4f66a1781ed027f1a1d14586"}, +] +pyparsing = [ + {file = "pyparsing-2.4.6-py2.py3-none-any.whl", hash = "sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec"}, + {file = "pyparsing-2.4.6.tar.gz", hash = "sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f"}, +] +python-dateutil = [ + {file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"}, + {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"}, +] +pytz = [ + {file = "pytz-2019.3-py2.py3-none-any.whl", hash = "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d"}, + {file = "pytz-2019.3.tar.gz", hash = "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"}, +] +six = [ + {file = "six-1.14.0-py2.py3-none-any.whl", hash = "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"}, + {file = "six-1.14.0.tar.gz", hash = "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a"}, +] +svgpathtools = [ + {file = "svgpathtools-1.3.3-py2.py3-none-any.whl", hash = "sha256:7f7bdafe2c03b312178460104705e1d554d8cf36c898bec41bdce9fed3504746"}, + {file = "svgpathtools-1.3.3.tar.gz", hash = "sha256:e4b3784ae41b725fbce6a33a8981210967b16d0b557cb5d98c0ed0c81f0f89b9"}, +] 
+svgwrite = [ + {file = "svgwrite-1.4-py3-none-any.whl", hash = "sha256:fa842fb3129a9399d19b5e9602a022fcc7f2f3f24713550e765c488ffafd743d"}, + {file = "svgwrite-1.4.zip", hash = "sha256:b38ac03b67f81c728d81a33e4711aaf3ab136a57156d721bb17f88525d9909bb"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..f25baac --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,19 @@ +[tool.poetry] +name = "svg-graph-extractor" +version = "0.1.0" +description = "" +authors = ["Finnkauski "] + +[tool.poetry.dependencies] +python = "^3.8" +click = "7.1.1" +matplotlib = "3.1.3" +numpy = "1.18.1" +pandas = "1.0.3" +svgpathtools = "1.3.3" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry>=0.12"] +build-backend = "poetry.masonry.api" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..f2f5ad0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +click==7.1.1 +matplotlib==3.1.3 +numpy==1.18.1 +pandas==1.0.3 +svgpathtools==1.3.3 \ No newline at end of file diff --git a/svgs/.gitkeep b/svgs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/resources/gaps.svg b/tests/resources/gaps.svg new file mode 100644 index 0000000..4d38166 --- /dev/null +++ b/tests/resources/gaps.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/tests/resources/nogaps.svg b/tests/resources/nogaps.svg new file mode 100644 index 0000000..78606d8 --- /dev/null +++ b/tests/resources/nogaps.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/tests/test_extract.py b/tests/test_extract.py new file mode 100644 index 0000000..e00b002 --- /dev/null +++ b/tests/test_extract.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +import os # fixtures are resolved relative to this file, not the working directory + +from svgpathtools import svg2paths +import createcsvs + + +def test_categorise_paths(): + # Given + filepath = os.path.join(os.path.dirname(__file__), "resources", "nogaps.svg") + + # When + paths, _ = svg2paths(filepath) + xlim, lines, trend = createcsvs.categorise_paths(paths) + + # Then + assert len(lines) == 3 + + +def test_convert_units(): + # Given +
filepath = os.path.join(os.path.dirname(__file__), "resources", "nogaps.svg") + + # When + paths, _ = svg2paths(filepath) + xlim, lines, trend = createcsvs.categorise_paths(paths) + trend_converted = createcsvs.convert_units(trend, lines, xlim, yspan=80, xspan=42) + + # Then + assert len(trend_converted) == 43 + + +def test_categorise_paths_w_gaps(): + # Given + filepath = os.path.join(os.path.dirname(__file__), "resources", "gaps.svg") + + # When + paths, _ = svg2paths(filepath) + xlim, lines, trend = createcsvs.categorise_paths(paths) + + # Then + assert len(lines) == 3 + + +def test_convert_units_w_gaps(): + # Given + filepath = os.path.join(os.path.dirname(__file__), "resources", "gaps.svg") + + # When + paths, _ = svg2paths(filepath) + xlim, lines, trend = createcsvs.categorise_paths(paths) + trend_converted = createcsvs.convert_units(trend, lines, xlim, yspan=80, xspan=42) + + # Then + assert len(trend_converted) == 27