diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..52132bb
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,158 @@
+# Data/plots
+*.csv
+*.png
+*.svg
+*.pdf
+
+# Work in progress
+notebooks/
+
+# Mac
+.DS_Store
+
+# PyCharm
+.idea/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# static files generated from Django application using `collectstatic`
+media
+static
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1a78229
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,18 @@
+Copyright 2020 Office for National Statistics
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..717666e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,112 @@
+# Mobility Report graph extractor
+
+
+
+
+
+For extracting graphs from COVID-19 Community Mobility Report PDF into
+comma separated value (CSV) files for each graph.
+
+Scripts `getgraphs.py` and `createcsvs.py` are able to extract all graphs from a Scalable Vector Graphics (SVG) document (converted as a single document from the original PDF) in one go, including those with gaps in the data.
+
+**Developed and tested with:**
+- MacOS 10.15.4
+- PyCharm Community Edition 2018.3.2
+- Affinity Designer 1.6.5
+- Python 3
+
+This code is developed at *speed* on the COVID-19 Community Mobility Report PDF documents published on Friday 3rd of April 2020. Changes may be made.
+
+## Installation
+
+We provide the python `requirements.txt` file as well as a `poetry` setup for
+dependency management.
+
+We recommend using a virtual environment before installing dependencies.
+
+To install with `pip`:
+
+```shell
+pip install -r requirements.txt
+```
+
+To install with `poetry`:
+
+```shell
+poetry install
+```
+
+## Usage
+
+1. Either create, or download a pre-made, SVG document file (this file is the
+ entire PDF document in Scalable Vector Graphics (SVG) format).
+
+ **Create**:
+
+ 1. Load in PDF document to Affinity Designer.
+ 1. Click Load all pages.
+ 1. `File > Export > SVG (for print)`
+ 1. Select Area: Whole Document.
+ 1. Save the SVG file to (`./svgs`).
+
+ **Download**:
+
+ 1. The Data Science Campus has created [SVGs for all G20 countries](https://drive.google.com/open?id=1n5XXYbJtf-wNWEkDPe7N8iNfjzJXwZ_d) (no PDFs available for China or Russia).
+ 1. Save the SVG file to (`./svgs`).
+
+2. **Run the `getgraphs.py` Python script.**
+
+```text
+Usage: getgraphs.py [OPTIONS] INPUT_LOCATION OUTPUT_FOLDER
+
+Options:
+ -m, --multiple If the input location is a folder with multiple SVGs,
+ this has to be set to true.
+ -f, --folder TEXT If provided will overwrite the output folder name (can
+ not be used with the `--multiple` flag)
+ --help Show this message and exit.
+```
+
+Specify either the location of the single SVG file (e.g. `./svgs/UK`) you wish to process or the folder of SVG files (you will have to provide the `-m` flag in that case). You will also have to provide the output folder, e.g. `./graphs/UK`.
+
+3. **Run the `createcsvs.py` Python script.**
+
+```text
+Usage: createcsvs.py [OPTIONS] INPUT_FOLDER OUTPUT_FOLDER DATES_FILE
+
+Options:
+ -p, --plots Enables creation and saving of additional PNG plots
+ --help Show this message and exit.
+```
+
+Specify the input folder where the individual SVG files are (e.g. `./graphs/UK`),
+the output folder where you want the CSV files to be saved to (e.g. `./output/UK`),
+and the dates lookup file (e.g. `./config/dates_lookup.csv`) - used to convert
+coordinates to dates.
+
+If you want simple matplotlib PNG plots to save as well as CSV files, use the `-p` flag.
+
+## Data format
+
+Each CSV will be saved to the output folder (e.g. `./output/UK`) and named `<location>-<n>.csv`, where `<location>` is the name of the input folder and `<n>` is the graph number, starting at 1 (e.g. `UK-1.csv`). As of the **COVID-19 Community Mobility Reports** released on Friday 3rd April 2020, CSV files 1 to 6 relate to the country-level graphs in
+the original PDF (pages one and two). Then each set of 6 CSV files (e.g., 7 to 12) will relate to a regional area.
+
+Each set of 6 files follows the order:
+
+1. Retail & recreation
+2. Grocery & pharmacy
+3. Parks
+4. Transit stations
+5. Workplaces
+6. Residential
+
+## United Kingdom Dataset
+
+A pre-made dataset for the United Kingdom can be found at the [Data Science Campus' Google Mobility Reports Data repository](https://github.com/datasciencecampus/google-mobility-reports-data).
+
+
+## Contributing
+
+For any suggestions or issues, please use the Issues template. We welcome
+collaborators. To help us with this work, fork the repository and issue a Pull
+Request when you have added a feature, or fixed a bug. Thanks!
diff --git a/config/.gitkeep b/config/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/config/README.md b/config/README.md
new file mode 100644
index 0000000..b87827b
--- /dev/null
+++ b/config/README.md
@@ -0,0 +1 @@
+The `dates_lookup.csv` file maps the raw vertex indices (1-43) to real dates. These relate to the COVID-19 Community Mobility Reports released on Friday 3rd April 2020 and should be updated for future releases.
\ No newline at end of file
diff --git a/config/dates_lookup.csv b/config/dates_lookup.csv
new file mode 100644
index 0000000..f4c1f96
--- /dev/null
+++ b/config/dates_lookup.csv
@@ -0,0 +1,44 @@
+index,date
+1,2020-02-16
+2,2020-02-17
+3,2020-02-18
+4,2020-02-19
+5,2020-02-20
+6,2020-02-21
+7,2020-02-22
+8,2020-02-23
+9,2020-02-24
+10,2020-02-25
+11,2020-02-26
+12,2020-02-27
+13,2020-02-28
+14,2020-02-29
+15,2020-03-01
+16,2020-03-02
+17,2020-03-03
+18,2020-03-04
+19,2020-03-05
+20,2020-03-06
+21,2020-03-07
+22,2020-03-08
+23,2020-03-09
+24,2020-03-10
+25,2020-03-11
+26,2020-03-12
+27,2020-03-13
+28,2020-03-14
+29,2020-03-15
+30,2020-03-16
+31,2020-03-17
+32,2020-03-18
+33,2020-03-19
+34,2020-03-20
+35,2020-03-21
+36,2020-03-22
+37,2020-03-23
+38,2020-03-24
+39,2020-03-25
+40,2020-03-26
+41,2020-03-27
+42,2020-03-28
+43,2020-03-29
diff --git a/createcsvs.py b/createcsvs.py
new file mode 100644
index 0000000..0b027da
--- /dev/null
+++ b/createcsvs.py
@@ -0,0 +1,175 @@
+#!/usr/bin/env python3
+import os
+
+import click
+import numpy as np
+import pandas as pd
+from matplotlib import pyplot as plt
+from svgpathtools import svg2paths
+
+
+@click.command()
+@click.argument("INPUT_FOLDER")
+@click.argument("OUTPUT_FOLDER")
+@click.argument("DATES_FILE")
+@click.option(
+    "-p",
+    "--plots",
+    is_flag=True,
+    default=False,
+    help="Enables creation and saving of additional PNG plots",
+)
+def main(input_folder, output_folder, dates_file, plots):
+    """Turn SVG graphs into CSVs.
+
+    Given an input folder of single plot SVGs convert them into CSV files.
+    Entries that are not SVG files are ignored, and graphs whose layout
+    cannot be interpreted are reported and skipped.
+
+    Args:
+        input_folder: Location of SVG files
+        output_folder: Location to store the CSVs (created if missing)
+        dates_file: Lookup from x axis steps to date
+        plots: Boolean flag
+            Set to true to create png plots from the extracted data
+            (used for manual inspection checks against source plots)
+    """
+    # Get date lookup file
+    date_df = pd.read_csv(dates_file)
+
+    # Use the last path component as the location label. Normalising first
+    # means a trailing separator ("graphs/UK/") still yields "UK".
+    location = os.path.basename(os.path.normpath(input_folder))
+
+    print(f"Loading data from location: {location}")
+
+    try:
+        # makedirs (unlike mkdir) also creates missing parent folders
+        os.makedirs(output_folder)
+    except FileExistsError:
+        print(f"Output Folder: {output_folder} exists, skipping creation")
+
+    for file in sorted(os.listdir(input_folder)):
+
+        # Stray entries such as .DS_Store cannot be parsed as SVG
+        if not file.lower().endswith(".svg"):
+            continue
+
+        graph_num = file.split(".")[0]
+
+        try:
+
+            print(f"Getting paths from: {file}")
+
+            paths, _ = svg2paths(os.path.join(input_folder, file))
+
+            # Gets paths from file
+            xlim, y_lines, trend = categorise_paths(paths)
+
+            # Sort largest to smallest SVG y, the order convert_units
+            # expects: index 0 is the -80% line, 1 the baseline, 2 the
+            # +80% line
+            y_lines.sort(reverse=True)
+
+            trend_converted = convert_units(trend, y_lines, xlim, yspan=80, xspan=42)
+
+            filename = os.path.join(output_folder, f"{location}-{graph_num}")
+
+            xs, ys = tuple(zip(*trend_converted))
+            df = pd.DataFrame(data={"value": ys, "rel_day": xs})
+
+            # Left join on the lookup so days missing from the trend line
+            # (gaps in the graph) are kept as rows with an empty value
+            result_df = pd.merge(
+                date_df, df, left_on="index", right_on="rel_day", how="left"
+            )
+
+            # Copy so the column assignments below act on an independent frame
+            result_df = result_df[["value", "date"]].copy()
+            result_df["origin"] = location
+            result_df["graph_num"] = graph_num
+
+            result_df.to_csv(
+                f"{filename}.csv", sep=",", index=False, float_format="%.3f"
+            )
+
+            if plots:
+                plt.plot(result_df.date, result_df.value)
+                plt.ylim(-80, 80)
+                plt.savefig(f"{filename}.png")
+
+                # Reset the figure so the next graph starts from a blank plot
+                plt.clf()
+
+        except (ValueError, AssertionError) as err:
+            # An unexpected graph layout may surface as a ValueError or as a
+            # failed assertion; either way skip this file and carry on
+            print(f"ERROR for {file}, skipping")
+            print(err)
+
+
+def categorise_paths(paths):
+    """Categorise paths into background lines and the trend line.
+
+    Paths with exactly one segment are treated as the horizontal background
+    lines; the single path with more than one segment is the trend line.
+    When five background lines are present only the lowest, middle and
+    highest y values are kept.
+
+    Args:
+        paths: Paths extracted from single plot SVG
+
+    Returns:
+        (xlim, y_lines, points)
+        xlim: Limits of the x axis (in SVG coordinates), taken from the
+            second single-segment path in document order
+        y_lines: The three reference line y values, sorted from largest to
+            smallest SVG y, i.e. (-80%, baseline, +80%)
+        points: Points on the trend line (in SVG coordinates). Where two
+            consecutive segments do not join up (a gap in the data) both
+            the end of the first and the start of the next are included.
+
+    Raises:
+        ValueError: If the number of background lines is not 3 or 5, or if
+            there is not exactly one trend line
+    """
+    background = [path for path in paths if len(path) == 1]
+    y_lines = sorted(line.start.imag for line in background)
+
+    if len(y_lines) == 5:
+        y_lines = [y_lines[0], y_lines[2], y_lines[-1]]
+
+    # Raise instead of assert: assertions vanish under `python -O` and are
+    # not covered by callers that skip unreadable graphs on ValueError
+    if len(y_lines) != 3:
+        raise ValueError(
+            f"Expected 3 or 5 single-segment background lines, found {len(background)}"
+        )
+
+    xlim = (background[1].start.real, background[1].end.real)
+
+    trends = [path for path in paths if len(path) > 1]
+    if len(trends) != 1:
+        raise ValueError(f"Expected exactly one trend line, found {len(trends)}")
+
+    trend = trends[0]
+
+    mid_points = []
+    for end_seg, next_start_seg in zip(trend[:-1], trend[1:]):
+        # A mismatch between one segment's end and the next one's start
+        # marks a gap, so keep the point on each side of it
+        if not np.isclose(end_seg.end, next_start_seg.start):
+            mid_points.append(end_seg.end)
+
+        mid_points.append(next_start_seg.start)
+
+    points = [trend[0].start] + mid_points + [trend[-1].end]
+
+    return xlim, sorted(y_lines, reverse=True), points
+
+
+def convert_units(trend, line_y, xlim, yspan, xspan):
+    """Switch from SVG coordinates to plot coordinates.
+
+    Args:
+        trend: points on the trend line (complex numbers, x as the real
+            part and y as the imaginary part)
+        line_y: y SVG coords of (-80%, baseline, 80%) lines
+        xlim: Limits of the x axis (SVG coordinates)
+        yspan: Coordinate distance from baseline to outer y_lines
+        xspan: Distance in whole days from start to end of the plot
+
+    Returns:
+        trend_plot_coords: List of (x, y) tuples
+            Points on the trend line in plot coordinates; x is a 1-based
+            whole-day index and y is scaled so the outer lines sit at
+            +/- yspan
+
+    Raises:
+        ValueError: If the x limits or the y reference lines span zero
+            distance, so no scale can be derived
+    """
+    xmin, xmax = xlim
+    x_scale = xmax - xmin
+
+    ymax, ymid, ymin = tuple(line_y)
+    # Average the two half-heights of the plot area
+    y_scale = (abs(ymax - ymid) + abs(ymid - ymin)) / 2
+
+    # Report a degenerate axis as ValueError (not ZeroDivisionError) so
+    # callers that skip unreadable graphs on ValueError can carry on
+    if x_scale == 0 or y_scale == 0:
+        raise ValueError("Cannot convert units: axis has zero extent in SVG coordinates")
+
+    trend_plot_coords = []
+    for point in trend:
+        x = point.real
+        y = point.imag
+
+        # Snap x to the nearest whole day, counting from 1
+        x_out = 1 + round(xspan * ((x - xmin) / x_scale))
+        # SVG y grows downwards, so points above the baseline come out positive
+        y_out = yspan * ((ymid - y) / y_scale)
+
+        trend_plot_coords.append((x_out, y_out))
+    return trend_plot_coords
+
+
+# Run the command line entry point only when executed as a script
+if __name__ == "__main__":
+ main()
diff --git a/getgraphs.py b/getgraphs.py
new file mode 100644
index 0000000..f789469
--- /dev/null
+++ b/getgraphs.py
@@ -0,0 +1,128 @@
+# stdlib
+import os
+import logging
+
+# third party
+import click
+from svgpathtools import svg2paths2, wsvg
+
+
+@click.command()
+@click.argument("INPUT_LOCATION")
+@click.argument("OUTPUT_FOLDER")
+@click.option(
+    "-m",
+    "--multiple",
+    is_flag=True,
+    help="If the input location is a folder with multiple SVGs, this has to be set to true.",
+)
+@click.option(
+    "-f",
+    "--folder",
+    help="If provided will overwrite the output folder name (can not be used with the `--multiple` flag)",
+)
+def main(input_location, output_folder, multiple, folder):
+    """Split report SVG documents into one SVG file per graph.
+
+    INPUT_LOCATION is a single SVG file, or a folder of SVG files when
+    --multiple is given. Results are written below OUTPUT_FOLDER.
+    """
+    if not multiple:
+        process(input_location, output_folder, folder)
+        return
+
+    if folder:
+        # The option only applies to a single input; say so instead of
+        # dropping it silently
+        click.echo(
+            "Ignoring `--folder`: it can not be used with the `--multiple` flag",
+            err=True,
+        )
+
+    def numeric_first(name):
+        # Numbered files ("2.svg" before "10.svg") come first in numeric
+        # order, any other names follow alphabetically
+        stem = name.rsplit(".", 1)[0]
+        return (0, int(stem), "") if stem.isdecimal() else (1, 0, stem)
+
+    # Only SVG files are processed; entries such as .DS_Store are ignored
+    files = sorted(
+        (f for f in os.listdir(input_location) if f.lower().endswith(".svg")),
+        key=numeric_first,
+    )
+
+    print("Converting:", *[" - " + file for file in files], sep="\n")
+    for input_file in [os.path.join(input_location, file) for file in files]:
+        try:
+            process(input_file, output_folder)
+        except Exception as e:
+            # Best effort: report the failure and continue with the next file
+            print("Could not process file ", input_file, f"\nError: {e}", end="\n")
+
+
+def process(input_file, output_folder, overwrite_name=None):
+    """Split one report SVG into separate SVG files of six paths each.
+
+    Keeps only the paths styled as grey (`#dadce0`) 1.19px strokes or as
+    blue (`#4285f4`) strokes, then writes every consecutive run of six kept
+    paths to `<n>.svg` (n = 1, 2, ...) inside the destination folder. A
+    trailing run of fewer than six paths is not written.
+
+    Args:
+        input_file: Path of the SVG document to split
+        output_folder: Parent folder of the destination folder
+        overwrite_name: Optional destination folder name; when omitted the
+            name is derived from `input_file`
+
+    Raises:
+        FileExistsError: If the destination folder already exists
+    """
+    group_size = 6
+    paths_new = []
+    attributes_new = []
+
+    paths, attributes, _ = svg2paths2(input_file)
+
+    # filter only relevant elements of the svg
+    for k, attrs in enumerate(attributes):
+        path = paths[k]
+        style = attrs.get("style")
+        if path._end is None or style is None:
+            continue
+
+        declarations = style.split(";")
+        # presumably the grey strokes are the graph grid lines and the blue
+        # strokes the trend lines - verify against a sample report
+        if "stroke:#dadce0" in declarations and "stroke-width:1.19px" in declarations:
+            paths_new.append(path)
+            attributes_new.append(attrs)
+        if "stroke:#4285f4" in declarations:
+            paths_new.append(path)
+            attributes_new.append(attrs)
+
+    # prep output folder
+    # NOTE(review): the default name is the text before the first "." of the
+    # path, so an input such as "./svgs/UK.svg" yields an empty name and the
+    # graphs land directly in `output_folder`. Kept as-is because existing
+    # invocations may rely on it.
+    output_folder = (
+        os.path.join(output_folder, overwrite_name)
+        if overwrite_name
+        else os.path.join(output_folder, input_file.split(".")[0].split("/")[-1])
+    )
+    # makedirs (unlike mkdir) also creates missing parent folders; an
+    # existing destination still raises FileExistsError
+    os.makedirs(output_folder)
+
+    # FIX: missing one graph
+    # Whenever the last element of a run of six carries a 0.5 stroke width,
+    # duplicate it in place, which shifts every later element one slot
+    # along. (NOTE(review): looks like padding for a graph that has one
+    # path fewer than the others - confirm.) An explicit index loop is used
+    # because the lists grow while they are being walked.
+    k = 0
+    while k < len(attributes_new):
+        if (k + 1) % group_size == 0 and "stroke-width:0.5" in attributes_new[
+            k
+        ].get("style"):
+            attributes_new.insert(k, attributes_new[k])
+            paths_new.insert(k, paths_new[k])
+        k += 1
+
+    # Write each complete run of six paths to its own numbered file
+    for num in range(1, len(attributes_new) // group_size + 1):
+        start = (num - 1) * group_size
+        wsvg(
+            paths_new[start : start + group_size],
+            filename=os.path.join(output_folder, f"{num}.svg"),
+            attributes=attributes_new[start : start + group_size],
+        )
+
+
+# Run the command line entry point only when executed as a script
+if __name__ == "__main__":
+ main()
diff --git a/graphs/.gitkeep b/graphs/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/meta/logo.png b/meta/logo.png
new file mode 100644
index 0000000..a589f8d
Binary files /dev/null and b/meta/logo.png differ
diff --git a/output/.gitkeep b/output/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/pdfs/.gitkeep b/pdfs/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..50fa922
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,233 @@
+[[package]]
+category = "main"
+description = "Composable command line interface toolkit"
+name = "click"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+version = "7.1.1"
+
+[[package]]
+category = "main"
+description = "Composable style cycles"
+name = "cycler"
+optional = false
+python-versions = "*"
+version = "0.10.0"
+
+[package.dependencies]
+six = "*"
+
+[[package]]
+category = "main"
+description = "A fast implementation of the Cassowary constraint solver"
+name = "kiwisolver"
+optional = false
+python-versions = ">=3.6"
+version = "1.2.0"
+
+[[package]]
+category = "main"
+description = "Python plotting package"
+name = "matplotlib"
+optional = false
+python-versions = ">=3.6"
+version = "3.1.3"
+
+[package.dependencies]
+cycler = ">=0.10"
+kiwisolver = ">=1.0.1"
+numpy = ">=1.11"
+pyparsing = ">=2.0.1,<2.0.4 || >2.0.4,<2.1.2 || >2.1.2,<2.1.6 || >2.1.6"
+python-dateutil = ">=2.1"
+
+[[package]]
+category = "main"
+description = "NumPy is the fundamental package for array computing with Python."
+name = "numpy"
+optional = false
+python-versions = ">=3.5"
+version = "1.18.1"
+
+[[package]]
+category = "main"
+description = "Powerful data structures for data analysis, time series, and statistics"
+name = "pandas"
+optional = false
+python-versions = ">=3.6.1"
+version = "1.0.3"
+
+[package.dependencies]
+numpy = ">=1.13.3"
+python-dateutil = ">=2.6.1"
+pytz = ">=2017.2"
+
+[package.extras]
+test = ["pytest (>=4.0.2)", "pytest-xdist", "hypothesis (>=3.58)"]
+
+[[package]]
+category = "main"
+description = "Python parsing module"
+name = "pyparsing"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+version = "2.4.6"
+
+[[package]]
+category = "main"
+description = "Extensions to the standard Python datetime module"
+name = "python-dateutil"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+version = "2.8.1"
+
+[package.dependencies]
+six = ">=1.5"
+
+[[package]]
+category = "main"
+description = "World timezone definitions, modern and historical"
+name = "pytz"
+optional = false
+python-versions = "*"
+version = "2019.3"
+
+[[package]]
+category = "main"
+description = "Python 2 and 3 compatibility utilities"
+name = "six"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
+version = "1.14.0"
+
+[[package]]
+category = "main"
+description = "A collection of tools for manipulating and analyzing SVG Path objects and Bezier curves."
+name = "svgpathtools"
+optional = false
+python-versions = "*"
+version = "1.3.3"
+
+[package.dependencies]
+numpy = "*"
+svgwrite = "*"
+
+[[package]]
+category = "main"
+description = "A Python library to create SVG drawings."
+name = "svgwrite"
+optional = false
+python-versions = ">=3.6"
+version = "1.4"
+
+[metadata]
+content-hash = "73e7c4cebea093b703e2b0e41a39be25d5ded6cf6df7714c492a2a7342893800"
+python-versions = "^3.8"
+
+[metadata.files]
+click = [
+ {file = "click-7.1.1-py2.py3-none-any.whl", hash = "sha256:e345d143d80bf5ee7534056164e5e112ea5e22716bbb1ce727941f4c8b471b9a"},
+ {file = "click-7.1.1.tar.gz", hash = "sha256:8a18b4ea89d8820c5d0c7da8a64b2c324b4dabb695804dbfea19b9be9d88c0cc"},
+]
+cycler = [
+ {file = "cycler-0.10.0-py2.py3-none-any.whl", hash = "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d"},
+ {file = "cycler-0.10.0.tar.gz", hash = "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"},
+]
+kiwisolver = [
+ {file = "kiwisolver-1.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:443c2320520eda0a5b930b2725b26f6175ca4453c61f739fef7a5847bd262f74"},
+ {file = "kiwisolver-1.2.0-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:efcf3397ae1e3c3a4a0a0636542bcad5adad3b1dd3e8e629d0b6e201347176c8"},
+ {file = "kiwisolver-1.2.0-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:fccefc0d36a38c57b7bd233a9b485e2f1eb71903ca7ad7adacad6c28a56d62d2"},
+ {file = "kiwisolver-1.2.0-cp36-none-win32.whl", hash = "sha256:60a78858580761fe611d22127868f3dc9f98871e6fdf0a15cc4203ed9ba6179b"},
+ {file = "kiwisolver-1.2.0-cp36-none-win_amd64.whl", hash = "sha256:556da0a5f60f6486ec4969abbc1dd83cf9b5c2deadc8288508e55c0f5f87d29c"},
+ {file = "kiwisolver-1.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7cc095a4661bdd8a5742aaf7c10ea9fac142d76ff1770a0f84394038126d8fc7"},
+ {file = "kiwisolver-1.2.0-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:c955791d80e464da3b471ab41eb65cf5a40c15ce9b001fdc5bbc241170de58ec"},
+ {file = "kiwisolver-1.2.0-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:603162139684ee56bcd57acc74035fceed7dd8d732f38c0959c8bd157f913fec"},
+ {file = "kiwisolver-1.2.0-cp37-none-win32.whl", hash = "sha256:03662cbd3e6729f341a97dd2690b271e51a67a68322affab12a5b011344b973c"},
+ {file = "kiwisolver-1.2.0-cp37-none-win_amd64.whl", hash = "sha256:4eadb361baf3069f278b055e3bb53fa189cea2fd02cb2c353b7a99ebb4477ef1"},
+ {file = "kiwisolver-1.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c31bc3c8e903d60a1ea31a754c72559398d91b5929fcb329b1c3a3d3f6e72113"},
+ {file = "kiwisolver-1.2.0-cp38-cp38-manylinux1_i686.whl", hash = "sha256:d52b989dc23cdaa92582ceb4af8d5bcc94d74b2c3e64cd6785558ec6a879793e"},
+ {file = "kiwisolver-1.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:e586b28354d7b6584d8973656a7954b1c69c93f708c0c07b77884f91640b7657"},
+ {file = "kiwisolver-1.2.0-cp38-none-win32.whl", hash = "sha256:d069ef4b20b1e6b19f790d00097a5d5d2c50871b66d10075dab78938dc2ee2cf"},
+ {file = "kiwisolver-1.2.0-cp38-none-win_amd64.whl", hash = "sha256:18d749f3e56c0480dccd1714230da0f328e6e4accf188dd4e6884bdd06bf02dd"},
+ {file = "kiwisolver-1.2.0.tar.gz", hash = "sha256:247800260cd38160c362d211dcaf4ed0f7816afb5efe56544748b21d6ad6d17f"},
+]
+matplotlib = [
+ {file = "matplotlib-3.1.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:6a0031774c6c68298183438edf2e738856d63a4c4797876fa81d0ee337f5361c"},
+ {file = "matplotlib-3.1.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b4c0010eff09ab65c77ad1a0eec6c7cccb9f6838c3c77dc5b4002fe0cf2912fd"},
+ {file = "matplotlib-3.1.3-cp36-cp36m-win32.whl", hash = "sha256:78d0772412c0653aa3e860c52ff08d1f5ba64334e2b86b09dc2d502657d8ca73"},
+ {file = "matplotlib-3.1.3-cp36-cp36m-win_amd64.whl", hash = "sha256:97f04d29a358826f205320fbc88d46ce5c5ff6fb54ae050042ff396beda52ca4"},
+ {file = "matplotlib-3.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4164265ca573481ce61c83322e6b33628203afeabeb3e22c50376f5d3ee0f9be"},
+ {file = "matplotlib-3.1.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b5ace0531255932ad19fe64c116ada2713f7b38381db8f68df0fa694409e67d1"},
+ {file = "matplotlib-3.1.3-cp37-cp37m-win32.whl", hash = "sha256:c7bb7ed3e011324b56462391ec3f4bbb7c8c6af5892ebfb45d312b15b4cdfc8d"},
+ {file = "matplotlib-3.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:f0023322c99328c40ce22678ab0ab5adfc27e338419966539398239996f63e8d"},
+ {file = "matplotlib-3.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:db8bbba9284845034a2f0e1add91dc5e89db8c996359bdcf677a8d6f88875cf1"},
+ {file = "matplotlib-3.1.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:635ded7834f43c8d999076236f7e90074d77f7b8345e5e82cd95af053cc29df1"},
+ {file = "matplotlib-3.1.3-cp38-cp38-win32.whl", hash = "sha256:8efff896c49676700dc6adace6137a854ff64a4d44ca057ff726960ffdaa47bf"},
+ {file = "matplotlib-3.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:470eed601ff5132364e0121a20d7c3d43fab969c8c333422c1b6b72fde2ed3c1"},
+ {file = "matplotlib-3.1.3-pp373-pypy36_pp73-win32.whl", hash = "sha256:23b71560c721109954c0215ffc81f4c80ce8528749d534a01a61e8ab737c5bce"},
+ {file = "matplotlib-3.1.3.tar.gz", hash = "sha256:db3121f12fb9b99f105d1413aebaeb3d943f269f3d262b45586d12765866f0c6"},
+]
+numpy = [
+ {file = "numpy-1.18.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:20b26aaa5b3da029942cdcce719b363dbe58696ad182aff0e5dcb1687ec946dc"},
+ {file = "numpy-1.18.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:70a840a26f4e61defa7bdf811d7498a284ced303dfbc35acb7be12a39b2aa121"},
+ {file = "numpy-1.18.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:17aa7a81fe7599a10f2b7d95856dc5cf84a4eefa45bc96123cbbc3ebc568994e"},
+ {file = "numpy-1.18.1-cp35-cp35m-win32.whl", hash = "sha256:f3d0a94ad151870978fb93538e95411c83899c9dc63e6fb65542f769568ecfa5"},
+ {file = "numpy-1.18.1-cp35-cp35m-win_amd64.whl", hash = "sha256:1786a08236f2c92ae0e70423c45e1e62788ed33028f94ca99c4df03f5be6b3c6"},
+ {file = "numpy-1.18.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ae0975f42ab1f28364dcda3dde3cf6c1ddab3e1d4b2909da0cb0191fa9ca0480"},
+ {file = "numpy-1.18.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:cf7eb6b1025d3e169989416b1adcd676624c2dbed9e3bcb7137f51bfc8cc2572"},
+ {file = "numpy-1.18.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b765ed3930b92812aa698a455847141869ef755a87e099fddd4ccf9d81fffb57"},
+ {file = "numpy-1.18.1-cp36-cp36m-win32.whl", hash = "sha256:2d75908ab3ced4223ccba595b48e538afa5ecc37405923d1fea6906d7c3a50bc"},
+ {file = "numpy-1.18.1-cp36-cp36m-win_amd64.whl", hash = "sha256:9acdf933c1fd263c513a2df3dceecea6f3ff4419d80bf238510976bf9bcb26cd"},
+ {file = "numpy-1.18.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:56bc8ded6fcd9adea90f65377438f9fea8c05fcf7c5ba766bef258d0da1554aa"},
+ {file = "numpy-1.18.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:e422c3152921cece8b6a2fb6b0b4d73b6579bd20ae075e7d15143e711f3ca2ca"},
+ {file = "numpy-1.18.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:b3af02ecc999c8003e538e60c89a2b37646b39b688d4e44d7373e11c2debabec"},
+ {file = "numpy-1.18.1-cp37-cp37m-win32.whl", hash = "sha256:d92350c22b150c1cae7ebb0ee8b5670cc84848f6359cf6b5d8f86617098a9b73"},
+ {file = "numpy-1.18.1-cp37-cp37m-win_amd64.whl", hash = "sha256:77c3bfe65d8560487052ad55c6998a04b654c2fbc36d546aef2b2e511e760971"},
+ {file = "numpy-1.18.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c98c5ffd7d41611407a1103ae11c8b634ad6a43606eca3e2a5a269e5d6e8eb07"},
+ {file = "numpy-1.18.1-cp38-cp38-manylinux1_i686.whl", hash = "sha256:9537eecf179f566fd1c160a2e912ca0b8e02d773af0a7a1120ad4f7507cd0d26"},
+ {file = "numpy-1.18.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:e840f552a509e3380b0f0ec977e8124d0dc34dc0e68289ca28f4d7c1d0d79474"},
+ {file = "numpy-1.18.1-cp38-cp38-win32.whl", hash = "sha256:590355aeade1a2eaba17617c19edccb7db8d78760175256e3cf94590a1a964f3"},
+ {file = "numpy-1.18.1-cp38-cp38-win_amd64.whl", hash = "sha256:39d2c685af15d3ce682c99ce5925cc66efc824652e10990d2462dfe9b8918c6a"},
+ {file = "numpy-1.18.1.zip", hash = "sha256:b6ff59cee96b454516e47e7721098e6ceebef435e3e21ac2d6c3b8b02628eb77"},
+]
+pandas = [
+ {file = "pandas-1.0.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d234bcf669e8b4d6cbcd99e3ce7a8918414520aeb113e2a81aeb02d0a533d7f7"},
+ {file = "pandas-1.0.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:ca84a44cf727f211752e91eab2d1c6c1ab0f0540d5636a8382a3af428542826e"},
+ {file = "pandas-1.0.3-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:1fa4bae1a6784aa550a1c9e168422798104a85bf9c77a1063ea77ee6f8452e3a"},
+ {file = "pandas-1.0.3-cp36-cp36m-win32.whl", hash = "sha256:863c3e4b7ae550749a0bb77fa22e601a36df9d2905afef34a6965bed092ba9e5"},
+ {file = "pandas-1.0.3-cp36-cp36m-win_amd64.whl", hash = "sha256:a210c91a02ec5ff05617a298ad6f137b9f6f5771bf31f2d6b6367d7f71486639"},
+ {file = "pandas-1.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11c7cb654cd3a0e9c54d81761b5920cdc86b373510d829461d8f2ed6d5905266"},
+ {file = "pandas-1.0.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6597df07ea361231e60c00692d8a8099b519ed741c04e65821e632bc9ccb924c"},
+ {file = "pandas-1.0.3-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:743bba36e99d4440403beb45a6f4f3a667c090c00394c176092b0b910666189b"},
+ {file = "pandas-1.0.3-cp37-cp37m-win32.whl", hash = "sha256:07c1b58936b80eafdfe694ce964ac21567b80a48d972879a359b3ebb2ea76835"},
+ {file = "pandas-1.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:12f492dd840e9db1688126216706aa2d1fcd3f4df68a195f9479272d50054645"},
+ {file = "pandas-1.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0ebe327fb088df4d06145227a4aa0998e4f80a9e6aed4b61c1f303bdfdf7c722"},
+ {file = "pandas-1.0.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:858a0d890d957ae62338624e4aeaf1de436dba2c2c0772570a686eaca8b4fc85"},
+ {file = "pandas-1.0.3-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:387dc7b3c0424327fe3218f81e05fc27832772a5dffbed385013161be58df90b"},
+ {file = "pandas-1.0.3-cp38-cp38-win32.whl", hash = "sha256:167a1315367cea6ec6a5e11e791d9604f8e03f95b57ad227409de35cf850c9c5"},
+ {file = "pandas-1.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:1a7c56f1df8d5ad8571fa251b864231f26b47b59cbe41aa5c0983d17dbb7a8e4"},
+ {file = "pandas-1.0.3.tar.gz", hash = "sha256:32f42e322fb903d0e189a4c10b75ba70d90958cc4f66a1781ed027f1a1d14586"},
+]
+pyparsing = [
+ {file = "pyparsing-2.4.6-py2.py3-none-any.whl", hash = "sha256:c342dccb5250c08d45fd6f8b4a559613ca603b57498511740e65cd11a2e7dcec"},
+ {file = "pyparsing-2.4.6.tar.gz", hash = "sha256:4c830582a84fb022400b85429791bc551f1f4871c33f23e44f353119e92f969f"},
+]
+python-dateutil = [
+ {file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"},
+ {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"},
+]
+pytz = [
+ {file = "pytz-2019.3-py2.py3-none-any.whl", hash = "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d"},
+ {file = "pytz-2019.3.tar.gz", hash = "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"},
+]
+six = [
+ {file = "six-1.14.0-py2.py3-none-any.whl", hash = "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"},
+ {file = "six-1.14.0.tar.gz", hash = "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a"},
+]
+svgpathtools = [
+ {file = "svgpathtools-1.3.3-py2.py3-none-any.whl", hash = "sha256:7f7bdafe2c03b312178460104705e1d554d8cf36c898bec41bdce9fed3504746"},
+ {file = "svgpathtools-1.3.3.tar.gz", hash = "sha256:e4b3784ae41b725fbce6a33a8981210967b16d0b557cb5d98c0ed0c81f0f89b9"},
+]
+svgwrite = [
+ {file = "svgwrite-1.4-py3-none-any.whl", hash = "sha256:fa842fb3129a9399d19b5e9602a022fcc7f2f3f24713550e765c488ffafd743d"},
+ {file = "svgwrite-1.4.zip", hash = "sha256:b38ac03b67f81c728d81a33e4711aaf3ab136a57156d721bb17f88525d9909bb"},
+]
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..f25baac
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,19 @@
+[tool.poetry]
+name = "svg-graph-extractor"
+version = "0.1.0"
+description = ""
+authors = ["Finnkauski "]
+
+[tool.poetry.dependencies]
+python = "^3.8"
+click = "7.1.1"
+matplotlib = "3.1.3"
+numpy = "1.18.1"
+pandas = "1.0.3"
+svgpathtools = "1.3.3"
+
+[tool.poetry.dev-dependencies]
+
+[build-system]
+requires = ["poetry>=0.12"]
+build-backend = "poetry.masonry.api"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f2f5ad0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+click==7.1.1
+matplotlib==3.1.3
+numpy==1.18.1
+pandas==1.0.3
+svgpathtools==1.3.3
\ No newline at end of file
diff --git a/svgs/.gitkeep b/svgs/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/tests/resources/gaps.svg b/tests/resources/gaps.svg
new file mode 100644
index 0000000..4d38166
--- /dev/null
+++ b/tests/resources/gaps.svg
@@ -0,0 +1,8 @@
+
+
+
diff --git a/tests/resources/nogaps.svg b/tests/resources/nogaps.svg
new file mode 100644
index 0000000..78606d8
--- /dev/null
+++ b/tests/resources/nogaps.svg
@@ -0,0 +1,8 @@
+
+
+
diff --git a/tests/test_extract.py b/tests/test_extract.py
new file mode 100644
index 0000000..e00b002
--- /dev/null
+++ b/tests/test_extract.py
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+from svgpathtools import svg2paths
+import createcsvs
+
+
+def test_categorise_paths():
+    """A graph without gaps yields three reference lines."""
+    import os
+
+    # Given
+    # Resolve the fixture from this file's location so the test does not
+    # depend on the directory pytest is started from
+    filepath = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "resources", "nogaps.svg"
+    )
+
+    # When
+    paths, _ = svg2paths(filepath)
+    xlim, lines, trend = createcsvs.categorise_paths(paths)
+
+    # Then
+    assert len(lines) == 3
+
+
+def test_convert_units():
+    """A graph without gaps converts to one point per day (43 points)."""
+    import os
+
+    # Given
+    # Resolve the fixture from this file's location so the test does not
+    # depend on the directory pytest is started from
+    filepath = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "resources", "nogaps.svg"
+    )
+
+    # When
+    paths, _ = svg2paths(filepath)
+    xlim, lines, trend = createcsvs.categorise_paths(paths)
+    trend_converted = createcsvs.convert_units(trend, lines, xlim, yspan=80, xspan=42)
+
+    # Then
+    assert len(trend_converted) == 43
+
+
+def test_categorise_paths_w_gaps():
+    """A graph with gaps in its trend line still yields three reference lines."""
+    import os
+
+    # Given
+    # Resolve the fixture from this file's location so the test does not
+    # depend on the directory pytest is started from
+    filepath = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "resources", "gaps.svg"
+    )
+
+    # When
+    paths, _ = svg2paths(filepath)
+    xlim, lines, trend = createcsvs.categorise_paths(paths)
+
+    # Then
+    assert len(lines) == 3
+
+
+def test_convert_units_w_gaps():
+    """A graph with gaps converts to only the points that are present (27)."""
+    import os
+
+    # Given
+    # Resolve the fixture from this file's location so the test does not
+    # depend on the directory pytest is started from
+    filepath = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "resources", "gaps.svg"
+    )
+
+    # When
+    paths, _ = svg2paths(filepath)
+    xlim, lines, trend = createcsvs.categorise_paths(paths)
+    trend_converted = createcsvs.convert_units(trend, lines, xlim, yspan=80, xspan=42)
+
+    # Then
+    assert len(trend_converted) == 27