-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Replaces pandas with polars * Refactored and merged some funcs, added tests * Refactored requirements.txt * Fixes error in viz sql * Replaces format() with fstrings in most instances * Updates README
- Loading branch information
Showing
60 changed files
with
410,860 additions
and
381,987 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,5 @@ | ||
ORDS_DB_HOST=localhost | ||
ORDS_DB_DATABASE=ords_test | ||
ORDS_DB_COLLATION=utf8mb4_unicode_ci | ||
ORDS_DB_USER=admin | ||
ORDS_DB_PWD=admin | ||
ORDS_DB_CONN="{'host': 'localhost','database': 'ords','user': 'admin','pwd': 'admin','collation': 'utf8mb4_unicode_ci'}" | ||
ORDS_DB_TEST="{'host': 'localhost','database': 'ords_test','user': 'admin','pwd': 'admin','collation': 'utf8mb4_unicode_ci'}" | ||
ORDS_DATA=OpenRepairData_v0.3_aggregate_202309 | ||
ORDS_CATS=OpenRepairData_v0.3_Product_Categories | ||
DEEPL_KEY= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,8 @@ dat/ords/*.csv | |
dat/backup*.* | ||
out/ | ||
tmp/ | ||
tmp.* | ||
*.tmp | ||
solr/solr-* | ||
pyvenv.cfg | ||
*.code-workspace | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,25 +63,23 @@ It is compiled and published by the [Open Repair Alliance (ORA)](https://openrep | |
* Python with venv module. | ||
* Optional MySQL 8.x and libmysqlclient-dev. | ||
|
||
### Working copy | ||
|
||
```git clone git@github.com:openrepair/tools.git ./ords-tools``` | ||
|
||
### Virtual environment | ||
|
||
```python3 -m venv ords-tools``` | ||
|
||
```cd ords-tools``` | ||
|
||
```source bin/activate``` | ||
```git init``` | ||
|
||
To install the requirements. | ||
```git remote add origin git@github.com:openrepair/tools.git``` | ||
|
||
```pip install -r requirements.txt``` | ||
```git pull origin main``` | ||
|
||
To upgrade to the newest requirements. | ||
```git branch --set-upstream-to=origin/main main``` | ||
|
||
```pip install -r requirements.txt --upgrade``` | ||
```source bin/activate``` | ||
|
||
```pip install -r requirements.txt``` | ||
|
||
### Data | ||
|
||
|
@@ -93,8 +91,6 @@ Copy ```.env.example``` to ```.env``` and edit as necessary. | |
|
||
```.env``` is in .gitignore, do not add it to this repo. | ||
|
||
[.env file documentation](https://saurabh-kumar.com/python-dotenv/#file-format) | ||
|
||
## Links | ||
|
||
### Repair data | ||
|
@@ -107,18 +103,8 @@ Copy ```.env.example``` to ```.env``` and edit as necessary. | |
|
||
### Python | ||
|
||
[Python and Virtual Environments](https://csguide.cs.princeton.edu/software/virtualenv#scm) | ||
|
||
[Using Python environments in VS Code](https://code.visualstudio.com/docs/python/environments) | ||
|
||
[Python](https://docs.python.org/) | ||
|
||
[W3 Schools Python](https://www.w3schools.com/python/) | ||
|
||
[Numpy](https://numpy.org/) | ||
|
||
[Pandas](https://pandas.pydata.org/) | ||
|
||
[Scikit-learn](https://scikit-learn.org/) | ||
|
||
[Natural Language Processing Demystified](https://www.nlpdemystified.org/) | ||
|
@@ -129,14 +115,4 @@ Copy ```.env.example``` to ```.env``` and edit as necessary. | |
|
||
[MySQL 8.0 Reference Manual](https://dev.mysql.com/doc/refman/8.0/en/) | ||
|
||
[OpenRefine](https://openrefine.org/) | ||
|
||
[R](https://www.r-project.org/) | ||
|
||
[Apache OpenNLP](https://opennlp.apache.org/) | ||
|
||
[Solr](https://solr.apache.org/) | ||
|
||
[Data-Driven Documents (D3)](https://d3js.org/) | ||
|
||
[Orange](https://orangedatamining.com/) |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,13 @@ | ||
# About | ||
|
||
A scrappy set of wrapper functions that alleviate some of the most tedious code repetition. | ||
|
||
## Tests | ||
|
||
Run all tests. | ||
|
||
`$ python3 -m unittest discover tests/` | ||
|
||
Run one test, e.g. | ||
|
||
`python3 -m unittest tests/testTextFuncs.py` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,10 @@ | ||
__all__ = ["pathfuncs", "envfuncs", "logfuncs", "dbfuncs", "datefuncs", "miscfuncs", "textfuncs", "deeplfuncs"] | ||
__all__ = [ | ||
"cfg", | ||
"pathfuncs", | ||
"dbfuncs", | ||
"datefuncs", | ||
"miscfuncs", | ||
"ordsfuncs", | ||
"textfuncs", | ||
"deeplfuncs", | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import os | ||
import ast | ||
import logging | ||
from dotenv import load_dotenv | ||
load_dotenv() | ||
|
||
# Project root: the parent of the directory that contains this module.
ROOT_DIR = os.path.realpath(os.path.join(os.path.dirname(__file__), ".."))
# DATA_DIR, LOG_DIR and OUT_DIR end with a path separator (the empty final
# component passed to join adds it); ORDS_DIR does not.
DATA_DIR = os.path.join(ROOT_DIR, "dat", "")
ORDS_DIR = os.path.join(ROOT_DIR, "dat/ords")
LOG_DIR = os.path.join(ROOT_DIR, "log", "")
OUT_DIR = os.path.join(ROOT_DIR, "out", "")

# Create the working directories at import time. They are anchored to
# ROOT_DIR rather than the current working directory so that the directories
# that get created are the same ones the constants above point to, whatever
# directory the importing script is launched from. exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
for _dir in (DATA_DIR, ORDS_DIR, LOG_DIR, OUT_DIR):
    os.makedirs(_dir, exist_ok=True)
|
||
def init_logger(caller):
    """Configure the root logger to write to a per-script log file.

    The log file is named after ``caller`` (its base name with the extension
    removed) plus ``.log`` and is placed in ``LOG_DIR``. The file is opened in
    write mode, so an existing log of the same name is truncated when the
    handler is created.

    Args:
        caller: Path of the calling script, e.g. ``__file__``.

    Returns:
        The root logger, with its level set to DEBUG.
    """
    filename, _ = os.path.splitext(os.path.basename(caller))
    path = os.path.join(LOG_DIR, filename + ".log")
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    # The root logger is shared process-wide: attaching a new handler on
    # every call would write each record once per call and leak open file
    # handles. Reuse an existing handler that already targets this file.
    # FileHandler stores its target as an absolute path in baseFilename.
    target = os.path.abspath(path)
    for handler in logger.handlers:
        if (
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target
        ):
            return logger

    fh = logging.FileHandler(path, mode="w")
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)
    return logger
|
||
def get_envvar(key):
    """Return the value of the environment variable ``key``.

    Args:
        key: Name of the environment variable to read.

    Returns:
        The variable's string value, or ``False`` when the variable is not
        set (an error message is printed in that case). Note that a variable
        set to the empty string is returned as ``""``, which is also falsy.
    """
    try:
        return os.environ[key]
    except KeyError:
        print(f"ERROR! {key} NOT FOUND!")
        return False
|
||
def get_dbvars(con="ORDS_DB_CONN"):
    """Read database connection settings from an environment variable.

    The variable is expected to hold a Python dict literal such as
    ``"{'host': 'localhost', 'database': 'ords'}"``. It is parsed with
    ``ast.literal_eval``, which only evaluates literals.

    Args:
        con: Name of the environment variable holding the dict literal.

    Returns:
        The parsed dict, or ``False`` when the variable is unset, cannot be
        parsed as a literal, or does not evaluate to a dict. A message
        explaining the failure is printed in each of those cases.
    """
    try:
        dbstr = os.environ.get(con)
        if dbstr is None:
            # Report the real cause instead of literal_eval's opaque
            # "malformed node or string: None".
            print(f"Exception: environment variable {con} not found")
            return False
        dbdict = ast.literal_eval(dbstr)
    except (
        ValueError,
        TypeError,
        SyntaxError,
        MemoryError,
        RecursionError,
    ) as error:
        # These are the exceptions literal_eval is documented to raise on
        # malformed input; anything else is a genuine bug and propagates.
        print(f"Exception: {error}")
        return False
    if not isinstance(dbdict, dict):
        # A valid literal of another type (list, string, number, ...) is not
        # usable as connection settings.
        print(f"Exception: {con} is not a dict literal")
        return False
    return dbdict
|
||
def get_version() -> str:
    """Return the hard-coded version string (currently ``"0.0.1"``)."""
    return "0.0.1"
|
Oops, something went wrong.