diff --git a/.gitignore b/.gitignore index 1c73140..96d6ac6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,5 @@ *.pyc # Generated by make release -/sql_data_integrity_checker.egg-info/ +/sdic.egg-info/ /dist/ diff --git a/Makefile b/Makefile index 0aa79f6..bfa5cd0 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ test: clean: rm -rf dist/ - rm -rf sql_data_integrity_checker.egg-info + rm -rf sdic.egg-info release: clean python setup.py sdist diff --git a/README.md b/README.md index acbf565..56f6d69 100644 --- a/README.md +++ b/README.md @@ -1,42 +1,130 @@ -# sql-data-integrity-checker +# sdic -[![CircleCI](https://circleci.com/gh/percolate/sql-data-integrity-checker.svg?style=svg)](https://circleci.com/gh/percolate/sql-data-integrity-checker) -[![codecov](https://codecov.io/gh/percolate/sql-data-integrity-checker/branch/master/graph/badge.svg)](https://codecov.io/gh/percolate/sql-data-integrity-checker) +__A.K.A. SQL Data Integrity Checker__ -Asynchronous soft constraints executed against you databases. -Queries that are intended to be ran here should produce `bad data`, -or data that should not be in the table that is the object of the query. +[![CircleCI](https://circleci.com/gh/percolate/sdic.svg?style=svg)](https://circleci.com/gh/percolate/sdic) +[![codecov](https://codecov.io/gh/percolate/sdic/branch/master/graph/badge.svg)](https://codecov.io/gh/percolate/sdic) + +## One line purpose + +`sdic` executes all the SQL queries found in a folder and displays their output. + +## More detailed purpose + +In any RDBMS, you can set constraints to prevent the application from saving the +data in a way that's not consistent. E.g. if you want all your users to have an +email, you can set the email column to `NOT NULL`. + +This works for simple constraints: + +1. It's easy to implement +1. 
It's cheap for the database to check on every change + +But for more complex constraints that you'd like to set, it'd be either very +expensive to check on every write, or even impossible to write as a +constraint. + +With `sdic`, you can write your complex constraints as simple queries, and have +the database run them asynchronously at the frequency you want. + +We call them "soft constraints". + +## Example + +Let's say that you have a `users` table, defined like this: + +- `id` Primary Key `NOT NULL` +- `firstname` `NULL` +- `lastname` `NULL` +- `email` `NOT NULL` + +Now, let's suppose your application allows users to register just with their +`email` but can fill in their `firstname` and `lastname` later on, but we don't +want our users to have only a firstname or a lastname. + +Simply put, our constraint is: Make sure every user has either a `firstname` +and a `lastname` set, or both set to NULL. + +With `sdic`, you can add this `enforce_fullname.sql` file and let `sdic` check +that every user complies nightly. + +```sql +-- Make sure every user with a name has both a firstname and a lastname +SELECT id, firstname, lastname +FROM users +WHERE + (firstname IS NULL AND lastname IS NOT NULL) OR + (firstname IS NOT NULL AND lastname IS NULL) +LIMIT 10 +; +``` + +Put this file in `your-environment/your-server/enforce_fullname.sql`. + +Edit the `your-environment/servers.ini` file to tell sdic how to connect to +your server. + +Now run `sdic your-environment` and it will output any user that does not comply +with your soft constraint. + +You can have as many soft constraints on as many servers and as many +environments as you need. + +## Install as a cron + +If you want to get an email every night to give you a list of all the soft +constraints that have been broken during the last day, just add it to your +crontab. We like to have it run daily, so we can fix any bug generating bad +data before it becomes a real problem. 
+ +Example crontab: +``` +MAILTO="dba@acme.com" +@daily sdic live +``` + +`dba@acme.com` is the address that will receive the broken soft constraints +every day. Make sure your local MTA is properly configured on your system. You +can test it by running `date | mail -s test dba@acme.com`. + +## Databases supported + +Any database supported by [SQLAlchemy](http://www.sqlalchemy.org/) should be +supported, including [PostgreSQL](https://www.postgresql.org/) and +[MySQL](https://www.mysql.com/). ## Install -`pip install sql-data-integrity-checker` +`pip install sdic` ## Configuration +An example configuration is given in the `example-environment` folder. + The script reads from a designated folder, whose path you pass as an argument. This folder should consist of the following: -1. A `servers.ini` file, which contains the Database URL/s (see`examples` folder) - +1. A `servers.ini` file, which contains the Database URLs (see the +`example-environment` folder) 1. A sub-folder, which contains the actual queries in a `.sql` file format ## Usage A `directory` argument is mandatory: -`sql-data-integrity-checker path/to/your/folder` +`sdic path/to/your/folder` If you have e.g more than one server in a folder, but you want to only run one of them, an optional `server` argument can be passed as well: -`sql-data-integrity-checker path/to/your/folder server1` +`sdic path/to/your/folder server1` If a query produces an output, it will look something like this: -```bash +``` -----===== /!\ INCOMING BAD DATA /!\ =====----- -Server: circleci +Server: big-database File: test_query.sql SQL Query: diff --git a/example-environment/server1/test_query.sql b/example-environment/server1/test_query.sql new file mode 100644 index 0000000..deb619c --- /dev/null +++ b/example-environment/server1/test_query.sql @@ -0,0 +1,4 @@ +-- This is a query that returns current time. +-- You should never do this, but this can be used to see if sdic is actually +-- printing results. 
+SELECT NOW(); diff --git a/example-environment/server2/test_query.sql b/example-environment/server2/test_query.sql new file mode 100644 index 0000000..fff5956 --- /dev/null +++ b/example-environment/server2/test_query.sql @@ -0,0 +1,4 @@ +-- This is a query that returns number 1. +-- You should never do this, but this can be used to see if sdic is actually +-- printing results. +SELECT 1; diff --git a/examples/servers.ini b/example-environment/servers.ini similarity index 100% rename from examples/servers.ini rename to example-environment/servers.ini diff --git a/examples/server1/test_query.sql b/examples/server1/test_query.sql deleted file mode 100644 index 5438015..0000000 --- a/examples/server1/test_query.sql +++ /dev/null @@ -1,2 +0,0 @@ --- This is a query that returns current time. -Select now(); diff --git a/examples/server2/test_query.sql b/examples/server2/test_query.sql deleted file mode 100644 index 30034cc..0000000 --- a/examples/server2/test_query.sql +++ /dev/null @@ -1,2 +0,0 @@ --- This is a query that returns number 1. -Select 1; diff --git a/sdic/main.py b/sdic/main.py index 672ac95..f5c1e2f 100644 --- a/sdic/main.py +++ b/sdic/main.py @@ -1,5 +1,7 @@ #!/usr/bin/env python -"""sql-data-integrity-checker +"""sdic + +A.K.A. SQL Data Integrity Checker Asynchronous soft constraints executed against your databases. The path to your queries and servers.ini files should be defined as an arg. @@ -7,7 +9,7 @@ in a directory, but want to only run one. Usage: - sql_data_integrity_checker [] + sdic [] Options: -h --help Show this screen. 
@@ -167,7 +169,7 @@ def get_servers_from_config(directory): def main(): args = docopt(__doc__, - version="sql-data-integrity-checker {}".format(VERSION)) + version="sdic {}".format(VERSION)) # Check that the given directory exists if not isdir(args['']): @@ -185,7 +187,7 @@ def main(): # Everything's ok, run the main program with lock: - syslog.openlog('data_integrity_checker') + syslog.openlog('sdic') has_output = False if not args['']: diff --git a/setup.py b/setup.py index e3f5eda..104eb0b 100644 --- a/setup.py +++ b/setup.py @@ -1,22 +1,22 @@ -"""Setup file to automate the install of Mackup in the Python environment.""" +"""Setup file to automate the install of sdic in the Python environment.""" from setuptools import setup from sdic.constants import VERSION setup( - name='sql-data-integrity-checker', + name='sdic', version=VERSION, author='Laurent Raufaste', author_email='analogue@glop.org', - url='https://github.com/percolate/sql-data-integrity-checker', + url='https://github.com/percolate/sdic', description='Asynchronous soft constraints executed against you databases', - keywords='sql mysql postgresql sqlalchemy data integrity constraints', + keywords='sdic sql mysql postgresql sqlalchemy data integrity constraints', license='GPLv3', packages=['sdic'], install_requires=['docopt', 'prettytable'], entry_points={ 'console_scripts': [ - 'sql-data-integrity-checker=sdic.main:main', + 'sdic=sdic.main:main', ], }, classifiers=[