Skip to content

Commit

Permalink
Merge pull request #2 from percolate/add-necessary-elements
Browse files Browse the repository at this point in the history
Setup sql-data-integrity-checker in a repo
  • Loading branch information
lra committed Jun 6, 2016
2 parents a150f21 + 68a8418 commit e008048
Show file tree
Hide file tree
Showing 7 changed files with 913 additions and 19 deletions.
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

53 changes: 39 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,49 @@
[![CircleCI](https://circleci.com/gh/percolate/sql-data-integrity-checker.svg?style=svg)](https://circleci.com/gh/percolate/sql-data-integrity-checker)
[![codecov](https://codecov.io/gh/percolate/sql-data-integrity-checker/branch/master/graph/badge.svg)](https://codecov.io/gh/percolate/sql-data-integrity-checker)

Asynchronous soft constraints executed against your databases
Asynchronous soft constraints executed against your databases.
Queries that are intended to be run here should return `bad data`,
i.e. rows that should not be in the table that is the object of the query.

## How to improve it
## Install

```
# Install the development version on your system
make develop
`pip install sql-data-integrity-checker`

# Edit the code and try your changes
sql-data-integrity-checker
## Configuration

# Edit the code and try your changes
sql-data-integrity-checker
The script reads from a designated folder, whose path you pass as an argument.
This folder should consist of the following:

[...]
1. A `servers.ini` file, which contains the database URL(s) (see the `examples` folder)

# Uninstall the development version on your system
make undevelop
```
1. One sub-folder per server, named after that server's section in `servers.ini`, which contains the actual queries as `.sql` files

## Usage

A `directory` argument is mandatory:

`sql-data-integrity-checker path/to/your/folder`

If you have, e.g., more than one server in a folder but only want to
run one of them, an optional `server` argument can be passed as well:

That's the usual workflow.
`sql-data-integrity-checker path/to/your/folder server1`

If a query produces an output, it will look something like this:

```bash
-----===== /!\ INCOMING BAD DATA /!\ =====-----

Server: circleci
File: test_query.sql

SQL Query:
-- This is a query that returns current time.
Select now();

+---------------------+
| now() |
+---------------------+
| 2016-06-03 19:27:14 |
+---------------------+
```
2 changes: 2 additions & 0 deletions examples/server1/test_query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- This is a query that returns current time.
Select now();
2 changes: 2 additions & 0 deletions examples/server2/test_query.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- This is a query that returns number 1.
Select 1;
5 changes: 5 additions & 0 deletions examples/servers.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[server1]
db_url = postgresql://john:doe@localhost/mydatabase

[server2]
db_url = mysql://michael:scott@localhost/foo
194 changes: 190 additions & 4 deletions sdic/main.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,206 @@
#!/usr/bin/env python
"""sql-data-integrity-checker
Asynchronous soft constraints executed against you databases.
Asynchronous soft constraints executed against your databases.
The path to your queries and servers.ini files should be defined as an arg.
Optionally, declare a single server if you have multiple ones
in a directory, but want to only run one.
Usage:
sql-data-integrity-checker
sql_data_integrity_checker <directory> [<server>]
Options:
-h --help Show this screen.
"""
from os.path import isdir
from lockfile import FileLock
import sys
import os
import fnmatch
import prettytable
import ConfigParser
import time
import syslog

from sqlalchemy import create_engine
from sqlalchemy import text

from docopt import docopt
from constants import VERSION

# Name of the INI file listing the servers; it is looked up inside the
# directory given on the command line.
CONFIG_SERVERS = 'servers.ini'


def error(message):
    """
    Print an error message and exit the script
    Params: message string Text displayed after the "Error:" prefix
    Returns: never returns, raises SystemExit with status 1
    """
    # Diagnostics go to stderr so they never mix with the query reports
    # that are printed on stdout.
    sys.stderr.write("Error: {}\n".format(message))
    # sys.exit is always available, whereas the bare `exit` helper is only
    # injected by the site module (absent with `python -S` or frozen apps).
    sys.exit(1)


def get_query_files(directory):
    """
    Get the list of filenames of SQL files found in the specified folder
    Params: directory string Folder to look into (not searched recursively)
    Returns: Array of file names (not full paths), sorted alphabetically
    """
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # in which the queries are run and reported reproducible.
    candidates = sorted(fnmatch.filter(os.listdir(directory), '*.sql'))

    # Keep regular files only: callers open every returned name for reading,
    # which would fail on e.g. a sub-folder that happens to end in ".sql".
    return [name for name in candidates
            if os.path.isfile(os.path.join(directory, name))]


def launch_queries(directory, server):
    """
    Run every SQL file of a server and print a report for each one that
    returns data
    Param directory string Folder holding the per-server query sub-folders
    Param server dict describing a server; its 'name' entry selects the
          sub-folder of `directory` that is scanned
    Returns: Bool, True when at least one query produced output
    """
    server_name = server['name']
    found_bad_data = False

    for sql_name in get_query_files(os.path.join(directory, server_name)):
        sql_path = os.path.join(directory, server_name, sql_name)
        with open(sql_path, 'r') as handle:
            sql = handle.read()

        # Time the query so its duration can be logged
        started = time.time()
        report = get_query_output(server, sql)
        elapsed = round(time.time() - started, 3)
        syslog.syslog('{} successfully ran in {} sec.'.format(sql_name,
                                                              elapsed))

        # Nothing came back: the data is clean for this query
        if not report:
            continue

        found_bad_data = True

        # Header announcing that this query has results
        print("-----===== /!\\ INCOMING BAD DATA /!\\ =====-----")
        print("")
        print("Server: {}".format(server_name))
        print("File: {}".format(sql_name))
        print("")

        # The raw query, followed by what it returned
        print("SQL Query:")
        print(sql)
        print(report)
        print("")

    return found_bad_data


def get_query_output(server, query):
    """
    Launch a query and render the rows it returns as a pretty text table
    Args:
        server (dict): Server to launch the query on; its 'db_url' entry is
            handed to sqlalchemy's create_engine
        query (str): Query to launch
    Returns:
        (prettytable.PrettyTable) filled with the rows, or None when the
        query returned no row
    """
    engine = create_engine(server['db_url'])
    try:
        # The context manager closes the connection even when the query fails
        with engine.connect() as conn:
            result = conn.execute(text(query))
            try:
                rows = result.fetchall()
                titles = list(result.keys())
            finally:
                result.close()
    finally:
        # A fresh engine is created on every call, so release its pool too
        engine.dispose()

    # Decide on the rows actually fetched: rowcount is only specified for
    # UPDATE/DELETE statements and is driver dependent for a SELECT, so
    # relying on it could silently drop bad data.
    if not rows:
        return None

    table = prettytable.PrettyTable(titles)
    for row in rows:
        # Byte strings are decoded leniently so that invalid UTF-8 coming
        # from the database cannot break the rendering of the table.
        table.add_row([
            item.decode('utf8', 'ignore') if isinstance(item, bytes) else item
            for item in row
        ])

    return table


def get_servers_from_config(directory):
    """
    Read the servers.ini file of a folder and describe each server it declares
    param directory string Folder containing the servers.ini file
    return List of server dictionaries, one per section of the file, each
           holding the section name under 'name' plus the recognised settings
    """
    # Settings copied from a section; any other option is ignored
    recognised = ('db_url',)

    parser = ConfigParser.RawConfigParser()
    parser.read(os.path.join(directory, CONFIG_SERVERS))

    def describe(section):
        """Build the dictionary describing one section of the file"""
        entry = {'name': section}
        entry.update((key, value)
                     for key, value in parser.items(section)
                     if key in recognised)
        return entry

    return [describe(section) for section in parser.sections()]


def main():
    """
    Entry point: run the queries of every configured server, or of the single
    server named on the command line
    Returns: 1 when at least one query produced output, 0 otherwise
             (exits with status 1 through error() on invalid input or when
             another instance holds the lock)
    """
    args = docopt(__doc__,
                  version="sql-data-integrity-checker {}".format(VERSION))
    directory = args['<directory>']
    wanted_server = args['<server>']

    # Check that the given directory exists
    if not isdir(directory):
        error("The folder {} does not exist".format(directory))

    # Try to get the config of the servers we are gonna use
    servers = get_servers_from_config(directory)
    if wanted_server:
        servers = [server for server in servers
                   if server['name'] == wanted_server]
        # A mistyped server name must not look like a clean run
        if not servers:
            error("The server {} is not defined in {}".format(
                wanted_server, CONFIG_SERVERS))

    # Check that we are not already running
    program_name = os.path.basename(sys.argv[0])
    lock = FileLock("/tmp/{}.lock".format(program_name))
    if lock.is_locked():
        error("{} is already running. Delete {} if it's a mistake.".format(
            program_name, lock.path))

    # Everything's ok, run the main program
    has_output = False
    with lock:
        syslog.openlog('data_integrity_checker')
        try:
            # Every server is processed, even once output has been seen
            for server in servers:
                if launch_queries(directory, server):
                    has_output = True
        finally:
            # Always executed, including when bad data was found or a
            # query raised
            syslog.closelog()

    return 1 if has_output else 0

if __name__ == "__main__":
    # Run the checker exactly once and use its return value as the exit status
    sys.exit(main())
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
keywords='sql mysql postgresql sqlalchemy data integrity constraints',
license='GPLv3',
packages=['sdic'],
install_requires=['docopt'],
install_requires=['docopt', 'prettytable'],
entry_points={
'console_scripts': [
'sql-data-integrity-checker=sdic.main:main',
Expand Down

0 comments on commit e008048

Please sign in to comment.