From 06c5d2f227dc680e427f682cce6b05bfff972f1a Mon Sep 17 00:00:00 2001 From: Patryk Mrukot Date: Mon, 22 May 2017 23:09:43 +0200 Subject: [PATCH] feature(fixtures): Load fixtures into the database (#18) --- .gitignore | 1 + Dockerfile | 8 ++- ...20170402123134_change_question_to_text.exs | 9 ++++ docker-compose.yml | 2 +- fixtures/.gitignore | 2 + fixtures/README.md | 10 ++++ fixtures/main.py | 16 ++++++ fixtures/requirements.txt | 6 +++ fixtures/src/__init__.py | 11 ++++ fixtures/src/codingfix.py | 35 ++++++++++++ fixtures/src/config.py | 4 ++ fixtures/src/loader.py | 16 ++++++ fixtures/src/queries/__init__.py | 11 ++++ fixtures/src/queries/answers.py | 37 +++++++++++++ fixtures/src/queries/questions.py | 36 +++++++++++++ fixtures/src/queries/subjects.py | 17 ++++++ fixtures/src/questionparser.py | 54 +++++++++++++++++++ scripts/populate_database.sh | 1 + scripts/run_migrations.sh | 3 ++ 19 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 aion/priv/repo/migrations/20170402123134_change_question_to_text.exs create mode 100644 fixtures/.gitignore create mode 100644 fixtures/README.md create mode 100644 fixtures/main.py create mode 100644 fixtures/requirements.txt create mode 100644 fixtures/src/__init__.py create mode 100644 fixtures/src/codingfix.py create mode 100644 fixtures/src/config.py create mode 100644 fixtures/src/loader.py create mode 100644 fixtures/src/queries/__init__.py create mode 100644 fixtures/src/queries/answers.py create mode 100644 fixtures/src/queries/questions.py create mode 100644 fixtures/src/queries/subjects.py create mode 100644 fixtures/src/questionparser.py create mode 100644 scripts/populate_database.sh create mode 100755 scripts/run_migrations.sh diff --git a/.gitignore b/.gitignore index 4fcffd3..2009acc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .idea/ .DS_Store /postgres-data +*.pyc diff --git a/Dockerfile b/Dockerfile index dff357c..879ed2c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,8 @@ 
FROM elixir:1.4.2 WORKDIR /aion ADD aion /aion +ADD fixtures /fixtures +ADD scripts /scripts # Install nodeJS RUN apt-get update \ @@ -13,4 +15,8 @@ RUN apt-get update \ RUN mix local.hex --force \ && mix local.rebar --force \ && mix deps.get \ - && npm install + && npm install + +# Install python pip and dependencies +RUN apt-get -y install python3-pip \ + && pip3 install -r /fixtures/requirements.txt diff --git a/aion/priv/repo/migrations/20170402123134_change_question_to_text.exs b/aion/priv/repo/migrations/20170402123134_change_question_to_text.exs new file mode 100644 index 0000000..3e35f50 --- /dev/null +++ b/aion/priv/repo/migrations/20170402123134_change_question_to_text.exs @@ -0,0 +1,9 @@ +defmodule Aion.Repo.Migrations.ChangeQuestionToText do + use Ecto.Migration + + def change do + alter table(:questions) do + modify :content, :text + end + end +end diff --git a/docker-compose.yml b/docker-compose.yml index 960d61e..c342df1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: dockerfile: Dockerfile depends_on: - db - command: mix phoenix.server + command: bash /scripts/run_migrations.sh volumes: - ./aion/:/aion - /aion/deps diff --git a/fixtures/.gitignore b/fixtures/.gitignore new file mode 100644 index 0000000..2e1165f --- /dev/null +++ b/fixtures/.gitignore @@ -0,0 +1,2 @@ +jpks/ +__pycache__/ diff --git a/fixtures/README.md b/fixtures/README.md new file mode 100644 index 0000000..cf54491 --- /dev/null +++ b/fixtures/README.md @@ -0,0 +1,10 @@ +To install packages required to use question loader run: +>```pip install -r requirements.txt``` + +Then simply run: +> python main.py + +or: +> python main.py -h + +to see available options. 
diff --git a/fixtures/main.py b/fixtures/main.py
new file mode 100644
index 0000000..005c878
--- /dev/null
+++ b/fixtures/main.py
@@ -0,0 +1,28 @@
+"""Command-line entry point that loads question fixtures into PostgreSQL."""
+import argparse
+
+from src.loader import load_questions
+from src.questionparser import get_questions
+
+DEFAULT_PATHS = ['jpks/']
+
+
+def main(argv=None):
+    """Parse the command line and load every requested path.
+
+    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
+    """
+    parser = argparse.ArgumentParser(description='Dump questions in postgresql database.')
+    parser.add_argument('-path', type=str, nargs='*',
+                        help="Provide path to file/folder containing questions in a proper format")
+    args = parser.parse_args(argv)
+    # `-path` given without values yields an empty list; treat it like the
+    # flag being absent instead of silently loading nothing.
+    paths = args.path or DEFAULT_PATHS
+
+    for path in paths:
+        load_questions(get_questions(path))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/fixtures/requirements.txt b/fixtures/requirements.txt
new file mode 100644
index 0000000..e8956fb
--- /dev/null
+++ b/fixtures/requirements.txt
@@ -0,0 +1,6 @@
+appdirs==1.4.3
+packaging==16.8
+psycopg2==2.7.1
+pyparsing==2.2.0
+six==1.10.0
+tqdm==4.11.2
diff --git a/fixtures/src/__init__.py b/fixtures/src/__init__.py
new file mode 100644
index 0000000..ab2234d
--- /dev/null
+++ b/fixtures/src/__init__.py
@@ -0,0 +1,18 @@
+"""Helpers shared by the fixture loader package."""
+
+
+def escape_string(s):
+    """Escape *s* for embedding in a single-quoted SQL string literal.
+
+    Prefer passing values as query parameters; this helper remains only
+    for callers that still build SQL text by hand.
+
+    :param s: text to escape, or ``None``.
+    :return: *s* with every ``'`` doubled, or ``None`` when *s* is ``None``.
+    """
+    if s is None:
+        return s
+
+    # Only the single quote terminates a standard SQL string literal.
+    # Doubling `"` as well would store two quote characters in the database.
+    return s.replace("'", "''")
diff --git a/fixtures/src/codingfix.py b/fixtures/src/codingfix.py
new file mode 100644
index 0000000..1522663
--- /dev/null
+++ b/fixtures/src/codingfix.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""Decode the ``%x`` escapes that question files use for Polish letters."""
+
+LETTER_FIXES = {
+    '%s': 'ś',
+    '%z': 'ż',
+    '%o': 'ó',
+    '%l': 'ł',
+    '%e': 'ę',
+    '%a': 'ą',
+    '%x': 'ź',
+    '%c': 'ć',
+    '%n': 'ń',
+    '%S': 'Ś',
+    '%Z': 'Ż',
+    '%O': 'Ó',
+    '%L': 'Ł',
+    '%E': 'Ę',
+    '%A': 'Ą',
+    '%X': 'Ź',
+    '%C': 'Ć',
+    '%N': 'Ń',
+}
+
+
+def fix_letters(text):
+    """Return *text* with every escape from ``LETTER_FIXES`` decoded.
+
+    :param text: raw text taken from a question file.
+    :return: the text with the escapes replaced by Polish letters.
+    """
+    # Replacements never produce a `%`, so the order of the passes
+    # cannot make one substitution feed another.
+    for old, new in LETTER_FIXES.items():
+        text = text.replace(old, new)
+    return text
+
+
+def fix_coding(question):
+    """Decode the escapes in every value of *question*, in place.
+
+    :param question: dict of question fields with string values.
+    :return: the same dict object, so the call can be used inline.
+    """
+    for key, value in question.items():
+        question[key] = fix_letters(value)
+
+    return question
diff --git a/fixtures/src/config.py b/fixtures/src/config.py
new file mode 100644
index 0000000..9395e77
--- /dev/null
+++ b/fixtures/src/config.py
@@ -0,0 +1,11 @@
+"""Database connection settings.
+
+Each value can be overridden through the environment; the defaults are
+the values that used to be hard-coded here.
+"""
+import os
+
+dbname = os.environ.get('AION_DB_NAME', 'aion')
+user = os.environ.get('AION_DB_USER', 'root')
+password = os.environ.get('AION_DB_PASSWORD', 'password')
+host = os.environ.get('AION_DB_HOST', 'db')
diff --git a/fixtures/src/loader.py b/fixtures/src/loader.py
new file mode 100644
index 0000000..8771ee2
--- /dev/null
+++ b/fixtures/src/loader.py
@@ -0,0 +1,27 @@
+"""Load parsed questions into the PostgreSQL database."""
+import psycopg2
+from tqdm import tqdm
+
+from .config import dbname, user, password, host
+from .queries.answers import insert_answers
+from .queries.subjects import get_or_insert_subject
+from .queries.questions import get_or_insert_question
+
+
+def load_questions(questions):
+    """Insert every question together with its subject and answers.
+
+    :param questions: iterable of dicts with ``subject``, ``question`` and
+        ``answers`` keys and an optional ``image`` key.
+    """
+    # Keyword arguments avoid building a DSN string, which breaks when a
+    # value contains spaces or quotes.
+    conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host)
+    try:
+        for question in tqdm(questions, desc="Loading questions into the database"):
+            get_or_insert_subject(question['subject'], conn)
+            get_or_insert_question(question, conn)
+            insert_answers(question, conn)
+    finally:
+        # Release the server connection even when an insert fails.
+        conn.close()
diff --git a/fixtures/src/queries/__init__.py b/fixtures/src/queries/__init__.py
new file mode 100644
index 0000000..ab2234d
--- /dev/null
+++ b/fixtures/src/queries/__init__.py
@@ -0,0 +1,4 @@
+"""Query helpers for the fixture loader."""
+from src import escape_string
+
+__all__ = ['escape_string']
diff --git a/fixtures/src/queries/answers.py b/fixtures/src/queries/answers.py
new file mode 100644
index 0000000..bdf3af8
--- /dev/null
+++ b/fixtures/src/queries/answers.py
@@ -0,0 +1,73 @@
+"""Queries for the ``answers`` table."""
+from src.queries.questions import get_question_id
+from src.queries.subjects import get_subject_id
+
+
+def get_answer_id(question_id, answer_content, conn):
+    """Return the id of an answer, or ``None`` when it does not exist.
+
+    :param question_id: id of the question the answer belongs to.
+    :param answer_content: exact answer text.
+    :param conn: open psycopg2 connection.
+    """
+    with conn.cursor() as cur:
+        cur.execute(
+            "SELECT id FROM answers WHERE question_id = %s AND content = %s",
+            (question_id, answer_content),
+        )
+        row = cur.fetchone()
+
+    return row[0] if row is not None else None
+
+
+def _find_question_id(question, conn):
+    """Look up the id of the stored row matching the *question* dict."""
+    subject_id = get_subject_id(question['subject'], conn)
+    question_id = get_question_id(subject_id, question['question'], question.get('image'), conn)
+    if question_id is None:
+        raise LookupError("Question is not in the database: {}".format(question))
+    return question_id
+
+
+def get_or_insert_answer(question, answer, conn, question_id=None):
+    """Insert *answer* for *question* unless it is already stored.
+
+    :param question: parsed question dict the answer belongs to.
+    :param answer: answer text.
+    :param conn: open psycopg2 connection.
+    :param question_id: id of the stored question; looked up when omitted.
+    :return: id of the existing or newly inserted answer.
+    :raises LookupError: if the question has not been inserted yet.
+    """
+    if question_id is None:
+        question_id = _find_question_id(question, conn)
+
+    if get_answer_id(question_id, answer, conn) is None:
+        with conn.cursor() as cur:
+            cur.execute(
+                "INSERT INTO answers (content, question_id, inserted_at, updated_at) "
+                "VALUES (%s, %s, now(), now())",
+                (answer, question_id),
+            )
+        # Commit like the subject and question inserts do; otherwise the
+        # rows are discarded when the connection is closed.
+        conn.commit()
+
+    return get_answer_id(question_id, answer, conn)
+
+
+def insert_answers(question, conn):
+    """Store every ``;``-separated answer of *question*.
+
+    :param question: parsed question dict with an ``answers`` key.
+    :param conn: open psycopg2 connection.
+    """
+    answers = [answer.strip() for answer in question['answers'].split(';')]
+    answers = [answer for answer in answers if answer]
+    if not answers:
+        return
+
+    # Resolve the question once instead of once per answer.
+    question_id = _find_question_id(question, conn)
+    for answer in answers:
+        get_or_insert_answer(question, answer, conn, question_id)
diff --git a/fixtures/src/queries/questions.py b/fixtures/src/queries/questions.py
new file mode 100644
index 0000000..4d21184
--- /dev/null
+++ b/fixtures/src/queries/questions.py
@@ -0,0 +1,51 @@
+"""Queries for the ``questions`` table."""
+from src.queries.subjects import get_subject_id
+
+
+def get_question_id(subject_id, content, image, conn):
+    """Return the id of a question, or ``None`` when it does not exist.
+
+    :param subject_id: id of the subject the question belongs to.
+    :param content: exact question text.
+    :param image: image file name, or ``None`` for a question without one.
+    :param conn: open psycopg2 connection.
+    """
+    with conn.cursor() as cur:
+        # IS NOT DISTINCT FROM compares NULL-safely, so one query serves
+        # questions with and without an image.
+        cur.execute(
+            "SELECT id FROM questions WHERE content = %s "
+            "AND image_name IS NOT DISTINCT FROM %s AND subject_id = %s",
+            (content, image, subject_id),
+        )
+        row = cur.fetchone()
+    return row[0] if row is not None else None
+
+
+def get_or_insert_question(question, conn):
+    """Insert *question* unless an identical row is already stored.
+
+    :param question: parsed question dict with ``subject`` and ``question``
+        keys and an optional ``image`` key.
+    :param conn: open psycopg2 connection.
+    :return: id of the existing or newly inserted question.
+    :raises LookupError: if the subject has not been inserted yet.
+    """
+    subject_id = get_subject_id(question['subject'], conn)
+    if subject_id is None:
+        raise LookupError("Subject is not in the database: {}".format(question['subject']))
+    content = question['question']
+    image = question.get('image')
+
+    if get_question_id(subject_id, content, image, conn) is None:
+        with conn.cursor() as cur:
+            cur.execute(
+                "INSERT INTO questions (subject_id, content, image_name, inserted_at, updated_at) "
+                "VALUES (%s, %s, %s, now(), now())",
+                (subject_id, content, image),
+            )
+        conn.commit()
+    else:
+        print("Question already in the database: {}".format(question))
+
+    return get_question_id(subject_id, content, image, conn)
diff --git a/fixtures/src/queries/subjects.py b/fixtures/src/queries/subjects.py
new file mode 100644
index 0000000..3443f48
--- /dev/null
+++ b/fixtures/src/queries/subjects.py
@@ -0,0 +1,32 @@
+"""Queries for the ``subjects`` table."""
+
+
+def get_subject_id(subject, conn):
+    """Return the id of the subject named *subject*, or ``None``.
+
+    :param subject: subject name.
+    :param conn: open psycopg2 connection.
+    """
+    with conn.cursor() as cur:
+        cur.execute("SELECT id FROM subjects WHERE name = %s", (subject,))
+        row = cur.fetchone()
+
+    return row[0] if row is not None else None
+
+
+def get_or_insert_subject(subject, conn):
+    """Insert the subject named *subject* unless it is already stored.
+
+    :param subject: subject name.
+    :param conn: open psycopg2 connection.
+    :return: id of the existing or newly inserted subject.
+    """
+    if get_subject_id(subject, conn) is None:
+        with conn.cursor() as cur:
+            cur.execute(
+                "INSERT INTO subjects (name, inserted_at, updated_at) VALUES (%s, now(), now())",
+                (subject,),
+            )
+        conn.commit()
+
+    return get_subject_id(subject, conn)
diff --git a/fixtures/src/questionparser.py b/fixtures/src/questionparser.py
new file mode 100644
index 0000000..3788399
--- /dev/null
+++ b/fixtures/src/questionparser.py
@@ -0,0 +1,73 @@
+"""Parse question files into dictionaries ready for loading."""
+import os
+import re
+
+from src.codingfix import fix_coding
+
+# One record: a `P` line, `%subject%`, an optional `$image$` line, then
+# `@question@` and `@answers@`. The question and answers may span lines.
+QUESTION_PATTERN = re.compile(
+    r'P\n'
+    r'%(?P<subject>.*?)%\n'
+    r'(?:\$(?P<image>[^$]*)\$\n)?'
+    r'@(?P<question>[^@]*)@\n'
+    r'@(?P<answers>[^@]*)@'
+)
+REQUIRED_FIELDS = ('subject', 'question', 'answers')
+OPTIONAL_FIELDS = ('image',)
+
+
+def parse_file(path, encoding=None):
+    """Return the list of questions found in the file at *path*.
+
+    :param path: path of a question file.
+    :param encoding: text encoding of the file; ``None`` uses the
+        platform default, as ``open`` does.
+    :return: list of dicts with ``subject``, ``question`` and ``answers``
+        keys, plus ``image`` when the record names one.
+    :raises ValueError: if a record has an empty required field.
+    """
+    with open(path, 'r', encoding=encoding) as f:
+        text = f.read()
+
+    questions = []
+    for match in QUESTION_PATTERN.finditer(text):
+        question = {name: match.group(name)
+                    for name in REQUIRED_FIELDS + OPTIONAL_FIELDS if match.group(name)}
+        missing = [name for name in REQUIRED_FIELDS if name not in question]
+        if missing:
+            # Raise instead of assert: asserts disappear under `python -O`.
+            raise ValueError("{}: question without {}: {!r}".format(
+                path, ', '.join(missing), match.group(0)))
+        questions.append(fix_coding(question))
+    return questions
+
+
+def explore_directory(path):
+    """Return the questions from every file directly inside *path*.
+
+    Subdirectories are not visited.
+
+    :param path: path of a directory with question files.
+    :return: list of question dicts, file by file in name order.
+    """
+    questions = []
+    # os.listdir order is arbitrary; sort so repeated runs load alike.
+    for elem in sorted(os.listdir(path)):
+        elem_path = os.path.join(path, elem)
+        if os.path.isfile(elem_path):
+            questions.extend(parse_file(elem_path))
+    return questions
+
+
+def get_questions(path):
+    """Return the questions stored in the file or directory at *path*.
+
+    :param path: path of a question file or a directory of such files.
+    :raises FileNotFoundError: if *path* is neither a file nor a directory.
+    """
+    if os.path.isdir(path):
+        return explore_directory(path)
+    if os.path.isfile(path):
+        return parse_file(path)
+    raise FileNotFoundError("The specified path doesn't lead to any directory or file")
diff --git a/scripts/populate_database.sh b/scripts/populate_database.sh
new file mode 100644
index 0000000..5ff6adf
--- /dev/null
+++ b/scripts/populate_database.sh
@@ -0,0 +1 @@
+python3 /fixtures/main.py -path /fixtures/jpks/
diff --git a/scripts/run_migrations.sh b/scripts/run_migrations.sh
new file mode 100755
index 0000000..ed821b0
--- /dev/null
+++ b/scripts/run_migrations.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Presumably the database container can still be starting when this runs,
+# so retry the migration a few times instead of relying on one fixed sleep.
+for attempt in 1 2 3 4 5 6 7 8 9 10; do
+    if mix ecto.migrate; then
+        exec mix phoenix.server
+    fi
+    sleep 2
+done
+
+echo "Migrations did not succeed after 10 attempts" >&2
+exit 1