Skip to content

Commit

Permalink
feature(fixtures): Load fixtures into the database (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
pmrukot authored May 22, 2017
1 parent d462492 commit 06c5d2f
Show file tree
Hide file tree
Showing 19 changed files with 277 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.idea/
.DS_Store
/postgres-data
*.pyc
8 changes: 7 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ FROM elixir:1.4.2

WORKDIR /aion
ADD aion /aion
ADD fixtures /fixtures
ADD scripts /scripts

# Install nodeJS
RUN apt-get update \
Expand All @@ -13,4 +15,8 @@ RUN apt-get update \
RUN mix local.hex --force \
&& mix local.rebar --force \
&& mix deps.get \
&& npm install
&& npm install

# Install python pip and dependencies
RUN apt-get -y install python3-pip \
&& pip3 install -r /fixtures/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Migration that changes the type of the `content` column of the `questions`
# table to `:text`.
defmodule Aion.Repo.Migrations.ChangeQuestionToText do
use Ecto.Migration

# NOTE(review): `modify` is given only the new type, so this `change/0` may not
# be automatically reversible on rollback - confirm against the Ecto version in use.
def change do
alter table(:questions) do
modify :content, :text
end
end
end
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ services:
dockerfile: Dockerfile
depends_on:
- db
command: mix phoenix.server
command: bash /scripts/run_migrations.sh
volumes:
- ./aion/:/aion
- /aion/deps
Expand Down
2 changes: 2 additions & 0 deletions fixtures/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
jpks/
__pycache__/
10 changes: 10 additions & 0 deletions fixtures/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
To install the packages required to use the question loader, run:
>```pip install -r requirements.txt```
Then simply run:
> python main.py
or:
> python main.py -h
to see available options.
16 changes: 16 additions & 0 deletions fixtures/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import argparse
from src.loader import load_questions
from src.questionparser import get_questions

if __name__ == '__main__':
    # Command-line entry point: parse every given path and load the
    # resulting questions into the database.
    parser = argparse.ArgumentParser(description='Dump questions in postgresql database.')
    parser.add_argument('-path', type=str, nargs='*',
                        help="Provide path to file/folder containing questions in a proper format")
    args = parser.parse_args()

    # `args.path` is None when `-path` is omitted and [] when `-path` is given
    # without values; in both cases fall back to the default directory
    # instead of silently loading nothing.
    paths = args.path or ['jpks/']

    for path in paths:
        questions = get_questions(path)
        load_questions(questions)
6 changes: 6 additions & 0 deletions fixtures/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
appdirs==1.4.3
packaging==16.8
psycopg2==2.7.1
pyparsing==2.2.0
six==1.10.0
tqdm==4.11.2
11 changes: 11 additions & 0 deletions fixtures/src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
def escape_string(s):
    """Return ``s`` with every single and double quote doubled.

    ``None`` is passed through unchanged so optional values can be handed
    in without a prior check.

    :param s: string to escape, or ``None``
    :return: the escaped string, or ``None`` when ``s`` is ``None``
    """
    if s is None:
        return None
    return s.replace("'", "''").replace('"', '""')
35 changes: 35 additions & 0 deletions fixtures/src/codingfix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# !/usr/bin/env python
# -*- coding: utf-8 -*-


def fix_letters(text):
    """Replace ``%``-prefixed ASCII placeholders with Polish diacritic letters.

    Each placeholder is a percent sign followed by one letter (for example
    ``%l`` or ``%S``); lower- and upper-case placeholders map to the
    lower- and upper-case diacritic respectively.

    :param text: string possibly containing placeholders
    :return: ``text`` with every known placeholder substituted
    """
    # The two strings are aligned position by position: plain[i] -> accented[i].
    plain = 'szoleaxcnSZOLEAXCN'
    accented = 'śżółęąźćńŚŻÓŁĘĄŹĆŃ'
    for ascii_letter, polish_letter in zip(plain, accented):
        text = text.replace('%' + ascii_letter, polish_letter)
    return text


def fix_coding(question):
    """Apply ``fix_letters`` to every value of ``question`` in place.

    :param question: dict whose values are strings with ``%`` placeholders
    :return: the same dict object, with its values rewritten
    """
    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for field in question:
        question[field] = fix_letters(question[field])
    return question
4 changes: 4 additions & 0 deletions fixtures/src/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import os

# Database connection settings used by the fixtures loader.
# Each value can be overridden through an environment variable so that
# credentials do not have to be edited in source; the defaults are the
# values that were previously hard-coded.
dbname = os.environ.get('AION_DB_NAME', 'aion')
user = os.environ.get('AION_DB_USER', 'root')
password = os.environ.get('AION_DB_PASSWORD', 'password')
host = os.environ.get('AION_DB_HOST', 'db')
16 changes: 16 additions & 0 deletions fixtures/src/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import psycopg2
from tqdm import tqdm

from .config import dbname, user, password, host
from .queries.answers import insert_answers
from .queries.subjects import get_or_insert_subject
from .queries.questions import get_or_insert_question


def load_questions(questions):
    """Insert the given questions, their subjects and answers into the database.

    A connection is opened from the settings in ``config``, used for all
    questions, and always closed afterwards.

    :param questions: iterable of question dicts; each must provide the
        ``'subject'`` key read here plus whatever keys the question and
        answer helpers read
    """
    # Keyword arguments avoid hand-building a DSN string, which breaks on
    # values that contain spaces or quotes.
    conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host)
    try:
        for question in tqdm(questions, desc="Loading questions into the database"):
            get_or_insert_subject(question['subject'], conn)
            get_or_insert_question(question, conn)
            insert_answers(question, conn)
            # Commit once per question so its answers are persisted even if
            # a later question fails or no further insert triggers a commit.
            conn.commit()
    finally:
        # Release the connection even when loading is interrupted by an error.
        conn.close()
11 changes: 11 additions & 0 deletions fixtures/src/queries/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
def escape_string(s):
    """Double every single quote and every double quote found in ``s``.

    :param s: string to escape; ``None`` is accepted and returned as is
    :return: the escaped string, or ``None`` when ``s`` is ``None``
    """
    if s is None:
        return s

    # One pass over the string: each quote character maps to itself twice.
    quote_doubling = str.maketrans({"'": "''", '"': '""'})
    return s.translate(quote_doubling)
37 changes: 37 additions & 0 deletions fixtures/src/queries/answers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from src.queries import escape_string
from src.queries.questions import get_question_id
from src.queries.subjects import get_subject_id


def get_answer_id(question_id, answer_content, conn):
    """Fetch the ``answers`` row matching a question id and answer text.

    :param question_id: id of the question the answer belongs to
    :param answer_content: answer text, already escaped for use inside a
        single-quoted SQL literal (it is interpolated as is)
    :param conn: open database connection providing ``cursor()``
    :return: whatever ``fetchone()`` yields for the ``SELECT id`` query,
        i.e. the first matching row or ``None`` when there is none
    """
    sql = """SELECT id FROM answers WHERE question_id = {} AND content = '{}'""".format(
        question_id, answer_content)
    with conn.cursor() as cursor:
        cursor.execute(sql)
        return cursor.fetchone()


def get_or_insert_answer(question, answer, conn):
    """Make sure ``answer`` is stored for ``question`` and return its row.

    The owning question is located through its subject, text and optional
    image; the answer is inserted only when no identical answer exists yet.

    :param question: dict with ``'subject'`` and ``'question'`` keys and an
        optional ``'image'`` key
    :param answer: raw answer text
    :param conn: open database connection
    :return: the value returned by ``get_answer_id`` for the stored answer
    """
    question_content = escape_string(question['question'])

    # A missing image and an image explicitly set to None are both stored
    # as SQL NULL; otherwise the name is quoted as a SQL literal.
    raw_image = question.get('image')
    image = 'NULL' if raw_image is None else "'" + escape_string(raw_image) + "'"

    # The subject is looked up by its raw name, exactly as
    # get_or_insert_question does; escaping it first would double any
    # double quotes and make the lookup miss the stored subject.
    subject_id = get_subject_id(question['subject'], conn)
    question_id = get_question_id(subject_id, question_content, image, conn)
    answer = escape_string(answer)

    if get_answer_id(question_id, answer, conn) is None:
        # NOTE: the statement is string-built to stay consistent with
        # get_answer_id, which expects the same escaped text.
        query = """INSERT INTO answers (content, question_id, inserted_at, updated_at) VALUES ('{}', {}, now(), now())""".format(
            answer, question_id)
        with conn.cursor() as cur:
            cur.execute(query)
        # Persist the new row right away, like the subject and question inserts do.
        conn.commit()

    return get_answer_id(question_id, answer, conn)


def insert_answers(question, conn):
    """Store every answer of ``question``.

    The ``'answers'`` value is a single string in which the individual
    answers are separated by ``;``.

    :param question: question dict providing the ``'answers'`` key
    :param conn: open database connection
    """
    for single_answer in question['answers'].split(';'):
        get_or_insert_answer(question, single_answer, conn)
36 changes: 36 additions & 0 deletions fixtures/src/queries/questions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from src.queries import escape_string
from src.queries.subjects import get_subject_id


def get_question_id(subject_id, content, image, conn):
    """Look up the id of a question by subject, text and image.

    :param subject_id: id of the subject the question belongs to
    :param content: question text, already escaped for a single-quoted
        SQL literal
    :param image: either the string ``'NULL'`` (question without image) or
        an already quoted SQL literal holding the image name
    :param conn: open database connection providing ``cursor()``
    :return: the id of the first matching question, or ``None``
    """
    # NULL cannot be compared with "=", so the image condition has two forms.
    if image == 'NULL':
        image_condition = "image_name IS NULL"
    else:
        image_condition = "image_name = {}".format(image)

    sql = """SELECT id FROM questions WHERE content = '{}' AND {} AND subject_id = '{}'""".format(
        content, image_condition, subject_id)

    with conn.cursor() as cursor:
        cursor.execute(sql)
        row = cursor.fetchone()

    if row is None:
        return None
    return row[0]


def get_or_insert_question(question, conn):
    """Return the id of ``question``, inserting the question if it is new.

    A message is printed when the question is already stored.

    :param question: dict with ``'subject'`` and ``'question'`` keys and an
        optional ``'image'`` key
    :param conn: open database connection
    :return: id of the stored question as reported by ``get_question_id``
    """
    subject_id = get_subject_id(question['subject'], conn)
    content = escape_string(question['question'])

    # The image is rendered as a ready-to-embed SQL fragment: NULL or a
    # quoted literal.
    raw_image = question.get('image', None)
    image = "NULL" if raw_image is None else "'" + escape_string(raw_image) + "'"

    with conn.cursor() as cur:
        already_stored = get_question_id(subject_id, content, image, conn) is not None
        if already_stored:
            print("Question already in the database: {}".format(question))
        else:
            insert_sql = """INSERT INTO questions (subject_id, content, image_name, inserted_at, updated_at) VALUES ({}, '{}', {}, now(), now());""".format(
                subject_id, content, image)
            cur.execute(insert_sql)
            conn.commit()

    return get_question_id(subject_id, content, image, conn)
17 changes: 17 additions & 0 deletions fixtures/src/queries/subjects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
def get_subject_id(subject, conn):
    """Return the id of the subject named ``subject``, or ``None`` if absent.

    :param subject: raw (unescaped) subject name; pre-escaping it would make
        the lookup miss names that contain quotes
    :param conn: open database connection providing ``cursor()``
    :return: the first column of the first matching row, or ``None``
    """
    with conn.cursor() as cur:
        # The name is sent as a bound parameter so quotes in it can neither
        # break the statement nor inject SQL.
        cur.execute("""SELECT id FROM subjects WHERE name = %s""", (subject,))
        subject_id = cur.fetchone()

    return subject_id[0] if subject_id is not None else None


def get_or_insert_subject(subject, conn):
    """Return the id of the subject named ``subject``, creating it if needed.

    :param subject: raw subject name
    :param conn: open database connection
    :return: id of the subject as reported by ``get_subject_id``
    """
    if get_subject_id(subject, conn) is None:
        with conn.cursor() as cur:
            # The name is passed as a bound parameter so quotes in it cannot
            # alter the INSERT statement.
            cur.execute(
                """INSERT INTO subjects (name, inserted_at, updated_at) VALUES (%s, now(), now());""",
                (subject,),
            )
        conn.commit()

    return get_subject_id(subject, conn)
54 changes: 54 additions & 0 deletions fixtures/src/questionparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import os, re

from src.codingfix import fix_coding


def parse_file(path):
    """Parse a question file into a list of question dicts.

    Each record in the file has the form::

        P
        %subject%
        $image$        (optional line)
        @question@
        @answers@

    Every parsed dict holds the non-empty groups among ``'subject'``,
    ``'question'``, ``'answers'`` and ``'image'``, passed through
    ``fix_coding``.

    :param path: path of the file to read
    :return: list of question dicts, in file order
    :raises ValueError: if a record has an empty subject, question or answers
    """
    # Every fragment is a raw string so that regex escapes such as \$ and \s
    # are not parsed as (invalid) Python string escapes.
    # [^$] and [^@] already match newlines, so no extra "|\n" alternative is
    # used; that redundant alternation made the pattern ambiguous and prone
    # to exponential backtracking on records missing their closing delimiter.
    question_pattern = re.compile(
        r'P\n'
        r'%(?P<subject>.*?)%\n'
        r'(\$(?P<image>[^$]*?)\$\n)?'
        r'@(?P<question>[^@]*?)@\n'
        r'@(?P<answers>[^@]*?)@\s*?'
    )
    required = ('subject', 'question', 'answers')
    group_names = required + ('image',)

    with open(path, 'r') as f:
        content = f.read()

    questions = []
    for match in question_pattern.finditer(content):
        # Groups that did not match or matched an empty string are left out.
        question = {name: match.group(name) for name in group_names if match.group(name)}
        missing = [name for name in required if name not in question]
        if missing:
            # Explicit error instead of ``assert``, which is removed under -O.
            raise ValueError(
                "Question in {} is missing required field(s): {}".format(path, ', '.join(missing)))
        questions.append(fix_coding(question))
    return questions


def explore_directory(path):
    """Parse every regular file located directly inside ``path``.

    Sub-directories are skipped, not descended into.

    :param path: directory to scan
    :return: flat list with the question dicts of all parsed files
    """
    questions = []
    for elem in os.listdir(path):
        elem_path = os.path.join(path, elem)
        if os.path.isfile(elem_path):
            questions.extend(parse_file(elem_path))
    return questions


def get_questions(path):
    """Collect the questions found at ``path``.

    :param path: a directory (all its files are parsed) or a single file
    :return: list of question dicts
    :raises FileNotFoundError: if ``path`` is neither a directory nor a file
    """
    if os.path.isdir(path):
        return explore_directory(path)
    if os.path.isfile(path):
        return parse_file(path)
    raise FileNotFoundError("The specified path doesn't lead to any directory or file")
1 change: 1 addition & 0 deletions scripts/populate_database.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Load the question fixtures found in /fixtures/jpks/ into the database.
python3 /fixtures/main.py -path /fixtures/jpks/
3 changes: 3 additions & 0 deletions scripts/run_migrations.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Container start-up script: run the pending Ecto migrations, then start
# the Phoenix server.
# NOTE(review): the fixed delay presumably gives the database container time
# to accept connections before migrating - confirm it is long enough.
sleep 4
mix ecto.migrate
mix phoenix.server

0 comments on commit 06c5d2f

Please sign in to comment.