-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature(fixtures): Load fixtures into the database (#18)
- Loading branch information
Showing
19 changed files
with
277 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
.idea/ | ||
.DS_Store | ||
/postgres-data | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
9 changes: 9 additions & 0 deletions
9
aion/priv/repo/migrations/20170402123134_change_question_to_text.exs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
defmodule Aion.Repo.Migrations.ChangeQuestionToText do
  @moduledoc """
  Changes the type of the `content` column of the `questions` table to `:text`.
  """
  use Ecto.Migration

  # `modify` cannot be reversed automatically from inside `change/0` (the
  # migration has no record of the previous column type), so both directions
  # are spelled out explicitly to keep `mix ecto.rollback` working.
  def up do
    alter table(:questions) do
      modify :content, :text
    end
  end

  # NOTE(review): assumes the column was a plain `:string` before this
  # migration - confirm against the migration that created `questions`.
  def down do
    alter table(:questions) do
      modify :content, :string
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
jpks/ | ||
__pycache__/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
To install the packages required by the question loader, run:
> ```pip install -r requirements.txt```

Then simply run:
> ```python main.py```

or run:
> ```python main.py -h```

to see the available options.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import argparse | ||
from src.loader import load_questions | ||
from src.questionparser import get_questions | ||
|
||
DEFAULT_PATHS = ['jpks/']


def main(argv=None):
    """Parse the command line and load every question found under the given paths.

    :param argv: argument list to parse; ``None`` makes argparse read ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description='Dump questions in postgresql database.')
    parser.add_argument('-path', type=str, nargs='*',
                        help="Provide path to file/folder containing questions in a proper format")
    args = parser.parse_args(argv)

    # Omitting `-path` yields None and a bare `-path` yields []; in both cases
    # fall back to the default directory instead of silently loading nothing.
    paths = args.path or DEFAULT_PATHS

    for path in paths:
        questions = get_questions(path)
        load_questions(questions)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
appdirs==1.4.3 | ||
packaging==16.8 | ||
psycopg2==2.7.1 | ||
pyparsing==2.2.0 | ||
six==1.10.0 | ||
tqdm==4.11.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
def escape_string(s):
    """Escape *s* for embedding inside a single-quoted SQL string literal.

    Only the single quote delimits such a literal, so it is the only character
    that is doubled. Double quotes are ordinary characters there; doubling
    them would store the text with extra quotes.

    :param s: text to escape, or ``None``.
    :return: the escaped text, or ``None`` when *s* is ``None``.
    """
    if s is None:
        return None
    return s.replace("'", "''")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# !/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
|
||
|
||
def fix_letters(text):
    """Replace ``%``-prefixed ASCII codes in *text* with Polish diacritic letters.

    Each code is a percent sign followed by one letter (for example ``%s``
    becomes ``ś`` and ``%L`` becomes ``Ł``). Any other text is left untouched.
    """
    code_letters = 'szoleaxcnSZOLEAXCN'
    polish_letters = 'śżółęąźćńŚŻÓŁĘĄŹĆŃ'
    # The two strings are aligned position by position: code -> diacritic.
    for code, letter in zip(code_letters, polish_letters):
        text = text.replace('%' + code, letter)
    return text
|
||
|
||
def fix_coding(question):
    """Run ``fix_letters`` over every value of *question*, in place.

    The same dict object is returned with each value replaced by its fixed
    version; the keys are left as they are.
    """
    # Snapshot the keys first; only existing entries are reassigned.
    for field in list(question):
        question[field] = fix_letters(question[field])
    return question
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
import os

# Connection settings used by the fixtures loader. Each value may be
# overridden through the environment so credentials do not have to be edited
# in source; the defaults equal the previously hard-coded values.
dbname = os.environ.get('AION_DB_NAME', 'aion')
user = os.environ.get('AION_DB_USER', 'root')
password = os.environ.get('AION_DB_PASSWORD', 'password')
host = os.environ.get('AION_DB_HOST', 'db')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import psycopg2 | ||
from tqdm import tqdm | ||
|
||
from .config import dbname, user, password, host | ||
from .queries.answers import insert_answers | ||
from .queries.subjects import get_or_insert_subject | ||
from .queries.questions import get_or_insert_question | ||
|
||
|
||
def load_questions(questions):
    """Insert the subject, question and answers of every item into the database.

    :param questions: iterable of dicts; each one is read through its
        ``'subject'`` key here and then handed to the question/answer helpers.
    """
    # Keyword arguments let the driver quote each value, so a password or host
    # containing spaces or quotes no longer corrupts a hand-built DSN string.
    conn = psycopg2.connect(dbname=dbname, user=user, password=password, host=host)
    try:
        for question in tqdm(questions, desc="Loading questions into the database"):
            get_or_insert_subject(question['subject'], conn)
            get_or_insert_question(question, conn)
            insert_answers(question, conn)
            # Commit once per question so that everything written for it is
            # persisted even when a helper did not commit on its own.
            conn.commit()
    finally:
        # Always release the connection; uncommitted work is discarded.
        conn.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
def escape_string(s):
    """Escape *s* for embedding inside a single-quoted SQL string literal.

    Only the single quote delimits such a literal, so it is the only character
    that is doubled. Double quotes are ordinary characters there; doubling
    them would store the text with extra quotes.

    :param s: text to escape, or ``None``.
    :return: the escaped text, or ``None`` when *s* is ``None``.
    """
    if s is None:
        return None
    return s.replace("'", "''")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
from src.queries import escape_string | ||
from src.queries.questions import get_question_id | ||
from src.queries.subjects import get_subject_id | ||
|
||
|
||
def get_answer_id(question_id, answer_content, conn):
    """Look up an answer by its question id and content.

    *answer_content* is placed verbatim inside a single-quoted SQL literal, so
    the caller has to pass it already escaped.

    :return: whatever ``fetchone()`` yields for the ``id`` column - the row
        itself (not the bare id), or ``None`` when nothing matches.
    """
    sql = """SELECT id FROM answers WHERE question_id = {} AND content = '{}'""".format(
        question_id, answer_content)
    with conn.cursor() as cursor:
        cursor.execute(sql)
        return cursor.fetchone()
|
||
|
||
def get_or_insert_answer(question, answer, conn):
    """Make sure *answer* is stored for *question* and return its lookup result.

    :param question: dict with ``'subject'`` and ``'question'`` keys and an
        optional ``'image'`` key.
    :param answer: raw (unescaped) answer text.
    :param conn: open database connection.
    :return: the result of ``get_answer_id`` for the stored answer.
    """
    # The subject is passed exactly as stored: every other caller of
    # get_subject_id hands it the raw name, so escaping it here made the
    # lookup disagree with the inserted row.
    subject_id = get_subject_id(question['subject'], conn)
    question_content = escape_string(question['question'])

    # Treat a missing key and an explicit None the same way.
    raw_image = question.get('image')
    image = 'NULL' if raw_image is None else "'" + escape_string(raw_image) + "'"

    question_id = get_question_id(subject_id, question_content, image, conn)
    answer = escape_string(answer)

    if get_answer_id(question_id, answer, conn) is None:
        query = """INSERT INTO answers (content, question_id, inserted_at, updated_at) VALUES ('{}', {}, now(), now())""".format(
            answer, question_id)
        with conn.cursor() as cur:
            cur.execute(query)
        # Closing the cursor does not commit; without this the new row is
        # lost unless some later statement happens to commit.
        conn.commit()

    return get_answer_id(question_id, answer, conn)
|
||
|
||
def insert_answers(question, conn):
    """Store every ``;``-separated answer of *question*.

    :param question: dict whose ``'answers'`` value is a ``;``-separated string.
    :param conn: open database connection.
    """
    for answer in question['answers'].split(';'):
        # A trailing or doubled ';' produces an empty fragment; storing it
        # would register blank input as an accepted answer.
        if not answer.strip():
            continue
        get_or_insert_answer(question, answer, conn)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from src.queries import escape_string | ||
from src.queries.subjects import get_subject_id | ||
|
||
|
||
def get_question_id(subject_id, content, image, conn):
    """Return the id of the question matching subject, content and image.

    *content* and *image* are spliced into the SQL text as they are: *content*
    goes inside a single-quoted literal and *image* is either the string
    ``'NULL'`` or an already quoted literal, so both must be prepared by the
    caller.

    :return: the first column of the matching row, or ``None`` if none matches.
    """
    if image != 'NULL':
        sql = """SELECT id FROM questions WHERE content = '{}' AND image_name = {} AND subject_id = '{}'""".format(
            content, image, subject_id)
    else:
        # `= NULL` never matches in SQL, hence the dedicated IS NULL variant.
        sql = """SELECT id FROM questions WHERE content = '{}' AND image_name IS NULL AND subject_id = '{}'""".format(
            content, subject_id)

    with conn.cursor() as cursor:
        cursor.execute(sql)
        row = cursor.fetchone()

    if row is None:
        return None
    return row[0]
|
||
|
||
def get_or_insert_question(question, conn):
    """Insert *question* unless an identical one exists, then return its id.

    :param question: dict with ``'subject'`` and ``'question'`` keys and an
        optional ``'image'`` key.
    :param conn: open database connection; committed after an insert.
    :return: the result of ``get_question_id`` after the optional insert.
    """
    subject_id = get_subject_id(question['subject'], conn)
    content = escape_string(question['question'])

    # The image is passed on as a ready-made SQL fragment.
    raw_image = question.get('image', None)
    image = "NULL" if raw_image is None else "'" + escape_string(raw_image) + "'"

    if get_question_id(subject_id, content, image, conn) is not None:
        print("Question already in the database: {}".format(question))
    else:
        insert_sql = """INSERT INTO questions (subject_id, content, image_name, inserted_at, updated_at) VALUES ({}, '{}', {}, now(), now());""".format(
            subject_id, content, image)
        with conn.cursor() as cursor:
            cursor.execute(insert_sql)
            conn.commit()

    return get_question_id(subject_id, content, image, conn)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
def get_subject_id(subject, conn):
    """Return the id of the subject called *subject*, or ``None`` if absent.

    :param subject: subject name as plain text; it is sent as a bound query
        parameter, so it needs no escaping.
    :param conn: open database connection.
    """
    with conn.cursor() as cur:
        # Bound parameter instead of str.format: names containing quotes no
        # longer break the statement or allow SQL injection.
        cur.execute("SELECT id FROM subjects WHERE name = %s", (subject,))
        row = cur.fetchone()

    return row[0] if row is not None else None


def get_or_insert_subject(subject, conn):
    """Insert *subject* if it is not stored yet and return its id.

    :param subject: subject name as plain text.
    :param conn: open database connection; committed after an insert.
    """
    if get_subject_id(subject, conn) is None:
        with conn.cursor() as cur:
            cur.execute(
                "INSERT INTO subjects (name, inserted_at, updated_at) VALUES (%s, now(), now());",
                (subject,),
            )
        conn.commit()

    return get_subject_id(subject, conn)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import os, re | ||
|
||
from src.codingfix import fix_coding | ||
|
||
|
||
def parse_file(path):
    """Parse every question record found in the file at *path*.

    A record starts with a line holding ``P``, followed by ``%subject%``, an
    optional ``$image$`` line, ``@question@`` and ``@answers@``.

    :param path: path of the file to read.
    :return: list of dicts with ``'subject'``, ``'question'`` and ``'answers'``
        keys, plus ``'image'`` when present, each passed through ``fix_coding``.
    :raises ValueError: if a record has an empty subject, question or answers.
    """
    # Every fragment is a raw string so that `\$` and `\s` reach the regex
    # engine without relying on invalid string escapes.
    question_pattern = (
        r'P\n'
        r'%(?P<subject>.*?)%\n'
        r'(\$(?P<image>([^\$]|\n)*?)\$\n)?'
        r'@(?P<question>([^@]|\n)*?)@\n'
        r'@(?P<answers>([^@]|\n)*?)@\s*?'
    )
    required = ('subject', 'question', 'answers')
    group_names = required + ('image',)

    with open(path, 'r') as f:
        text = f.read()

    questions = []
    for match in re.finditer(question_pattern, text):
        # Empty or unmatched groups are dropped rather than stored.
        question = {name: match.group(name) for name in group_names if match.group(name)}
        missing = [name for name in required if name not in question]
        if missing:
            # Explicit check instead of `assert`, which vanishes under -O and
            # used to run only after the bad record had been appended.
            raise ValueError("Question in {} is missing: {}".format(path, ', '.join(missing)))
        questions.append(fix_coding(question))
    return questions
|
||
|
||
def explore_directory(path):
    """Parse every regular file directly inside *path*.

    Sub-directories are not descended into.

    :param path: directory to scan.
    :return: one flat list with the questions of all parsed files.
    """
    questions = []
    for elem in os.listdir(path):
        elem_path = os.path.join(path, elem)
        if os.path.isfile(elem_path):
            questions.extend(parse_file(elem_path))
    return questions
|
||
|
||
def get_questions(path):
    """Collect the questions stored at *path*.

    A directory is handed to ``explore_directory`` and a regular file to
    ``parse_file``.

    :raises FileNotFoundError: if *path* is neither a directory nor a file.
    """
    if os.path.isdir(path):
        return explore_directory(path)
    if os.path.isfile(path):
        return parse_file(path)
    raise FileNotFoundError("The specified path doesn't lead to any directory or file")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Run the fixtures loader on the question files under /fixtures/jpks/.
python3 /fixtures/main.py -path /fixtures/jpks/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# NOTE(review): the fixed delay presumably gives the database time to accept
# connections before migrating - confirm; a readiness check would be less racy.
sleep 4
# Run the Ecto migrations, then start the Phoenix server.
mix ecto.migrate
mix phoenix.server