Skip to content

Commit

Permalink
feat: added initial setup of project with working databasepdf
Browse files Browse the repository at this point in the history
  • Loading branch information
Dospalko committed Nov 25, 2023
0 parents commit c459166
Show file tree
Hide file tree
Showing 34 changed files with 18,772 additions and 0 deletions.
15 changes: 15 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Dependency directories
node_modules/
venv/

# Environment variables
.env

# Build output
build/
dist/

# Misc
.DS_Store
__pycache__/
*.pyc
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions .idea/flaskProject.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

65 changes: 65 additions & 0 deletions backend/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import os
import re

from dotenv import load_dotenv
from flask import Flask, request, jsonify
from flask_cors import CORS
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy
from PyPDF2 import PdfReader
from PyPDF2.errors import PdfReadError

load_dotenv()

app = Flask(__name__)
CORS(app)
# Read the connection string from the environment (populated from .env by
# load_dotenv() above) so credentials do not have to live in source control.
# The original hard-coded URI is kept only as a local-development fallback.
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get(
    'DATABASE_URL',
    'postgresql://postgres:heslo@localhost/hackathon',
)

db = SQLAlchemy(app)
migrate = Migrate(app, db)

class PdfText(db.Model):
    """Database record holding the text extracted from one uploaded PDF."""

    # Integer primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Extracted text content of the PDF.
    text = db.Column(db.Text)
    # Name of the uploaded file (up to 255 characters).
    filename = db.Column(db.String(255))

    def __init__(self, text, filename):
        """Create a record from the extracted ``text`` and its ``filename``."""
        self.filename = filename
        self.text = text

@app.route('/upload_pdf', methods=['POST'])
def upload_pdf():
    """Extract the text of an uploaded PDF and store it in the database.

    Expects a multipart form upload with the PDF under the ``file`` field.

    Returns:
        A ``(json, status)`` pair: 201 with a confirmation message once the
        text has been saved, or 400 with an ``error`` message when no
        ``.pdf`` file was supplied or the upload could not be read as a PDF.
    """
    uploaded_file = request.files.get('file')
    # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
    if not uploaded_file or not uploaded_file.filename.lower().endswith('.pdf'):
        return jsonify({"error": "Invalid file format."}), 400

    try:
        pdf_reader = PdfReader(uploaded_file.stream)
        # Fall back to '' when a page yields no text.
        text = ''.join(page.extract_text() or '' for page in pdf_reader.pages)
    except PdfReadError:
        # A ".pdf" name does not guarantee readable PDF content; report a
        # client error instead of letting the parser failure become a 500.
        return jsonify({"error": "Could not read PDF file."}), 400

    # PostgreSQL text columns cannot store NUL characters, so drop them
    # before collapsing runs of whitespace into single spaces.
    text = re.sub(r'\s+', ' ', text.replace('\x00', '')).strip()
    new_pdf_text = PdfText(text, uploaded_file.filename)
    db.session.add(new_pdf_text)
    db.session.commit()

    return jsonify({"message": "PDF uploaded and text extracted."}), 201

@app.route('/pdf_texts', methods=['GET'])
def get_pdf_texts():
    """Return every stored PDF text record as a JSON array of objects."""
    payload = []
    for record in PdfText.query.all():
        payload.append({
            'id': record.id,
            'text': record.text,
            'filename': record.filename,
        })
    return jsonify(payload)

@app.route('/delete_pdf_text/<int:pdf_text_id>', methods=['DELETE'])
def delete_pdf_text(pdf_text_id):
    """Delete the stored PDF text identified by ``pdf_text_id``.

    Args:
        pdf_text_id: Primary key of the ``PdfText`` row, taken from the URL.

    Returns:
        A ``(json, status)`` pair: 200 when the row was deleted, 404 when no
        row with that id exists.
    """
    # Session.get() replaces the legacy Query.get(), which emits a
    # LegacyAPIWarning under SQLAlchemy 2.0.
    pdf_text = db.session.get(PdfText, pdf_text_id)
    if pdf_text is None:
        return jsonify({"message": "PDF Text not found"}), 404

    db.session.delete(pdf_text)
    db.session.commit()
    return jsonify({"message": "PDF Text deleted."}), 200

def init_db():
    """Create all tables defined on ``db`` that do not exist yet."""
    # Flask-SQLAlchemy 3.x resolves the engine through the current
    # application, so create_all() must run inside an application context;
    # without one it raises "RuntimeError: Working outside of application
    # context".
    with app.app_context():
        db.create_all()

if __name__ == '__main__':
    # Only when this file is executed directly: run init_db(), then start
    # Flask's built-in server.
    init_db()
    # NOTE(review): debug=True is hard-coded; confirm this entry point is
    # only ever used for local development.
    app.run(debug=True)
1 change: 1 addition & 0 deletions backend/migrations/README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Single-database configuration for Flask.
50 changes: 50 additions & 0 deletions backend/migrations/alembic.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# A generic, single database configuration.

[alembic]
# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false


# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic,flask_migrate

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[logger_flask_migrate]
level = INFO
handlers =
qualname = flask_migrate

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
113 changes: 113 additions & 0 deletions backend/migrations/env.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import logging
from logging.config import fileConfig

from flask import current_app

from alembic import context

# Alembic Config object, which provides access to the values within the
# .ini file in use.
config = context.config

# Configure Python logging from that same config file, then grab the
# logger this module reports through.
fileConfig(config.config_file_name)
logger = logging.getLogger('alembic.env')


def get_engine():
    """Return the engine of the database object registered with Flask-Migrate."""
    migrate_db = current_app.extensions['migrate'].db
    try:
        # Works with Flask-SQLAlchemy<3 and Alchemical.
        engine = migrate_db.get_engine()
    except (TypeError, AttributeError):
        # Works with Flask-SQLAlchemy>=3.
        engine = migrate_db.engine
    return engine


def get_engine_url():
    """Return the engine URL as a string with every ``%`` doubled to ``%%``."""
    try:
        # Preferred path: render the URL with the password left visible.
        rendered = get_engine().url.render_as_string(hide_password=False)
    except AttributeError:
        # Fallback for URL objects without render_as_string().
        rendered = str(get_engine().url)
    return rendered.replace('%', '%%')


# Model metadata for 'autogenerate' support is not imported here directly;
# get_metadata() reads it from the database object stored below.
# Set Alembic's 'sqlalchemy.url' option to the Flask app's engine URL.
config.set_main_option('sqlalchemy.url', get_engine_url())
# Database object registered under the 'migrate' extension of the current app.
target_db = current_app.extensions['migrate'].db

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def get_metadata():
    """Return the metadata object Alembic should compare the database against."""
    if not hasattr(target_db, 'metadatas'):
        # Database objects exposing only a single ``metadata`` attribute.
        return target_db.metadata
    # Database objects exposing a ``metadatas`` mapping: use the entry
    # stored under the ``None`` key.
    return target_db.metadatas[None]


def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The context is configured with only a URL rather than an Engine
    (an Engine would be acceptable too). Because no Engine is created,
    no DBAPI needs to be available.

    Calls to context.execute() here emit the given string to the
    script output.
    """
    context.configure(
        url=config.get_main_option("sqlalchemy.url"),
        target_metadata=get_metadata(),
        literal_binds=True,
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online():
    """Run migrations in 'online' mode.

    An Engine is obtained and a live connection from it is associated
    with the migration context.
    """

    # Callback that prevents an auto-migration from being generated when
    # there are no changes to the schema.
    # Reference: http://alembic.zzzcomputing.com/en/latest/cookbook.html
    def process_revision_directives(context, revision, directives):
        if not getattr(config.cmd_opts, 'autogenerate', False):
            return
        script = directives[0]
        if script.upgrade_ops.is_empty():
            directives[:] = []
            logger.info('No changes in schema detected.')

    # Install the callback only when the extension's configure arguments
    # do not already provide one.
    conf_args = current_app.extensions['migrate'].configure_args
    if conf_args.get("process_revision_directives") is None:
        conf_args["process_revision_directives"] = process_revision_directives

    engine = get_engine()
    with engine.connect() as connection:
        context.configure(
            connection=connection,
            target_metadata=get_metadata(),
            **conf_args
        )
        with context.begin_transaction():
            context.run_migrations()


# Module-level entry point: pick the runner that matches the mode Alembic
# reports for this invocation.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
24 changes: 24 additions & 0 deletions backend/migrations/script.py.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade():
${upgrades if upgrades else "pass"}


def downgrade():
${downgrades if downgrades else "pass"}
33 changes: 33 additions & 0 deletions backend/migrations/versions/c4b843d1ef4c_initial_migration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Initial migration
Revision ID: c4b843d1ef4c
Revises:
Create Date: 2023-11-25 13:45:06.008866
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'c4b843d1ef4c'
# No parent revision is recorded for this migration.
down_revision = None
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``pdf_text`` table with ``id``, ``text`` and ``filename``."""
    # Originally auto-generated by Alembic.
    table_parts = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('text', sa.Text(), nullable=True),
        sa.Column('filename', sa.String(length=255), nullable=True),
        sa.PrimaryKeyConstraint('id'),
    ]
    op.create_table('pdf_text', *table_parts)


def downgrade():
    """Drop the ``pdf_text`` table, undoing what ``upgrade()`` created."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table('pdf_text')
    # ### end Alembic commands ###
21 changes: 21 additions & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
alembic==1.12.1
blinker==1.7.0
click==8.1.7
Flask==3.0.0
Flask-Cors==4.0.0
Flask-Migrate==4.0.5
Flask-SQLAlchemy==3.1.1
greenlet==3.0.1
itsdangerous==2.1.2
Jinja2==3.1.2
joblib==1.3.2
Mako==1.3.0
MarkupSafe==2.1.3
nltk==3.8.1
psycopg2-binary==2.9.9
PyPDF2==3.0.1
regex==2023.10.3
SQLAlchemy==2.0.23
tqdm==4.66.1
typing_extensions==4.8.0
Werkzeug==3.0.1
Loading

0 comments on commit c459166

Please sign in to comment.