diff --git a/ambuda/consts.py b/ambuda/consts.py index 72fc0ed3..1120a927 100644 --- a/ambuda/consts.py +++ b/ambuda/consts.py @@ -55,10 +55,14 @@ class Locale: "saundaranandam", "hamsadutam", ], - "upanishat": ["shivopanishat"], + "upanishat": [ + "shivopanishat", + "isa" + ], "anye": [ "bodhicaryavatara", "catuhshloki", + ], } diff --git a/ambuda/database.py b/ambuda/database.py index ea5b7e45..f7e1b74d 100644 --- a/ambuda/database.py +++ b/ambuda/database.py @@ -2,7 +2,7 @@ # For convenience, import all models into this module. -from ambuda.enums import SiteRole # NOQA F401 +from ambuda.enums import SitePageStatus, SiteRole, TextGenre # NOQA F401 from ambuda.models.auth import * # NOQA F401,F403 from ambuda.models.base import Base # NOQA F401,F403 from ambuda.models.blog import * # NOQA F401,F403 diff --git a/ambuda/enums.py b/ambuda/enums.py index 9c23656f..25901280 100644 --- a/ambuda/enums.py +++ b/ambuda/enums.py @@ -27,3 +27,11 @@ class SitePageStatus(str, Enum): R2 = "reviewed-2" #: Not relevant. SKIP = "skip" + +class TextGenre(str, Enum): + """Define text genres.""" + + ITIHASA = "Itihasa" + UPANISHAT = "Upanishat" + KAVYA = "Kavya" + ANYE = "Anye" diff --git a/ambuda/models/texts.py b/ambuda/models/texts.py index 070c8547..9f6241af 100644 --- a/ambuda/models/texts.py +++ b/ambuda/models/texts.py @@ -7,12 +7,11 @@ - `TextBlock` is typically a verse or paragraph within a `TextSection`. """ +from ambuda.models.base import Base, foreign_key, pk from sqlalchemy import Column, Integer, String from sqlalchemy import Text as _Text from sqlalchemy.orm import relationship -from ambuda.models.base import Base, foreign_key, pk - class Text(Base): @@ -30,6 +29,9 @@ class Text(Base): header = Column(_Text) #: An ordered list of the sections contained within this text. 
sections = relationship("TextSection", backref="text", cascade="delete") + #: Add a genre for the text + genre_id = foreign_key("genres.id") + genre = relationship("Genre") def __str__(self): return self.slug diff --git a/ambuda/queries.py b/ambuda/queries.py index 35b69cf0..c98eb45a 100644 --- a/ambuda/queries.py +++ b/ambuda/queries.py @@ -60,6 +60,16 @@ def texts() -> list[db.Text]: return session.query(db.Text).all() +def texts_genre(genre=None) -> list[str]: + """Return a list of all texts in a genre.""" + session = get_session() + query = session.query(db.Text.slug).join(db.Genre) # Join the Text and Genre tables + if genre: + query = query.filter(db.Genre.name == genre.name) # Filter by genre name + texts = query.all() + return [text[0] for text in texts] + + def page_statuses() -> list[db.PageStatus]: session = get_session() return session.query(db.PageStatus).all() diff --git a/ambuda/seed/lookup/__init__.py b/ambuda/seed/lookup/__init__.py index 3c500771..4a768aae 100644 --- a/ambuda/seed/lookup/__init__.py +++ b/ambuda/seed/lookup/__init__.py @@ -1,4 +1,4 @@ -from . import create_bot_user, page_status, role +from . import create_bot_user, genres, page_status, role def run(): @@ -9,9 +9,10 @@ def run(): page_status.run() role.run() create_bot_user.run() + genres.run() except Exception as ex: raise Exception( - "Error: Failed to create page statuses, " + "Error: Failed to create genres, page statuses, " "create roles, and creat bot user." f"Error: {ex}" ) from ex diff --git a/ambuda/seed/lookup/genres.py b/ambuda/seed/lookup/genres.py new file mode 100644 index 00000000..ad97fc66 --- /dev/null +++ b/ambuda/seed/lookup/genres.py @@ -0,0 +1,35 @@ +import logging + +import ambuda.database as db +from ambuda.enums import TextGenre +from ambuda.seed.utils.data_utils import create_db +from sqlalchemy.orm import Session + + +def run(engine=None): + """Create genres iff they don't exist already. + + NOTE: this script doesn't delete existing genres. 
+ """ + + engine = engine or create_db() + with Session(engine) as session: + genres = session.query(db.Genre).all() + existing_names = {s.name for s in genres} + new_names = {r.value for r in TextGenre if r.value not in existing_names} + + if new_names: + for name in new_names: + status = db.Genre(name=name) + session.add(status) + logging.debug(f"Created genre: {name}") + session.commit() + + logging.debug("Done. The following genres are defined:") + with Session(engine) as session: + for g in session.query(db.Genre).all(): + logging.debug(f"- {g.name}") + + +if __name__ == "__main__": + run() diff --git a/ambuda/seed/texts/gretil.py b/ambuda/seed/texts/gretil.py index 11274de9..bae256c8 100644 --- a/ambuda/seed/texts/gretil.py +++ b/ambuda/seed/texts/gretil.py @@ -2,22 +2,10 @@ import logging import subprocess -from dataclasses import dataclass from pathlib import Path -from sqlalchemy.orm import Session - -import ambuda.database as db -from ambuda.seed.utils.data_utils import create_db -from ambuda.utils.tei_parser import Document, parse_document - - -@dataclass -class Spec: - slug: str - title: str - filename: str - +from ambuda.enums import TextGenre +from ambuda.seed.utils.data_utils import Spec, add_document, create_db REPO = "https://github.com/ambuda-org/gretil.git" PROJECT_DIR = Path(__file__).resolve().parents[3] @@ -25,25 +13,25 @@ class Spec: #: Slug to use for texts that have only one section. 
ALLOW = [ - Spec("amarushatakam", "amaruzatakam", "sa_amaru-amaruzataka.xml"), - Spec("kumarasambhavam", "kumArasambhavam", "sa_kAlidAsa-kumArasaMbhava.xml"), - Spec("raghuvamsham", "raghuvaMzam", "sa_kAlidAsa-raghuvaMza.xml"), - Spec("kiratarjuniyam", "kirAtArjunIyam", "sa_bhAravi-kirAtArjunIya.xml"), - Spec("shishupalavadham", "zizupAlavadham", "sa_mAgha-zizupAlavadha.xml"), - Spec("rtusamharam", "RtusaMhAram", "sa_kAlidAsa-RtusaMhAra.xml"), - Spec("shatakatrayam", "zatakatrayam", "sa_bhatRhari-zatakatraya.xml"), - Spec("bhattikavyam", "bhaTTikAvyam", "sa_bhaTTi-rAvaNavadha.xml"), - Spec("meghadutam-kale", "meghadUtam", "sa_kAlidAsa-meghadUta-edkale.xml"), - Spec("kokilasandesha", "kokilasaMdezaH", "sa_uddaNDa-kokilasaMdesa.xml"), - Spec("bodhicaryavatara", "bodhicaryAvatAraH", "sa_zAntideva-bodhicaryAvatAra.xml"), + Spec("amarushatakam", "amaruzatakam", "sa_amaru-amaruzataka.xml", TextGenre.KAVYA), + Spec("kumarasambhavam", "kumArasambhavam", "sa_kAlidAsa-kumArasaMbhava.xml", TextGenre.KAVYA), + Spec("raghuvamsham", "raghuvaMzam", "sa_kAlidAsa-raghuvaMza.xml", TextGenre.KAVYA), + Spec("kiratarjuniyam", "kirAtArjunIyam", "sa_bhAravi-kirAtArjunIya.xml", TextGenre.KAVYA), + Spec("shishupalavadham", "zizupAlavadham", "sa_mAgha-zizupAlavadha.xml", TextGenre.KAVYA), + Spec("rtusamharam", "RtusaMhAram", "sa_kAlidAsa-RtusaMhAra.xml", TextGenre.KAVYA), + Spec("shatakatrayam", "zatakatrayam", "sa_bhatRhari-zatakatraya.xml", TextGenre.KAVYA), + Spec("bhattikavyam", "bhaTTikAvyam", "sa_bhaTTi-rAvaNavadha.xml", TextGenre.KAVYA), + Spec("meghadutam-kale", "meghadUtam", "sa_kAlidAsa-meghadUta-edkale.xml", TextGenre.KAVYA), + Spec("kokilasandesha", "kokilasaMdezaH", "sa_uddaNDa-kokilasaMdesa.xml", TextGenre.KAVYA), + Spec("bodhicaryavatara", "bodhicaryAvatAraH", "sa_zAntideva-bodhicaryAvatAra.xml", TextGenre.ANYE), Spec( - "saundaranandam", "saundaranandam", "sa_azvaghoSa-saundarAnanda-edmatsunami.xml" + "saundaranandam", "saundaranandam", 
"sa_azvaghoSa-saundarAnanda-edmatsunami.xml", TextGenre.KAVYA ), - Spec("caurapancashika", "caurapaJcAzikA", "sa_bilhaNa-caurapaJcAzikA.xml"), - Spec("hamsadutam", "haMsadUtam", "sa_rUpagosvAmin-haMsadUta.xml"), - Spec("mukundamala", "mukundamAlA", "sa_kulazekhara-mukundamAlA-eddurgaprasad.xml"), - Spec("shivopanishat", "zivopaniSat", "sa_zivopaniSad.xml"), - Spec("catuhshloki", "catuHzlokI", "sa_yAmuna-catuHzlokI.xml"), + Spec("caurapancashika", "caurapaJcAzikA", "sa_bilhaNa-caurapaJcAzikA.xml", TextGenre.KAVYA), + Spec("hamsadutam", "haMsadUtam", "sa_rUpagosvAmin-haMsadUta.xml", TextGenre.KAVYA), + Spec("mukundamala", "mukundamAlA", "sa_kulazekhara-mukundamAlA-eddurgaprasad.xml", TextGenre.KAVYA), + Spec("shivopanishat", "zivopaniSat", "sa_zivopaniSad.xml", TextGenre.UPANISHAT), + Spec("catuhshloki", "catuHzlokI", "sa_yAmuna-catuHzlokI.xml", TextGenre.ANYE), ] @@ -62,47 +50,6 @@ def fetch_latest_data(): subprocess.call("git reset --hard origin/main", shell=True, cwd=DATA_DIR) -def _create_new_text(session, spec: Spec, document: Document): - text = db.Text(slug=spec.slug, title=spec.title, header=document.header) - session.add(text) - session.flush() - - n = 1 - for section in document.sections: - db_section = db.TextSection( - text_id=text.id, slug=section.slug, title=section.slug - ) - session.add(db_section) - session.flush() - - for block in section.blocks: - db_block = db.TextBlock( - text_id=text.id, - section_id=db_section.id, - slug=block.slug, - xml=block.blob, - n=n, - ) - session.add(db_block) - n += 1 - - session.commit() - - -def add_document(engine, spec: Spec): - document_path = DATA_DIR / "1_sanskr" / "tei" / spec.filename - - with Session(engine) as session: - if session.query(db.Text).filter_by(slug=spec.slug).first(): - # FIXME: update existing texts in-place so that we can capture - # changes. As a workaround for now, we can delete then re-create. 
- log(f"- Skipped {spec.slug} (already exists)") - else: - document = parse_document(document_path) - _create_new_text(session, spec, document) - log(f"- Created {spec.slug}") - - def run(): logging.getLogger().setLevel(0) log("Downloading the latest data ...") @@ -114,7 +61,8 @@ def run(): engine = create_db() for spec in ALLOW: - add_document(engine, spec) + document_path = DATA_DIR / "1_sanskr" / "tei" / spec.filename + add_document(engine, spec, document_path) except Exception as ex: raise Exception("Error: Failed to get latest from GRETIL.") from ex diff --git a/ambuda/seed/utils/data_utils.py b/ambuda/seed/utils/data_utils.py index 4ae3e65b..b1b50346 100644 --- a/ambuda/seed/utils/data_utils.py +++ b/ambuda/seed/utils/data_utils.py @@ -1,14 +1,26 @@ import hashlib import io +import logging import os import zipfile - -import requests -from sqlalchemy import create_engine +from dataclasses import dataclass import config +import requests from ambuda import database as db from ambuda.seed.utils.itihasa_utils import CACHE_DIR +from ambuda.utils.tei_parser import Document, parse_document +from sqlalchemy import create_engine +from sqlalchemy.orm import Session + +LOG = logging.getLogger(__name__) + +@dataclass +class Spec: + slug: str + title: str + filename: str + genre: db.TextGenre def fetch_text(url: str, read_from_cache: bool = True) -> str: @@ -83,3 +95,46 @@ def create_db(): db.Base.metadata.create_all(engine) return engine + + +def _create_new_text(session, spec: Spec, document: Document): + """Create new text in the database.""" + text_genre = session.query(db.Genre).filter_by(name=spec.genre.value).first() + text = db.Text(slug=spec.slug, title=spec.title, header=document.header, genre=text_genre) + session.add(text) + session.flush() + + n = 1 + for section in document.sections: + db_section = db.TextSection( + text_id=text.id, slug=section.slug, title=section.slug + ) + session.add(db_section) + session.flush() + + for block in section.blocks: + 
db_block = db.TextBlock( + text_id=text.id, + section_id=db_section.id, + slug=block.slug, + xml=block.blob, + n=n, + ) + session.add(db_block) + n += 1 + + session.commit() + + +def add_document(engine, spec: Spec, document_path): + """Add a document to the database.""" + + with Session(engine) as session: + if session.query(db.Text).filter_by(slug=spec.slug).first(): + # FIXME: update existing texts in-place so that we can capture + # changes. As a workaround for now, we can delete then re-create. + LOG.info(f"- Skipped {spec.slug} (already exists)") + else: + document = parse_document(document_path) + _create_new_text(session, spec, document) + LOG.info(f"- Created {spec.slug}") diff --git a/ambuda/seed/utils/itihasa_utils.py b/ambuda/seed/utils/itihasa_utils.py index 753f97d8..e7cd3339 100644 --- a/ambuda/seed/utils/itihasa_utils.py +++ b/ambuda/seed/utils/itihasa_utils.py @@ -5,12 +5,11 @@ from dataclasses import dataclass from pathlib import Path +import ambuda.database as db from dotenv import load_dotenv from indic_transliteration import sanscript from sqlalchemy.orm import Session -import ambuda.database as db - load_dotenv() PROJECT_DIR = Path(__file__).parent.parent.parent CACHE_DIR = PROJECT_DIR / "data" / "download-cache" @@ -106,7 +105,8 @@ def write_kandas( xml_id_prefix: str, ): with Session(engine) as session: - text = db.Text(slug=text_slug, title=text_title, header=tei_header) + iti_genre = session.query(db.Genre).filter_by(name=db.TextGenre.ITIHASA.value).first() + text = db.Text(slug=text_slug, title=text_title, header=tei_header, genre=iti_genre) session.add(text) session.flush() diff --git a/ambuda/views/reader/texts.py b/ambuda/views/reader/texts.py index d67ca71c..2a0e8328 100644 --- a/ambuda/views/reader/texts.py +++ b/ambuda/views/reader/texts.py @@ -1,19 +1,21 @@ """Views related to texts: title pages, sections, verses, etc.""" import json - -from flask import Blueprint, abort, jsonify, 
render_template, url_for -from indic_transliteration import sanscript +import logging import ambuda.database as db import ambuda.queries as q -from ambuda.consts import TEXT_CATEGORIES + +# NOTE: TEXT_CATEGORIES is superseded by the DB-backed genres used in index(). from ambuda.utils import xml from ambuda.utils.json_serde import AmbudaJSONEncoder from ambuda.views.api import bp as api from ambuda.views.reader.schema import Block, Section +from flask import Blueprint, abort, jsonify, render_template, url_for +from indic_transliteration import sanscript bp = Blueprint("texts", __name__) +LOG = logging.getLogger(__name__) # A hacky list that decides which texts have parse data. HAS_NO_PARSE = { @@ -86,8 +88,19 @@ def _hk_to_dev(s: str) -> str: def index(): """Show all texts.""" all_texts = {t.slug: t for t in q.texts()} + + # Initialize a dictionary with keys as genres and values as texts in those genres + text_genres = {} + + # Retrieve all genres from the database + genres = q.genres() + + # Iterate over each genre and retrieve texts in that genre + for genre in genres: + texts = q.texts_genre(genre=genre) + text_genres[genre.name.lower()] = texts return render_template( - "texts/index.html", categories=TEXT_CATEGORIES, texts=all_texts + "texts/index.html", categories=text_genres, texts=all_texts ) diff --git a/cli.py b/cli.py index 608928b9..e4f4c581 100755 --- a/cli.py +++ b/cli.py @@ -11,7 +11,7 @@ import ambuda from ambuda import database as db from ambuda import queries as q -from ambuda.seed.utils.data_utils import create_db +from ambuda.seed.utils.data_utils import Spec, add_document, create_db from ambuda.tasks.projects import create_project_inner from ambuda.tasks.utils import LocalTaskStatus @@ -107,5 +107,22 @@ def create_project(title, pdf_path): ) +@cli.command() +@click.option("--title", help="title of the new text") +@click.option("--slug", help="slug of the new text") +@click.option("--tei-path", help="path to the source TEI-XML file") +@click.option("--genre", help="text genre from [Itihasa, 
Upanishat, Kavya, Anye]", default="Anye") +def publish_text(slug, title, tei_path, genre="Anye"): + """Publish a proofread text from a TEI-XML.""" + + # Reverse-map the genre string to a TextGenre enum member. + genre_formatted = genre.title() + genre_enum = next((g for g in db.TextGenre if g.value == genre_formatted), None) + if genre_enum is None: + raise ValueError(f"Enter a valid genre: {genre}") + spec = Spec(slug, title, tei_path, genre_enum) + add_document(create_db(), spec, tei_path) + + if __name__ == "__main__": cli() diff --git a/deploy/local/docker-compose-dbsetup.yml b/deploy/local/docker-compose-dbsetup.yml index 6ff8a272..36eb45bf 100644 --- a/deploy/local/docker-compose-dbsetup.yml +++ b/deploy/local/docker-compose-dbsetup.yml @@ -5,6 +5,7 @@ services: image: ${AMBUDA_IMAGE} command: "/app/scripts/initialize_data.sh" volumes: + - ${PWD}/deploy/data/:/app/data/ - ${PWD}/deploy/data_database/:/app/data/database/ - ${PWD}/deploy/data_files/vidyut:/app/data/vidyut/ environment: diff --git a/deploy/staging/docker-compose.yml b/deploy/staging/docker-compose.yml index d8afb583..daab8d7d 100644 --- a/deploy/staging/docker-compose.yml +++ b/deploy/staging/docker-compose.yml @@ -19,7 +19,7 @@ services: - FLASK_UPLOAD_FOLDER=/app/data/file-uploads - SQLALCHEMY_DATABASE_URI=sqlite:////app/data/database/database.db - SECRET_KEY=insecure development secret key - - GOOGLE_APPLICATION_CREDENTIALS= + - GOOGLE_APPLICATION_CREDENTIALS=/app/data/.google.key - REDIS_URL=redis://redis:6579/0 - VIRTUAL_ENV=. - AMBUDA_BOT_PASSWORD=insecure bot password @@ -30,7 +30,11 @@ services: image: ${AMBUDA_IMAGE} command: /app/scripts/start_celery.sh environment: - - REDIS_URL=redis://redis:6579/0 + - REDIS_URL=redis://redis:6579/0 + - SQLALCHEMY_DATABASE_URI=sqlite:////app/data/database/database.db + - GOOGLE_APPLICATION_CREDENTIALS=/app/data/.google.key + - PATH=$PATH:/venv/bin/ + depends_on: - redis