Skip to content

Commit

Permalink
Add transform workflow for Master Duel for #68
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinlul committed Sep 17, 2023
1 parent 6c1a6c3 commit 79ab472
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/merge.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ jobs:
run: gh workflow run validate-data.yaml
env:
GH_TOKEN: ${{ github.token }}
- name: Transform (Master Duel)
working-directory: yaml-yugi
run: python src/main_masterduel.py ../yaml-yugipedia/wikitext/master-duel-cards > ../aggregate/master-duel-raw.json
- if: steps.commit.outputs.status > 0
working-directory: yaml-yugi
name: Merge
Expand Down
26 changes: 26 additions & 0 deletions src/job_masterduel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-FileCopyrightText: © 2023 Kevin Lu
# SPDX-License-Identifier: AGPL-3.0-or-later
import logging
from multiprocessing import current_process
import os
from typing import Any, Dict, Optional

from ruamel.yaml import YAML

from common import initial_parse, int_or_og

# Base logger for this module; job() derives a per-process, per-file child logger from it.
module_logger = logging.getLogger(__name__)


def job(filepath: str) -> Optional[Dict[str, Any]]:
    """Parse a single Master Duel card wikitext file.

    The file's base name, with its extension removed, is passed through
    ``int_or_og`` and the result is stored on the parsed mapping under the
    ``yugipedia_page_id`` key.

    :param filepath: path of the wikitext file to process.
    :return: the mapping produced by ``initial_parse`` with
        ``yugipedia_page_id`` added, or ``None`` when ``initial_parse``
        returns a falsy value for this file.
    """
    stem, _extension = os.path.splitext(os.path.basename(filepath))
    page_id = int_or_og(stem)

    # Logger name is <module>.<worker process name>.<file stem>, so log lines
    # can be traced back to both the worker and the file being handled.
    process_logger = module_logger.getChild(current_process().name)
    logger = process_logger.getChild(stem)
    logger.info(filepath)

    parsed = initial_parse(YAML(), filepath, "Master Duel card")
    if parsed:
        parsed["yugipedia_page_id"] = page_id
        return parsed

    logger.info("Skip")
    return None
43 changes: 43 additions & 0 deletions src/main_masterduel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# SPDX-FileCopyrightText: © 2023 Kevin Lu
# SPDX-License-Identifier: AGPL-3.0-or-later
from argparse import ArgumentParser
import json
import logging
import os
import sys

from job_masterduel import job

# Command-line interface: one required positional directory and an optional
# worker-process count (0 is resolved to the CPU count inside main()).
parser = ArgumentParser()
parser.add_argument(
    "wikitext_directory",
    help="yaml-yugipedia card texts",
)
parser.add_argument(
    "--processes",
    default=0,
    type=int,
    help="number of worker processes, default ncpu",
)

# Module-level logger used by main().
logger = logging.getLogger(__name__)


def main() -> None:
    """Transform a directory of card wikitext files into one JSON array on stdout.

    Every regular file directly inside the ``wikitext_directory`` argument is
    passed to ``job``. Falsy results (files that ``job`` skipped) are dropped
    and the remaining results are serialised as a JSON list to standard
    output.

    With ``--processes 1`` the files are handled serially in this process;
    otherwise a ``multiprocessing.Pool`` is used and results arrive in
    completion order, so the output order is not guaranteed. A value of 0
    (the default) is replaced by ``os.cpu_count()``.
    """
    logging.basicConfig(level=logging.INFO)
    args = parser.parse_args()
    processes = args.processes
    if processes == 0:
        processes = os.cpu_count()
        logger.info(f"Using {processes} processes.")

    directory = args.wikitext_directory
    # Only regular files are processed; subdirectories are ignored.
    candidates = (os.path.join(directory, filename) for filename in os.listdir(directory))
    files = [path for path in candidates if os.path.isfile(path)]

    if processes == 1:
        # Build a real list and drop skipped files, exactly like the pool
        # branch below. A bare map object is lazy and json.dump cannot
        # serialise it, so the serial path previously raised TypeError.
        cards = [card for card in map(job, files) if card]
    else:
        # Imported lazily so the serial path never touches multiprocessing.
        from multiprocessing import Pool
        with Pool(processes) as pool:
            # Chunks of 100 files per task keep inter-process overhead low.
            cards = [card for card in pool.imap_unordered(job, files, 100) if card]

    logger.info("Serializing to JSON")
    json.dump(cards, sys.stdout)


if __name__ == "__main__":
    main()

0 comments on commit 79ab472

Please sign in to comment.