Skip to content

Commit

Permalink
Equation extraction (#4809)
Browse files Browse the repository at this point in the history
  • Loading branch information
kbirk authored Sep 18, 2024
1 parent ef3e9d8 commit a0f3459
Show file tree
Hide file tree
Showing 25 changed files with 759 additions and 243 deletions.
159 changes: 92 additions & 67 deletions containers/scripts/docker-compose-taskrunner.yml
Original file line number Diff line number Diff line change
@@ -1,71 +1,96 @@
---
# Local-development compose file for the Terarium taskrunners.
# Every service builds the "*_taskrunner_builder" stage of its package
# Dockerfile, mounts the package sources plus the shared taskrunner package,
# and starts through the package's dev.sh script once rabbitmq is healthy.
# The RabbitMQ credentials below are hard-coded development values.
services:
  # Taskrunner handling "gollm" requests; needs an OpenAI API key.
  gollm-taskrunner:
    build:
      context: ../..
      dockerfile: ./packages/gollm/Dockerfile
      target: gollm_taskrunner_builder
    container_name: gollm-taskrunner
    networks:
      - terarium
    environment:
      TERARIUM_MQ_ADDRESSES: "amqp://rabbitmq:5672"
      TERARIUM_MQ_PASSWORD: "terarium123"
      TERARIUM_MQ_USERNAME: "terarium"
      TERARIUM_TASKRUNNER_REQUEST_TYPE: "gollm"
      OPENAI_API_KEY: "${secret_openai_key}"
    depends_on:
      rabbitmq:
        condition: service_healthy
    extra_hosts:
      - "${local_host_name}:host-gateway"
    volumes:
      - ../../packages/gollm:/gollm_task
      - ../../packages/taskrunner:/taskrunner
    command: /gollm_task/dev.sh

  # Taskrunner handling "mira" requests.
  mira-taskrunner:
    build:
      context: ../..
      dockerfile: ./packages/mira/Dockerfile
      target: mira_taskrunner_builder
    container_name: mira-taskrunner
    networks:
      - terarium
    environment:
      TERARIUM_MQ_ADDRESSES: "amqp://rabbitmq:5672"
      TERARIUM_MQ_PASSWORD: "terarium123"
      TERARIUM_MQ_USERNAME: "terarium"
      TERARIUM_TASKRUNNER_REQUEST_TYPE: "mira"
    depends_on:
      rabbitmq:
        condition: service_healthy
    extra_hosts:
      - "${local_host_name}:host-gateway"
    volumes:
      - ../../packages/mira:/mira_task
      - ../../packages/taskrunner:/taskrunner
    command: /mira_task/dev.sh

  # Taskrunner handling "funman" requests.
  funman-taskrunner:
    build:
      context: ../..
      dockerfile: ./packages/funman/Dockerfile
      target: funman_taskrunner_builder
    container_name: funman-taskrunner
    networks:
      - terarium
    environment:
      TERARIUM_MQ_ADDRESSES: "amqp://rabbitmq:5672"
      TERARIUM_MQ_PASSWORD: "terarium123"
      TERARIUM_MQ_USERNAME: "terarium"
      TERARIUM_TASKRUNNER_REQUEST_TYPE: "funman"
    depends_on:
      rabbitmq:
        condition: service_healthy
    extra_hosts:
      - "${local_host_name}:host-gateway"
    volumes:
      - ../../packages/funman:/funman_task
      - ../../packages/taskrunner:/taskrunner
    command: /funman_task/dev.sh

  # Taskrunner handling "nougat_cpu" requests. Host port 6300 is published to
  # container port 8002, the port uvicorn listens on in supervisord.conf.
  nougat_cpu-taskrunner:
    build:
      context: ../..
      dockerfile: ./packages/nougat_cpu/Dockerfile
      target: nougat_cpu_taskrunner_builder
    container_name: nougat_cpu-taskrunner
    networks:
      - terarium
    ports:
      - "6300:8002"
    environment:
      TERARIUM_MQ_ADDRESSES: "amqp://rabbitmq:5672"
      TERARIUM_MQ_PASSWORD: "terarium123"
      TERARIUM_MQ_USERNAME: "terarium"
      TERARIUM_TASKRUNNER_REQUEST_TYPE: "nougat_cpu"
    depends_on:
      rabbitmq:
        condition: service_healthy
    extra_hosts:
      - "${local_host_name}:host-gateway"
    volumes:
      - ../../packages/nougat_cpu:/nougat_task
      - ../../packages/taskrunner:/taskrunner
    command: /nougat_task/dev.sh
2 changes: 1 addition & 1 deletion packages/funman/dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cd /funman_task
pip install --no-cache-dir -e .

# run it
echo "Running taskrunner"
echo "Installing taskrunner"
cd /taskrunner
pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion packages/gollm/dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cd /gollm_task
pip install --no-cache-dir -e .

# run it
echo "Running taskrunner"
echo "Installing taskrunner"
cd /taskrunner
pip install --no-cache-dir -e .

Expand Down
2 changes: 1 addition & 1 deletion packages/mira/dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cd /mira_task
pip install --no-cache-dir -e .

# run it
echo "Running taskrunner"
echo "Installing taskrunner"
cd /taskrunner
pip install --no-cache-dir -e .

Expand Down
40 changes: 40 additions & 0 deletions packages/nougat_cpu/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
HELP.md
.gradle
build/
!gradle/wrapper/gradle-wrapper.jar
!**/src/main/**/build/
!**/src/test/**/build/

### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
!**/src/main/**/bin/
!**/src/test/**/bin/

### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
out/
!**/src/main/**/out/
!**/src/test/**/out/

### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/

### VS Code ###
.vscode/

### Python ###
# Generic pattern: covers nougat_task.egg-info (created by `pip install -e .`)
# as well as the mira.egg-info entry that was copied over from the mira package.
*.egg-info
__pycache__
108 changes: 108 additions & 0 deletions packages/nougat_cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
# Stage 1 (nougat_cpu_taskrunner_builder): builds the taskrunner Spring Boot jar.
# docker-compose-taskrunner.yml builds this stage as its target for local
# development, which is why the Python / nougat tooling is installed here too.
FROM eclipse-temurin:17.0.12_7-jdk-focal AS nougat_cpu_taskrunner_builder

###### DEV ONLY ######
#VVVVVVVVVVVVVVVVVVVVV

# These deps are installed only for use during local development

# Install Python: software-properties-common provides add-apt-repository,
# which is used to register the deadsnakes PPA before the python3.10 packages
# are installed.
RUN apt-get update && apt-get install -y --no-install-recommends \
software-properties-common
RUN add-apt-repository ppa:deadsnakes/ppa
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
python3.10 \
python3.10-venv \
python3.10-dev \
python3-setuptools \
wget \
git \
libgl1 \
poppler-utils \
python3-pip && \
rm -rf /var/lib/apt/lists/*

# Set the default Python version to 3.10
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
update-alternatives --set python /usr/bin/python3.10

# Bootstrap / upgrade pip for the default `python` (3.10) selected above
RUN python -m ensurepip --upgrade

# Verify the installation
RUN python --version
RUN pip3 --version

# Install nougat: download the document_intelligence archive at the commit SHA
# pinned in nougat-version.txt, unpack it to /document_intelligence, and install
# the fast_latex requirements.
COPY ./packages/nougat_cpu/nougat-version.txt /nougatVersion.txt
RUN COMMIT_SHA="$(cat /nougatVersion.txt)" && \
echo "Using document_intelligence commit $COMMIT_SHA" && \
wget --progress=dot:giga -O nougat.tar.gz "https://github.com/DARPA-ASKEM/document_intelligence/archive/${COMMIT_SHA}.tar.gz" && \
tar -zxvf nougat.tar.gz && \
rm nougat.tar.gz && \
mv document_intelligence-* document_intelligence
WORKDIR /document_intelligence/document_intelligence/fast_latex
RUN pip3 install -r requirements.txt

# Install uvicorn, setuptools, augraphy and supervisor (provides supervisord)
RUN pip3 install uvicorn setuptools augraphy supervisor

#^^^^^^^^^^^^^^^^^^^^
######################

# Build the taskrunner fat jar with the Gradle wrapper
WORKDIR /taskrunner

COPY ./packages/taskrunner .

RUN ./gradlew bootJar

WORKDIR /

# Copy the jar to the root directory and rename it to /taskrunner.jar.
# NOTE: after these two steps /taskrunner/build/libs no longer contains the jar,
# and the second step relies on the hard-coded name terarium-1.0.0-SNAPSHOT.jar.
RUN mv /taskrunner/build/libs/*.jar .
RUN mv /terarium-1.0.0-SNAPSHOT.jar /taskrunner.jar

# ------------------------------------------------------------------------------

# Stage 2 (runtime image): Python 3.10 plus a headless Java 17 JRE.
# supervisord is the container command and is started with the configuration
# file copied in below.
FROM python:3.10-slim

WORKDIR /

# Copy the supervisord configuration file
COPY ./packages/nougat_cpu/supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Install the OpenJDK JRE and the native packages needed at runtime
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    git \
    libgl1 \
    poppler-utils \
    openjdk-17-jre-headless && \
    rm -rf /var/lib/apt/lists/*

# Copy the Spring Boot fat JAR from the builder image.
# The builder stage moves the jar out of /taskrunner/build/libs and renames it
# to /taskrunner.jar, so it has to be copied from that final location; the old
# /taskrunner/build/libs/*.jar glob no longer matches anything.
COPY --from=nougat_cpu_taskrunner_builder /taskrunner.jar /taskrunner.jar

# Install nougat (sources were downloaded and unpacked in the builder stage)
COPY --from=nougat_cpu_taskrunner_builder /document_intelligence /document_intelligence
WORKDIR /document_intelligence/document_intelligence/fast_latex
RUN pip install --no-cache-dir -r requirements.txt

# Install taskrunner
COPY ./packages/taskrunner/setup.py /taskrunner/setup.py
COPY ./packages/taskrunner/taskrunner.py /taskrunner/taskrunner.py
WORKDIR /taskrunner
RUN pip install --no-cache-dir -e .

# Install document_intelligence tasks
COPY ./packages/nougat_cpu /nougat_task
WORKDIR /nougat_task
RUN pip install --no-cache-dir -e .

# Install uvicorn, setuptools, augraphy and supervisor (provides supervisord)
RUN pip install --no-cache-dir uvicorn setuptools augraphy supervisor

WORKDIR /
CMD ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
17 changes: 17 additions & 0 deletions packages/nougat_cpu/dev.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Development entrypoint for the nougat_cpu taskrunner container: installs the
# volume-mounted Python packages in editable mode, then starts supervisord.

# ensure the volume mounted python code is using editable mode
echo "Installing python tasks"
# abort if the mount is missing rather than installing from the wrong directory
cd /nougat_task || exit 1
pip3 install -e .

# install the volume mounted taskrunner package in editable mode as well
echo "Installing taskrunner"
cd /taskrunner || exit 1
pip3 install -e .

# make the log directory; -p keeps this from erroring when the directory
# already exists (for example when the container is restarted)
mkdir -p /var/log/supervisor

echo "Starting supervisord"
# exec replaces this shell with supervisord so it receives the container's
# stop signals directly instead of being hidden behind bash
exec supervisord -c /nougat_task/supervisord.conf
1 change: 1 addition & 0 deletions packages/nougat_cpu/nougat-version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eebfe991f5348c4be601417bb211e897ba30a748
14 changes: 14 additions & 0 deletions packages/nougat_cpu/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from setuptools import setup, find_packages

# Console scripts installed with this package, written as
# "<script name>=<module>:<callable>". The single entry exposes
# tasks.extraction:main under the name "nougat_task:extract_equations".
_CONSOLE_SCRIPTS = [
    "nougat_task:extract_equations=tasks.extraction:main",
]

# Package metadata for the nougat_task distribution: no declared runtime
# dependencies, every package discovered by find_packages() is included, and
# Python 3.10 or newer is required.
setup(
    name="nougat_task",
    version="0.1.0",
    python_requires=">=3.10",
    packages=find_packages(),
    install_requires=[],
    entry_points={"console_scripts": _CONSOLE_SCRIPTS},
)
22 changes: 22 additions & 0 deletions packages/nougat_cpu/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; supervisord configuration for the nougat_cpu container. It runs two programs
; side by side: the uvicorn web app (process1) and the Java taskrunner (process2).
[supervisord]
; stay in the foreground instead of daemonizing
nodaemon=true

; process1: serves the `run:app` ASGI application with uvicorn on all
; interfaces, port 8002, from the fast_latex directory.
[program:process1]
command=uvicorn run:app --host 0.0.0.0 --port 8002
directory=/document_intelligence/document_intelligence/fast_latex
autostart=true
autorestart=true
; forward the program's output to supervisord's own stdout/stderr
stdout_logfile=/dev/fd/1
stderr_logfile=/dev/fd/2
; 0 disables log rotation for these streams
stdout_logfile_maxbytes=0
stderr_logfile_maxbytes=0

; process2: runs the taskrunner jar located at /taskrunner.jar.
[program:process2]
command=java -jar /taskrunner.jar
directory=/
autostart=true
autorestart=true
; forward the program's output to supervisord's own stdout/stderr
stdout_logfile=/dev/fd/1
stderr_logfile=/dev/fd/2
; 0 disables log rotation for these streams
stdout_logfile_maxbytes=0
stderr_logfile_maxbytes=0
Empty file.
Loading

0 comments on commit a0f3459

Please sign in to comment.