Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.env
*.pyc
vendor/*
Assignee_Lawyer_Disambiguation/lib/alchemy/config.ini
Expand All @@ -8,16 +9,14 @@ venv
.vscode
*.yml
*.yaml
mydumper/
mydumper/*
!mydumper/mydumper.cnf.template
Development/config.ini
airflow/airflow-webserver.pid
airflow/airflow.db
airflow/lawyer.pickle
airflow/logs/scheduler/2019-01-15/update-db.py.log
airflow/logs/scheduler/2019-01-16/update-db.py.log
airflow/logs/scheduler/latest
airflow/*
!airflow/dags/*
!airflow/airflow.cfg.template
airflow-metadata-db-disk
airflow/unittests.cfg
airflow_pipeline_env.sh
Development/dev_config.ini
Assignee_Lawyer_Disambiguation/lib/alchemy/alchemy_config.ini
airflow-metadata-db-disk/*
Expand All @@ -44,7 +43,9 @@ scratch
**/*.err
pgpubs_*
resources/sql.conf
resources/us-patent-application-*.dtd
config.ini
airflow-metadata-*
patent_db_disk/
TableToggle.json
Z_Frame_job-*.csv
1,228 changes: 1,228 additions & 0 deletions airflow/airflow.cfg.template

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion gender_it
2 changes: 1 addition & 1 deletion lib/utils
Submodule utils updated from 23e24e to b3aa98
37 changes: 23 additions & 14 deletions mydumper/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
# Use an official Debian image as the base
FROM debian:bullseye
FROM patentsview/airflow:0.5

# Install mydumper and libmariadb3 using package manager
RUN apt-get update && \
apt-get install -y libmariadb-dev && apt-get install -y libmariadb3 && apt-get install -y libmariadb-dev-compat
USER root

# Use the mysqlboy/docker-mydumper image as a base
FROM mydumper/mydumper:latest
# Disable MySQL APT repo (it’s causing GPG failures), then install mydumper.
# If mydumper isn't in bullseye main, fall back to bullseye-backports.
RUN set -eux; \
# Comment out any MySQL repo entries (or remove the files if you prefer)
if ls /etc/apt/sources.list.d/*mysql* >/dev/null 2>&1; then \
sed -i 's/^[[:space:]]*deb /# deb /' /etc/apt/sources.list.d/*mysql* || true; \
fi; \
apt-get update; \
(apt-get install -y --no-install-recommends mydumper \
|| (echo "deb http://deb.debian.org/debian bullseye-backports main" > /etc/apt/sources.list.d/backports.list \
&& apt-get update \
&& apt-get -y -t bullseye-backports install --no-install-recommends mydumper)); \
rm -rf /var/lib/apt/lists/*

# Copy mydumper configuration
COPY mydumper.cnf /etc/
# Copy config (optional)
COPY mydumper.cnf /etc/mydumper.cnf

# Drop back to runtime user
USER airflow

# (Optional) keep this as a quick sanity check at runtime; your compose command overrides it anyway
CMD ["mydumper", "--version"]

# Set the command to run when the container starts
#CMD ["mydumper", "-c", "/etc/mydumper.cnf"]
#CMD ["mydumper", "--version"]
#CMD ["mydumper"]
CMD ["echo", "Container started without running mydumper"]
9 changes: 4 additions & 5 deletions mydumper/docker-compose-mydumper.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
version: '3'
services:
mydumper_service:
build:
Expand All @@ -10,7 +9,7 @@ services:
- S3_ENDPOINT_URL
- S3_BUCKET
volumes:
- {LOCAL_PATH}:/DatabaseBackups
- {LOCAL_PATH}:/project
command:
- mydumper
- /home/ec2-user/PipelineData/DatabaseBackups:/DatabaseBackups
- /home/ec2-user/PatentsView-DB:/project
- ./mydumper.cnf:/etc/mydumper.cnf
command: mydumper --verbose=3
8 changes: 4 additions & 4 deletions mydumper/mydumper.cnf → mydumper/mydumper.cnf.template
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[mydumper]
host =
user =
password =
port = 3306
host =
user =
password =
port =

[myloader]
host =
Expand Down
7 changes: 0 additions & 7 deletions persistent_files/ipc_technology.csv
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 15/%,,
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 17/%,,
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 19/%,"June 10, 2014",
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 23/%,"July 1, 2023",Newly added
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 25/%,"July 1, 2023",Newly added
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 101/%,,
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04R%,,
2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04S%,,
Expand Down Expand Up @@ -80,16 +78,13 @@
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06M%,,
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06N%,,
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06T%,,
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06V%,"July 1, 2021",Newly added
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G10L%,,
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G11C%,,
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16B%,"July 10, 2019",Newly added
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16C%,"July 10, 2019",Newly added
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16Y%,"May 17, 2020",Newly added
6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16Z%,"July 10, 2019",Newly added
7,Electrical engineering,Électrotechnique,Electricidad - Electrónica,IT methods for management,Méthodes de traitement des données à des fins de gestion,Métodos de gestión mediante T.I.,G06Q%,,
8,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Semiconductors,Semiconducteurs,Semiconductores,H01L%,,
8,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Semiconductors,Semiconducteurs,Semiconductores,H10%,"July 1, 2023",Newly added
9,Instruments,Instruments,Instrumentos,Optics,Optique,Óptica,G02B%,,
9,Instruments,Instruments,Instrumentos,Optics,Optique,Óptica,G02C%,,
9,Instruments,Instruments,Instrumentos,Optics,Optique,Óptica,G02F%,,
Expand Down Expand Up @@ -166,7 +161,6 @@
13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61G%,,
13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61H%,,
13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61J%,,
13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61K 40/%,"July 3, 2024",Newly added
13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61L%,,
13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61M%,,
13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61N%,,
Expand Down Expand Up @@ -665,7 +659,6 @@
32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64D%,,
32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64F%,,
32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64G%,,
32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64U%,"July 1, 2023",Newly added
33,Other fields,Autres domaines,Otros sectores,"Furniture, games","Mobilier, jeux","Mobiliario, juegos",A47B%,,
33,Other fields,Autres domaines,Otros sectores,"Furniture, games","Mobilier, jeux","Mobiliario, juegos",A47C%,,
33,Other fields,Autres domaines,Otros sectores,"Furniture, games","Mobilier, jeux","Mobiliario, juegos",A47D%,,
Expand Down
Loading