diff --git a/.gitignore b/.gitignore index 3e029c27..39b32c04 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.env *.pyc vendor/* Assignee_Lawyer_Disambiguation/lib/alchemy/config.ini @@ -8,16 +9,14 @@ venv .vscode *.yml *.yaml -mydumper/ +mydumper/* +!mydumper/mydumper.cnf.template Development/config.ini -airflow/airflow-webserver.pid -airflow/airflow.db -airflow/lawyer.pickle -airflow/logs/scheduler/2019-01-15/update-db.py.log -airflow/logs/scheduler/2019-01-16/update-db.py.log -airflow/logs/scheduler/latest +airflow/* +!airflow/dags/* +!airflow/airflow.cfg.template airflow-metadata-db-disk -airflow/unittests.cfg +airflow_pipeline_env.sh Development/dev_config.ini Assignee_Lawyer_Disambiguation/lib/alchemy/alchemy_config.ini airflow-metadata-db-disk/* @@ -44,7 +43,9 @@ scratch **/*.err pgpubs_* resources/sql.conf +resources/us-patent-application-*.dtd config.ini airflow-metadata-* patent_db_disk/ TableToggle.json +Z_Frame_job-*.csv diff --git a/gender_it b/gender_it index e8af867f..cc0f76d0 160000 --- a/gender_it +++ b/gender_it @@ -1 +1 @@ -Subproject commit e8af867f17354c28ae23de213db29213e49a8878 +Subproject commit cc0f76d008636a94d4f36601f3162df64f805015 diff --git a/lib/utils b/lib/utils index 23e24ec5..b3aa98b6 160000 --- a/lib/utils +++ b/lib/utils @@ -1 +1 @@ -Subproject commit 23e24ec5fb1310f653345cce995f39eb4288430d +Subproject commit b3aa98b60fe026f9b078ceb88b75c7747d8f732c diff --git a/persistent_files/ipc_technology.csv b/persistent_files/ipc_technology.csv index 126179ab..ddf33134 100644 --- a/persistent_files/ipc_technology.csv +++ b/persistent_files/ipc_technology.csv @@ -41,8 +41,6 @@ 2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 15/%,, 2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 17/%,, 2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 19/%,"June 10, 2014", -2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 23/%,"July 1, 2023",Newly added -2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 25/%,"July 1, 2023",Newly added 2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04N 101/%,, 2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04R%,, 2,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Audio-visual technology,Techniques audiovisuelles,Tecnología audiovisual,H04S%,, @@ -80,16 +78,13 @@ 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06M%,, 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06N%,, 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06T%,, -6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G06V%,"July 1, 2021",Newly added 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G10L%,, 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G11C%,, 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16B%,"July 10, 2019",Newly added 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16C%,"July 10, 2019",Newly added -6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16Y%,"May 17, 2020",Newly added 6,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Computer technology,Informatique,Tecnología informática,G16Z%,"July 10, 2019",Newly added 7,Electrical engineering,Électrotechnique,Electricidad - Electrónica,IT methods for management,Méthodes de traitement des données à des fins de gestion,Métodos de gestión mediante T.I.,G06Q%,, 8,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Semiconductors,Semiconducteurs,Semiconductores,H01L%,, -8,Electrical engineering,Électrotechnique,Electricidad - Electrónica,Semiconductors,Semiconducteurs,Semiconductores,H10%,"July 1, 2023",Newly added 9,Instruments,Instruments,Instrumentos,Optics,Optique,Óptica,G02B%,, 9,Instruments,Instruments,Instrumentos,Optics,Optique,Óptica,G02C%,, 9,Instruments,Instruments,Instrumentos,Optics,Optique,Óptica,G02F%,, @@ -166,7 +161,6 @@ 13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61G%,, 13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61H%,, 13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61J%,, -13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61K 40/%,"July 3, 2024",Newly added 13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61L%,, 13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61M%,, 13,Instruments,Instruments,Instrumentos,Medical technology,Technologie médicale,Tecnología médica,A61N%,, @@ -665,7 +659,6 @@ 32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64D%,, 32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64F%,, 32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64G%,, -32,Mechanical engineering,Mécanique,Ingeniería mecánica,Transport,Transport,Transporte,B64U%,"July 1, 2023",Newly added 33,Other fields,Autres domaines,Otros sectores,"Furniture, games","Mobilier, jeux","Mobiliario, juegos",A47B%,, 33,Other fields,Autres domaines,Otros sectores,"Furniture, games","Mobilier, jeux","Mobiliario, juegos",A47C%,, 33,Other fields,Autres domaines,Otros sectores,"Furniture, games","Mobilier, jeux","Mobiliario, juegos",A47D%,, diff --git a/updater/collect_supplemental_data/cpc_parser/download_cpc.py b/updater/collect_supplemental_data/cpc_parser/download_cpc.py index bdc0abae..00d68b4a 100644 --- a/updater/collect_supplemental_data/cpc_parser/download_cpc.py +++ b/updater/collect_supplemental_data/cpc_parser/download_cpc.py @@ -125,7 +125,6 @@ def find_cpc_grant_and_pgpub_urls( Scrape unfiltered grant and pgpub directories and filter out files from earlier months. """ - # 2-week window today = datetime.date.today() first_day_of_month = datetime.date(today.year, today.month, 1) @@ -312,4 +311,4 @@ def post_download(**kwargs): # post_download(**{ # "execution_date": datetime.date(2021, 12, 30) # }) - find_cpc_schema_url() \ No newline at end of file + find_cpc_schema_url() diff --git a/updater/disambiguation/__init__.py b/updater/disambiguation/__init__.py index ffb3d76c..c529198f 100644 --- a/updater/disambiguation/__init__.py +++ b/updater/disambiguation/__init__.py @@ -1 +1 @@ -from . import inventor_disambiguation \ No newline at end of file +#from . import inventor_disambiguation diff --git a/updater/disambiguation/hierarchical_clustering_disambiguation b/updater/disambiguation/hierarchical_clustering_disambiguation index ee259a77..11dec236 160000 --- a/updater/disambiguation/hierarchical_clustering_disambiguation +++ b/updater/disambiguation/hierarchical_clustering_disambiguation @@ -1 +1 @@ -Subproject commit ee259a7760d5b7cf6fcd4c95e58aa18465712fbd +Subproject commit 11dec236c2da607ca853450ce5a0d9f7b905be69