datacommonsorg
diff --git a/‎go.mod‎
Lines changed: 9 additions & 3 deletions b/‎go.mod‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎go.sum‎
Lines changed: 42 additions & 253 deletions b/‎go.sum‎
Lines changed: 42 additions & 253 deletions
diff --git a/‎import-automation/executor/Dockerfile‎
Lines changed: 3 additions & 2 deletions b/‎import-automation/executor/Dockerfile‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎import-automation/executor/app/configs.py‎
Lines changed: 13 additions & 2 deletions b/‎import-automation/executor/app/configs.py‎
Lines changed: 13 additions & 2 deletions
diff --git a/‎import-automation/executor/app/executor/cloud_batch.py‎
Lines changed: 4 additions & 2 deletions b/‎import-automation/executor/app/executor/cloud_batch.py‎
Lines changed: 4 additions & 2 deletions
@@ -3,7 +3,13 @@ module github.com/datacommonsorg/data
 go 1.14
 
 require (
-	cloud.google.com/go/storage v1.10.0 // indirect
-	google.golang.org/grpc v1.29.1 // indirect
-	googlemaps.github.io/maps v1.2.2 // indirect
+	github.com/google/go-cmp v0.5.9 // indirect
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/kr/pretty v0.3.0 // indirect
+	github.com/rogpeppe/go-internal v1.9.0 // indirect
+	github.com/stretchr/testify v1.8.3 // indirect
+	go.opencensus.io v0.24.0 // indirect
+	golang.org/x/time v0.3.0 // indirect
+	googlemaps.github.io/maps v1.2.2
+	gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
 )
@@ -53,7 +53,7 @@ ENV GOOGLE_CLOUD_LOGGING_MAX_LATENCY=5.0
 ENV GOOGLE_CLOUD_LOGGING_GRACE_PERIOD=30.0
 ENV GRPC_VERBOSITY=ERROR
 
-RUN wget https://storage.googleapis.com/datacommons_public/import_tools/import-tool.jar
+ADD https://storage.googleapis.com/datacommons_public/import_tools/import-tool.jar /import-tool.jar
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 
@@ -63,7 +63,8 @@ COPY --from=data . /data
 
 # build_type: cloud (clone github data repo) 
 FROM base as cloud
-RUN git clone https://github.com/datacommonsorg/data.git
+ARG CACHE_BUSTER=1
+RUN echo "Cache buster: $CACHE_BUSTER" && git clone https://github.com/datacommonsorg/data.git
 
 # build the final image
 FROM ${build_type} as final
 
@@ -34,7 +34,7 @@ class ExecutorConfig:
 
     # ID of the Google Cloud project that hosts the executor. The project
     # needs to enable App Engine and Cloud Scheduler.
-    gcp_project_id: str = 'datcom-import-automation'
+    gcp_project_id: str = 'datcom-import-automation-prod'
     # ID of the Google Cloud project that stores generated CSVs and MCFs. The
     # project needs to enable Cloud Storage and gives the service account the
     # executor uses sufficient permissions to read and write the bucket below.
@@ -55,6 +55,10 @@ class ExecutorConfig:
     # Name of the Cloud Storage bucket to store the generated data files
     # for importing to dev.
     storage_dev_bucket_name: str = 'unresolved_mcf'
+    # DataCommons API key
+    dc_api_key: str = ''
+    # Gemini API key
+    gemini_api_key: str = ''
     # Executor output prefix in the storage_dev_bucket_name bucket.
     storage_executor_output_prefix: str = 'datcom-dev-imports'
     # Name of the file that specifies the most recently generated data files
@@ -74,6 +78,10 @@ class ExecutorConfig:
     # The content of latest_version.txt would be a single line of
     # '2020_07_15T12_07_17_365264_07_00'.
     storage_version_filename: str = 'latest_version.txt'
+    # GCP secret name containing import config.
+    import_config_secret: str = 'import-config'
+    # Config override file.
+    config_override_file: str = ''
     # File with list of historical versions with the most recent at the top
     storage_version_history_filename: str = 'version_history.txt'
     # Name of the file that contains the import_metadata_mcf for the import.
@@ -130,7 +138,10 @@ class ExecutorConfig:
     # Arguments for the user script
     user_script_args: List[str] = ()
     # Environment variables for the user script
-    user_script_env: dict = None
+    user_script_env: dict = dataclasses.field(default_factory=lambda: {
+        "EXISTING_STATVAR_MCF":
+            "gs://unresolved_mcf/scripts/statvar/stat_vars.mcf"
+    })
     # Invoke import tool genmcf.
     invoke_import_tool: bool = True
     # Invoke differ tool.
 
@@ -151,6 +151,7 @@ def create_job_request(import_name: str, import_config: dict, import_spec: dict,
 
     resources["cpu"] = resources["cpu"] * 1000
     resources["memory"] = resources["memory"] * 1024
+    schedule = import_spec.get('cron_schedule')
     import_config_string = json.dumps(import_config)
     job_name = import_name.split(':')[1]
     job_name = job_name.replace("_", "-").lower()
@@ -159,7 +160,8 @@ def create_job_request(import_name: str, import_config: dict, import_spec: dict,
         "importName": import_name,
         "importConfig": import_config_string,
         "resources": resources,
-        "timeout": timeout
+        "timeout": timeout,
+        "schedule": schedule
     }
     argument_string = json.dumps(argument_payload)
     final_payload = {
@@ -198,7 +200,7 @@ def execute_cloud_batch_job(project_id: str, location: str, job_name: str,
     runnable.container.image_uri = image_uri
     runnable.container.commands = [
         f"--import_name={import_name}",
-        f'--import_config={json.dumps({"gcs_project_id": project_id, "storage_prod_bucket_name": gcs_bucket, "spanner_project_id": project_id, "spanner_instance_id": spanner_instance, "spanner_database_id": spanner_db})}'
+        f'--import_config={json.dumps({"gcp_project_id": project_id, "gcs_project_id": project_id, "storage_prod_bucket_name": gcs_bucket, "spanner_project_id": project_id, "spanner_instance_id": spanner_instance, "spanner_database_id": spanner_db})}'
     ]
 
     # We can specify what resources are requested by a task.