Skip to content

Commit

Permalink
Merge branch 'datacommonsorg:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
bchivers-stanford authored Nov 19, 2024
2 parents ddcfda7 + 864e945 commit 93b8f7c
Show file tree
Hide file tree
Showing 802 changed files with 42,414 additions and 320,755 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
Expand All @@ -50,7 +50,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
uses: github/codeql-action/autobuild@v3

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
Expand All @@ -64,4 +64,4 @@ jobs:
# make release

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@v3
40 changes: 40 additions & 0 deletions .github/workflows/release-branch-checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Guards the release branch: a pull request targeting customdc_stable must
# only contain commits that already exist on datacommonsorg/website master.
name: Release branch checks

on:
  pull_request:
    branches: ["customdc_stable"]

# The job only checks out code and runs read-only git commands, so the
# workflow token is restricted to reading repository contents.
permissions:
  contents: read

jobs:
  verify_all_commits_are_already_in_master:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch all history for accurate comparison
          fetch-depth: 0
          # Check out the PR branch
          ref: ${{ github.event.pull_request.head.ref }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}

      - name: Verify that all commits are already in the master branch
        run: |
          # Add the canonical repository as a second remote so the comparison
          # is made against upstream master even when the PR comes from a fork.
          git remote add dc https://github.com/datacommonsorg/website.git
          # Only master is compared below, so only master is fetched.
          git fetch dc master
          MASTER_BRANCH="dc/master"
          # Get the list of commits in the source branch that are not in the master branch
          MISSING_COMMITS=$(git log --pretty="%H - %s" "$MASTER_BRANCH..HEAD" --)
          if [[ -n "$MISSING_COMMITS" ]]; then
            echo ""
            echo "ERROR: The following commits are not present in $MASTER_BRANCH:"
            echo ""
            echo "$MISSING_COMMITS"
            echo ""
            echo "PRs to release branches should only contain commits that are already in master."
            echo "To fix this PR, reset its branch locally to a commit at or behind https://github.com/datacommonsorg/website/commits/master/ and then force-push it."
            echo "Note that a release branch PR should be based on master and not the previous version of the release branch, which contains merge commits."
            exit 1
          fi
          echo "All commits are present in $MASTER_BRANCH"
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ screenshots/screenshot_url.json
# Local env.
*.swp
.vscode
.idea
!.vscode/launch.json
.env.list

Expand Down Expand Up @@ -70,6 +71,7 @@ experimental/sdg-static/datacommons/nl_interface.min.css

# Custom DC data
dc-data/
custom_dc/env.list

# Topic cache
gen_ordered_list_for_topics.mcf
Expand Down
29 changes: 22 additions & 7 deletions build/cdc_data/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ if [[ $OUTPUT_DIR == "" ]]; then
exit 1
fi

if [[ $DATA_RUN_MODE != "" ]]; then
if [[ $DATA_RUN_MODE != "schemaupdate" ]]; then
echo "DATA_RUN_MODE must be either empty or 'schemaupdate'"
exit 1
fi
echo "DATA_RUN_MODE=$DATA_RUN_MODE"
else
DATA_RUN_MODE="customdc"
fi

echo "INPUT_DIR=$INPUT_DIR"
echo "OUTPUT_DIR=$OUTPUT_DIR"

Expand All @@ -51,7 +61,7 @@ ADDITIONAL_CATALOG_PATH=$DC_NL_EMBEDDINGS_DIR/custom_catalog.yaml
CUSTOM_EMBEDDINGS_INDEX=user_all_minilm_mem

# Set IS_CUSTOM_DC var to true.
# This is used by the embeddings builder to set up a custom dc env.
# This is used by the embeddings builder to set up a custom dc env.
export IS_CUSTOM_DC=true

if [[ $USE_SQLITE == "true" ]]; then
Expand All @@ -67,15 +77,20 @@ cd $WORKSPACE_DIR/import/simple
# Run importer.
python3 -m stats.main \
--input_dir=$INPUT_DIR \
--output_dir=$DC_OUTPUT_DIR
--output_dir=$DC_OUTPUT_DIR \
--mode=$DATA_RUN_MODE

# cd back to workspace dir to run the embeddings builder.
cd $WORKSPACE_DIR

# Run embeddings builder.
python3 -m tools.nl.embeddings.build_embeddings \
--embeddings_name=$CUSTOM_EMBEDDINGS_INDEX \
if [[ $DATA_RUN_MODE == "schemaupdate" ]]; then
echo "Skipping embeddings builder because run mode is 'schemaupdate'."
echo "Schema update complete."
else
# Run embeddings builder.
python3 -m tools.nl.embeddings.build_embeddings \
--embeddings_name=$CUSTOM_EMBEDDINGS_INDEX \
--output_dir=$DC_NL_EMBEDDINGS_DIR \
--additional_catalog_path=$ADDITIONAL_CATALOG_PATH

echo "Data loading completed."
echo "Data loading complete."
fi
4 changes: 2 additions & 2 deletions build/cdc_services/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ else
fi

# Wait for any process to exit
wait
wait -n

# Exit with status of process that exited first
exit $?
exit $?
70 changes: 70 additions & 0 deletions build/ci/cloudbuild.push_cdc_stable.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Updates stable-tagged Docker images for custom DC.
# Assumes the stable branch is already checked out, which it should be
# if this is triggered on push to branch for the stable branch.

steps:
  # Step 0: Initialize submods
  - id: init-submods
    name: gcr.io/cloud-builders/git
    entrypoint: bash
    args:
      - -c
      - |
        set -e
        git submodule update --init --recursive
    # "-" means this step has no dependencies and starts immediately.
    waitFor: ["-"]

  # Step 1: Get a label that combines commit hashes.
  # The label is written to a file so that later steps can read it back.
  - id: get-label
    name: gcr.io/cloud-builders/git
    entrypoint: bash
    args:
      - -c
      - |
        # pipefail: without it the pipeline's status is that of `tail`, so a
        # failing label script would silently produce an empty label file.
        set -eo pipefail
        ./scripts/get_commits_label.sh | tail -1 >"$_IMAGE_LABEL_PATH"
    waitFor: ["init-submods"]

  # Step 2: Services container
  - id: build-and-tag-stable-services
    name: gcr.io/datcom-ci/deploy-tool
    entrypoint: bash
    args:
      - -c
      - |
        set -e
        image_label=$(cat "$_IMAGE_LABEL_PATH")
        ./scripts/build_cdc_services_and_tag_stable.sh $image_label
    waitFor: ["get-label"]

  # Step 3: Data management container
  # Depends only on get-label, the same as the services step above.
  - id: build-and-tag-stable-data
    name: gcr.io/datcom-ci/deploy-tool
    entrypoint: bash
    args:
      - -c
      - |
        set -e
        image_label=$(cat "$_IMAGE_LABEL_PATH")
        ./scripts/build_cdc_data_and_tag_stable.sh $image_label
    waitFor: ["get-label"]

substitutions:
  # File used to hand the image label from get-label to the build steps.
  _IMAGE_LABEL_PATH: "/workspace/tmp_cdc_stable_image_label.txt"

options:
  machineType: "E2_HIGHCPU_32"
17 changes: 15 additions & 2 deletions build/ci/cloudbuild.webdriver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,32 @@ steps:
# These js files generated will be necessary for the flask_webdriver_test task.
./run_test.sh -b
# Download the files needed for nl server to run. Do the download here because
# webdriver runs on multiple processes & we only want to do the download once.
- id: download_nl_files
name: python:3.11.3
entrypoint: /bin/sh
waitFor:
- package_js
args:
- -c
- |
cd tools/nl/download_nl_files
./run.sh
# Run the webdriver tests
- id: flask_webdriver_test
name: gcr.io/datcom-ci/webdriver-chrome:2024-06-05
entrypoint: /bin/sh
waitFor:
- package_js
- download_nl_files
args:
- -c
- |
./run_test.sh --setup_python
./run_test.sh -w
timeout: 1800s
timeout: 2700s # 45 minutes

options:
machineType: "E2_HIGHCPU_32"
30 changes: 30 additions & 0 deletions build/ci/cloudbuild.website_sanity.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build inputs. _WEBSITE_DOMAIN is the domain handed to the sanity script.
substitutions:
  _WEBSITE_DOMAIN: 'datacommons.org'

steps:
  # Single step: invoke ./run_website_sanity.sh with the configured domain
  # as its only argument.
  - id: website_sanity
    name: gcr.io/datcom-ci/webdriver-chrome:2024-06-05
    entrypoint: /bin/bash
    args:
      - -c
      - |
        ./run_website_sanity.sh $_WEBSITE_DOMAIN

# Overall build timeout: 14400 seconds (4 hours).
timeout: 14400s

options:
  machineType: E2_HIGHCPU_32
8 changes: 4 additions & 4 deletions build/nl_server/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ COPY shared/. /workspace/shared/

# Download nl files from gcs
COPY deploy/nl/catalog.yaml .
COPY build/nl_server/requirements.txt /workspace/build/nl_server/requirements.txt
COPY build/nl_server/download_nl_files.py .
RUN pip3 install -r /workspace/build/nl_server/requirements.txt
RUN python3 download_nl_files.py
COPY tools/nl/download_nl_files/requirements.txt /workspace/download_nl_files/requirements.txt
COPY tools/nl/download_nl_files/download_nl_files.py .
RUN pip3 install -r /workspace/download_nl_files/requirements.txt
RUN python3 download_nl_files.py --is_docker_mode=True

# Run server
WORKDIR /workspace
Expand Down
21 changes: 10 additions & 11 deletions build/website_cron_testing/run_website_cron_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if [[ $NODEJS_API_ROOT != "" ]]; then
failure_email="failure_email.json"
python3 differ.py -m diff -e "$TESTING_ENV" -t "$date_str" -g "$TESTING_ENV/$date_str/nodejs_query" -f "$failure_email"
if [[ -e "$failure_email" ]]; then
python3 send_email.py --recipient="datacommons+[email protected]" --email_content="$failure_email"
python3 send_email.py --recipient="datacommons-alerts+tests@google.com" --email_content="$failure_email"
fi
echo "Finished the nodejs Test."
echo "====================================================================================="
Expand All @@ -54,14 +54,14 @@ fi

# Run sanity tests if ENABLE_SANITY is "true"
if [[ $ENABLE_SANITY == "true" ]]; then
echo "====================================================================================="
echo "Starting sanity tests"
echo "====================================================================================="
python3 sanity.py --mode=home --url="$WEB_API_ROOT"
gsutil cp ./output/*.csv gs://datcom-website-periodic-testing/$TESTING_ENV/$date_str/sanity/
rm ./output/*.csv
echo "Finished the sanity tests."
echo "====================================================================================="
echo "====================================================================================="
echo "Starting sanity tests"
echo "====================================================================================="
python3 sanity.py --mode=home --url="$WEB_API_ROOT"
gsutil cp ./output/*.csv gs://datcom-website-periodic-testing/$TESTING_ENV/$date_str/sanity/
rm ./output/*.csv
echo "Finished the sanity tests."
echo "====================================================================================="
else
echo "====================================================================================="
echo "Sanity tests disabled."
Expand Down Expand Up @@ -92,8 +92,7 @@ if [[ $ENABLE_ADVERSARIAL == "true" ]]; then
mkdir -p input
gsutil cp gs://datcom-website-adversarial/input/frequent/* input/
dc_list=("main" "sdg")
for dc in "${dc_list[@]}"
do
for dc in "${dc_list[@]}"; do
echo "====================================================================================="
echo "Executing the Adversarial Test against the $dc index, detection and fulfillment."
python3 adversarial.py --mode=run_all --dc="$dc" --base_url="$WEB_API_ROOT"
Expand Down
Loading

0 comments on commit 93b8f7c

Please sign in to comment.