Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AG-1471, AG-1473, AG-1474: add workflow to create agora/data Docker image and run end-to-end tests #1311

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 146 additions & 0 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
name: Build Data Image and Run End-to-end Tests

on: push

jobs:
  e2e:
    # Run in the Sage repo on develop or main branches and on all branches in
    # user-owned forks. Forks do not have access to the Sage repo's secrets,
    # so this workflow does not run on pull_request; instead it runs on pushes
    # to develop/main or on pushes to a repo the actor owns (i.e. their fork).
    if: ${{ github.ref_name == 'develop' || github.ref_name == 'main' || github.actor == github.repository_owner }}
    timeout-minutes: 60
    runs-on: ubuntu-20.04
    environment: e2e
    env:
      CI: true
      LOCAL_DATA_PATH: './local/data'
      LOCAL_TEAM_IMAGES_DIR: './local/data/team_images'
      LOCAL_COLLECTIONS_PATH: './scripts/collections.csv'
      LOCAL_IMPORT_SCRIPT_PATH: './docker/import-data.sh'
      LOCAL_CREATE_INDEXES_PATH: './scripts/mongo-create-Indexes.js'
      DOCKER_COMPOSE_PATH: './docker/docker-compose.yml'
    steps:
      - uses: actions/checkout@v4

      # Derive the data image tag from the manifest synId and data version
      # declared in package.json; later steps read these from GITHUB_ENV.
      - name: get data image tag, data manifest id, and data version
        env:
          PACKAGE_JSON_PATH: 'package.json'
          DATA_IMAGE_NAME: '${{ vars.DB_NAME }}/data'
        run: |
          DATA_MANIFEST_ID=$(jq -r '."data-file"' "${{ env.PACKAGE_JSON_PATH }}")
          echo "DATA_MANIFEST_ID=${DATA_MANIFEST_ID}" >> "$GITHUB_ENV"

          DATA_VERSION=$(jq -r '."data-version"' "${{ env.PACKAGE_JSON_PATH }}")
          echo "DATA_VERSION=${DATA_VERSION}" >> "$GITHUB_ENV"

          DATA_IMAGE_TAG="${{ env.DATA_IMAGE_NAME }}:${DATA_MANIFEST_ID}-v${DATA_VERSION}"
          echo "DATA_IMAGE_TAG=${DATA_IMAGE_TAG}" >> "$GITHUB_ENV"

      # TODO - check remote repository
      - name: check whether image exists
        id: image-exists
        run: |
          echo "==> data image tag: ${{ env.DATA_IMAGE_TAG }}"
          echo "IMAGE_EXISTS=false" >> "$GITHUB_ENV"

      # TODO - move into job which conditionally runs based on IMAGE_EXISTS
      # https://github.com/actions/setup-python/issues/807
      - name: create placeholder requirements.txt file
        run: touch ./requirements.txt

      - uses: actions/setup-python@v5
        with:
          python-version: '3.9'
          cache: 'pip'

      - name: install synapseclient
        run: pip install --upgrade synapseclient

      - name: download data from synapse
        timeout-minutes: 10
        env:
          SYNAPSE_AUTH_TOKEN: ${{ secrets.SYNAPSE_AUTH_TOKEN }}
        run: |
          echo "==> make directory for team images"
          mkdir -p "${{ env.LOCAL_TEAM_IMAGES_DIR }}"

          echo "==> download manifest file from Synapse"
          synapse get --downloadLocation "${{ env.LOCAL_DATA_PATH }}" -v "${{ env.DATA_VERSION }}" "${{ env.DATA_MANIFEST_ID }}"

          echo "==> download all files referenced in manifest from Synapse"
          tail -n +2 "${{ env.LOCAL_DATA_PATH }}/data_manifest.csv" | while IFS=, read -r id version; do
            echo "===> Downloading ${id}.${version}"
            synapse get --downloadLocation "${{ env.LOCAL_DATA_PATH }}" -v "${version}" "${id}"
          done

          echo "==> download team images"
          synapse get -r --downloadLocation "${{ env.LOCAL_TEAM_IMAGES_DIR }}/" "${{ vars.TEAM_IMAGES_ID }}"

      - name: build image
        env:
          DOCKERFILE_PATH: './docker/Dockerfile'
        run: |
          docker build \
            -t "${{ env.DATA_IMAGE_TAG }}" \
            -f "${{ env.DOCKERFILE_PATH }}" \
            --build-arg LOCAL_DATA_PATH="${{ env.LOCAL_DATA_PATH }}" \
            --build-arg LOCAL_COLLECTIONS_PATH="${{ env.LOCAL_COLLECTIONS_PATH }}" \
            --build-arg LOCAL_IMPORT_SCRIPT_PATH="${{ env.LOCAL_IMPORT_SCRIPT_PATH }}" \
            --build-arg LOCAL_CREATE_INDEXES_PATH="${{ env.LOCAL_CREATE_INDEXES_PATH }}" \
            .
      # TODO - push image to remote repository

      # TODO - move into job that pulls image from remote repository
      - uses: actions/setup-node@v4
        with:
          node-version: '16'

      - name: install dependencies
        run: npm install

      - name: install Playwright Browsers
        run: npx playwright install --with-deps

      - name: start database, server, and app
        env:
          DB_USER: ${{ secrets.DB_USER }}
          DB_PASS: ${{ secrets.DB_PASS }}
          DB_PORT: ${{ vars.DB_PORT }}
          DB_NAME: ${{ vars.DB_NAME }}
        run: |
          echo "==> start database and data containers"
          docker compose -f "${{ env.DOCKER_COMPOSE_PATH }}" up -d

          echo "==> wait for data container to finish seeding database container"
          DATA_CONTAINER=$(docker compose -f "${{ env.DOCKER_COMPOSE_PATH }}" ps -a --format '{{.Name}}' mongo-seed)
          docker wait "${DATA_CONTAINER}"

          echo "==> start server and app in background and wait for server to grab port"
          MONGODB_HOST="localhost" MONGODB_PORT="${{ env.DB_PORT }}" APP_ENV="e2e" npm run dev >/dev/null 2>&1 &
          sleep 5

      - name: run e2e tests
        run: npx playwright test

      - name: clean up database, server, and app
        # Run even when the e2e tests fail (but not when the run is cancelled)
        # so containers and background processes are always torn down.
        if: ${{ !cancelled() }}
        env:
          DB_USER: ${{ secrets.DB_USER }}
          DB_PASS: ${{ secrets.DB_PASS }}
          DB_PORT: ${{ vars.DB_PORT }}
          DB_NAME: ${{ vars.DB_NAME }}
        run: |
          echo "==> stop app"
          pid=$(lsof -i :3000 -t) && kill ${pid}

          echo "==> stop server"
          pid=$(lsof -i :8080 -t) && kill ${pid}

          echo "==> stop database"
          docker compose -f "${{ env.DOCKER_COMPOSE_PATH }}" down

      - uses: actions/upload-artifact@v4
        if: ${{ !cancelled() }}
        with:
          name: playwright-report
          path: playwright-report/
          retention-days: 5
27 changes: 27 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Data image for Agora e2e tests: bundles a data release plus team images and
# seeds a mongodb instance at container startup via import-data.sh.
FROM mongo:latest

# Paths on local machine - may be overwritten at build time
ARG LOCAL_DATA_PATH="./local/data"
ARG LOCAL_COLLECTIONS_PATH="./scripts/collections.csv"
ARG LOCAL_IMPORT_SCRIPT_PATH="./docker/import-data.sh"
ARG LOCAL_CREATE_INDEXES_PATH="./scripts/mongo-create-Indexes.js"

# Paths within the image; import-data.sh reads these at container runtime.
ENV DATA_DIR="input_data"
ENV TEAM_IMAGES_DIR="${DATA_DIR}/team_images"
ENV COLLECTIONS_FILE="collections.csv"
ENV IMPORT_DATA_FILE="import-data.sh"
ENV CREATE_INDEXES_FILE="create-indexes.js"

# Copy data and scripts to image
COPY "${LOCAL_DATA_PATH}" "${DATA_DIR}"
COPY "${LOCAL_COLLECTIONS_PATH}" "${COLLECTIONS_FILE}"
COPY "${LOCAL_IMPORT_SCRIPT_PATH}" "${IMPORT_DATA_FILE}"
COPY "${LOCAL_CREATE_INDEXES_PATH}" "${CREATE_INDEXES_FILE}"

# Make scripts executable (one RUN keeps it to a single image layer)
RUN chmod +x "${IMPORT_DATA_FILE}" "${CREATE_INDEXES_FILE}"

# On container startup, import data into mongodb. Exec-form CMD does not
# expand environment variables, so the script name is hard-coded here; it
# must stay in sync with IMPORT_DATA_FILE above.
CMD ["/bin/bash", "import-data.sh"]
26 changes: 26 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Agora Docker Image

Files in this directory:

- `Dockerfile`: creates the `agora/data` docker image, which bundles the data files for a particular Agora data release (manifest synId + version) together with the team images, and seeds a mongodb instance at container startup using `import-data.sh`
- `docker-compose.yml`: spins up `mongo` and `agora/data` docker containers

## Workflow Setup

The following secrets and variables need to be set up in GitHub for the `e2e.yml` workflow to create the `agora/data` Docker image:

e2e Environment secrets:

| Variable | Description | Example |
| ------------------ | ------------------------------------------ | ----------------- |
| DB_USER | The database user | dbuser |
| DB_PASS | The database password | supersecret |
| SYNAPSE_AUTH_TOKEN | The Synapse service user view/download PAT | token-string-here |

e2e Environment variables:

| Variable | Description | Example |
| -------------- | ----------------------------------- | ------- |
| DB_NAME | The database name | agora |
| DB_PORT | The database port | 27017 |
| TEAM_IMAGES_ID | The synId of the team images folder | syn123 |
20 changes: 20 additions & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
services:
  mongodb:
    image: mongo
    ports:
      # Publish the container's fixed mongod port (27017) on the host's
      # DB_PORT. The previous `expose` entry listed the host port where a
      # container port belongs and was redundant alongside `ports`, so it
      # has been removed.
      - "${DB_PORT}:27017"
    environment:
      - MONGO_INITDB_ROOT_USERNAME=${DB_USER}
      - MONGO_INITDB_ROOT_PASSWORD=${DB_PASS}
      - MONGO_INITDB_DATABASE=${DB_NAME}
  mongo-seed:
    # Data image built by the e2e workflow; seeds mongodb, then exits.
    image: ${DATA_IMAGE_TAG}
    depends_on:
      - mongodb
    environment:
      - DB_HOST=mongodb # must match mongo service name
      - DB_USER=${DB_USER}
      - DB_PASS=${DB_PASS}
      - DB_NAME=${DB_NAME}
42 changes: 42 additions & 0 deletions docker/import-data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash

# Imports Agora collections, indexes, and team images into a mongodb instance.
#
# Expects the following environment variables to be set:
# - DB_HOST - mongo db host
# - DB_USER - mongo db root username
# - DB_PASS - mongo db root password
# - DB_NAME - mongo db name
# - COLLECTIONS_FILE - path to csv file that maps collection names to file names
# - DATA_DIR - path to directory that contains collections data
# - TEAM_IMAGES_DIR - path to directory that contains team images files
# - CREATE_INDEXES_FILE - path to file that creates mongo db indexes

# Abort on error: https://bertvv.github.io/cheat-sheets/Bash.html#writing-robust-scripts-and-debugging
set -o errexit # abort on nonzero exitstatus
set -o nounset # abort on unbound variable
set -o pipefail # don't hide errors within pipes

# Import each collection listed in the csv (collection name, json file name)
echo "importing collections"
while IFS=',' read -r collection filename; do
  # Strip a trailing carriage return in case the csv has CRLF line endings.
  filename="${filename%$'\r'}"
  echo "importing ${collection} from ${filename}"
  mongoimport \
    -h "${DB_HOST}" -d "${DB_NAME}" -u "${DB_USER}" -p "${DB_PASS}" \
    --authenticationDatabase admin \
    --collection "${collection}" \
    --jsonArray --drop \
    --file "${DATA_DIR}/${filename}"
done < "${COLLECTIONS_FILE}"

# Create indexes
echo "creating indexes"
mongosh \
  --host "${DB_HOST}" -u "${DB_USER}" -p "${DB_PASS}" \
  --authenticationDatabase admin \
  "${CREATE_INDEXES_FILE}"

# Import team images. A glob loop with nullglob replaces `ls -1r *.{...}`:
# with errexit/pipefail set, ls exiting nonzero on an extension with no
# matches would have aborted the whole script. nullglob makes an unmatched
# pattern expand to nothing instead.
# NOTE(review): the previous version iterated in reverse-sorted order;
# import order is presumed insignificant to mongofiles - confirm.
echo "importing images"
pushd "${TEAM_IMAGES_DIR}"
shopt -s nullglob
for image in *.jpg *.jpeg *.png; do
  mongofiles \
    -h "${DB_HOST}" -d "${DB_NAME}" -u "${DB_USER}" -p "${DB_PASS}" \
    --authenticationDatabase admin \
    -v put "${image}"
  echo "${image}"
done
shopt -u nullglob
popd
14 changes: 8 additions & 6 deletions playwright.config.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { defineConfig, devices } from '@playwright/test';

export const baseURL = 'http://localhost:8080';
const port = 8080;
export const baseURL = `http://127.0.0.1:${port}`;

/**
* Read environment variables from file.
Expand All @@ -14,17 +15,17 @@ export const baseURL = 'http://localhost:8080';
export default defineConfig({
testDir: './tests',
// timeout for every test
timeout: 25 * 60 * 1000,
timeout: 3 * 60 * 1000,
/* Run tests in files in parallel */
fullyParallel: true,
/* Fail the build on CI if you accidentally left test.only in the source code. */
forbidOnly: !!process.env.CI,
/* Retry on CI only */
retries: process.env.CI ? 2 : 0,
/* Opt out of parallel tests on CI. */
workers: process.env.CI ? 1 : undefined,
workers: process.env.CI ? 2 : undefined,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: 'html',
reporter: process.env.CI ? [['list'], ['html']] : 'html',
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
use: {
/* Base URL to use in actions like `await page.goto('/')`. */
Expand Down Expand Up @@ -75,7 +76,8 @@ export default defineConfig({
/* Run your local dev server before starting the tests */
webServer: {
command: 'npm run start',
url: baseURL,
reuseExistingServer: !process.env.CI,
port,
reuseExistingServer: true,
Comment on lines +79 to +80
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reuse the existing server rather than having playwright start the server for us

timeout: 120 * 1000,
},
});
16 changes: 16 additions & 0 deletions scripts/collections.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
genes,rnaseq_differential_expression.json
geneslinks,network.json
geneinfo,gene_info.json
teaminfo,team_info.json
genesproteomics,proteomics.json
genesmetabolomics,metabolomics.json
genesneuropathcorr,neuropath_corr.json
geneexpvalidation,target_exp_validation_harmonized.json
genescoredistribution,distribution_data.json
genesoverallscores,overall_scores.json
rnaboxdistribution,rna_distribution_data.json
proteomicsboxdistribution,proteomics_distribution_data.json
proteomicstmt,proteomics_tmt.json
proteomicssrm,proteomics_srm.json
genesbiodomains,genes_biodomains.json
biodomaininfo,biodomain_info.json
35 changes: 21 additions & 14 deletions src/server/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,30 +38,37 @@
doc && ('doc => ' + util.inspect(doc)), '\n');
}); */

console.log(process.env);
console.log(process.env.MONGODB_HOST);
console.log(process.env.MONGODB_PORT);

// Set the database url
if (
process.env.MONGODB_HOST &&
process.env.MONGODB_PORT &&
process.env.APP_ENV
) {
const results = awsParamStore.getParametersSync(
[
'/agora-' + process.env.APP_ENV + '/MongodbUsername',
'/agora-' + process.env.APP_ENV + '/MongodbPassword',
],
{ region: 'us-east-1' }
);
let dbUser: string | undefined;
let dbPass: string | undefined;
if (process.env.APP_ENV === 'e2e') {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Get credentials from environment variables when running e2e tests

dbUser = process.env.DB_USER;
dbPass = process.env.DB_PASS;
} else {
const results = awsParamStore.getParametersSync(
[
'/agora-' + process.env.APP_ENV + '/MongodbUsername',
'/agora-' + process.env.APP_ENV + '/MongodbPassword',
],
{ region: 'us-east-1' }
);
if (results && results.Parameters) {
dbUser = results.Parameters[1]['Value'];
dbPass = results.Parameters[0]['Value'];
}
}

if (results && results.Parameters) {
if (dbUser && dbPass) {
database.url =
'mongodb://' +
results.Parameters[1]['Value'] +
dbUser +
':' +
results.Parameters[0]['Value'] +
dbPass +
'@' +
process.env.MONGODB_HOST +
':' +
Expand All @@ -73,7 +80,7 @@
database.url = 'mongodb://localhost:27017/agora';
}

mongoose.connect(database.url);

Check warning on line 83 in src/server/api.ts

View workflow job for this annotation

GitHub Actions / build (16.x)

Promises must be awaited, end with a call to .catch, end with a call to .then with a rejection handler or be explicitly marked as ignored with the `void` operator

// Get the default connection
const connection = mongoose.connection;
Expand Down
Loading