Skip to content

Commit

Permalink
Merge branch 'main' into add-sql-linting
Browse files Browse the repository at this point in the history
  • Loading branch information
katybaulch committed Nov 28, 2024
2 parents 6d9a219 + b1ec83b commit 38720b1
Show file tree
Hide file tree
Showing 10 changed files with 286 additions and 76 deletions.
34 changes: 0 additions & 34 deletions .trunk/configure-pyright-with-pyenv.sh

This file was deleted.

51 changes: 51 additions & 0 deletions .trunk/configure-pyright.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#! /usr/bin/env bash
# Ensure pyrightconfig.json exists and points pyright at a Python environment.
#
# If no pyrightconfig.json is present, one is generated either from the active
# poetry environment (when `poetry env info --path` reports one) or, failing
# that, via the pyenv-pyright plugin using the venv name from pyproject.toml.
# Afterwards the config is validated; the script exits 1 when it cannot be
# confirmed to reference an environment.
set -e

# Get the name of the expected venv for this repo from the pyproject.toml file.
# `|| true` keeps `set -e` from aborting when pyproject.toml has no venv entry.
venv_name=$(grep -m 1 venv pyproject.toml | tr -s ' ' | tr -d '"' | tr -d "'" | cut -d' ' -f3) || true

# Only generate a config when one does not already exist.
if [[ ! -f pyrightconfig.json ]]; then

  # Capture poetry environment path and check if it's empty.
  poetry_env_info=$(poetry env info --path 2>/dev/null || true)
  if [[ -z ${poetry_env_info} ]]; then
    # No poetry environment available: fall back to pyenv.
    if ! command -v pyenv &>/dev/null; then
      echo "pyenv not installed. Please install pyenv..."
      exit 1
    fi

    # Install the pyenv-pyright plugin when its directory is missing or empty.
    pyenv_root=$(pyenv root)
    dir_path="${pyenv_root}"/plugins/pyenv-pyright
    # trunk-ignore(shellcheck/SC2312)
    if [[ ! -d ${dir_path} ]] || [[ -z $(ls -A "${dir_path}" 2>/dev/null) ]]; then
      git clone https://github.com/alefpereira/pyenv-pyright.git "${dir_path}"
    fi

    # Generate the pyrightconfig.json file.
    pyenv pyright "${venv_name}"
    pyenv local "${venv_name}"
  else
    # Split the poetry environment path into its parent directory and name.
    venv_path=$(dirname "${poetry_env_info}")  # Get directory path
    venv_name=$(basename "${poetry_env_info}") # Get base name

    # Generate the pyrightconfig.json file.
    json_string=$(jq -n \
      --arg v "${venv_name}" \
      --arg vp "${venv_path}" \
      '{venv: $v, venvPath: $vp} ')

    echo "${json_string}" >pyrightconfig.json
  fi
fi

# Check whether required keys are present in pyrightconfig.json.
# The config is accepted when .venv matches the expected (non-empty) name or
# when a non-empty .venvPath is set. `jq -e` makes the exit status reflect the
# result of the expression; without it jq exits 0 regardless of the outcome.
if ! jq -e --arg venv_name "${venv_name}" \
  '(.venv == $venv_name and .venv != "") or (.venvPath != null and .venvPath != "")' \
  pyrightconfig.json >/dev/null 2>&1; then
  echo "Failed to configure pyright to use environment '${venv_name}' as interpreter. Please check pyrightconfig.json..."
  exit 1
fi
exit 0
6 changes: 3 additions & 3 deletions .trunk/trunk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,13 @@ actions:
disabled:
- trunk-check-pre-push
enabled:
- configure-pyright-with-pyenv
- configure-pyright
- trunk-check-pre-commit
- trunk-announce
- trunk-fmt-pre-commit
- trunk-upgrade-available
definitions:
- id: configure-pyright-with-pyenv
run: source .trunk/configure-pyright-with-pyenv.sh
- id: configure-pyright
run: source .trunk/configure-pyright.sh
triggers:
- git_hooks: [pre-commit]
2 changes: 1 addition & 1 deletion app/api/api_v1/routers/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def search_documents(
token.decode_and_validate(db, request, app_token)

# If the search request IDs are null, we want to search using the app token corpora.
if search_body.corpus_import_ids is None:
if search_body.corpus_import_ids == [] or search_body.corpus_import_ids is None:
search_body.corpus_import_ids = cast(Sequence, token.allowed_corpora_ids)

# For the second validation, search request corpora Ids are validated against the
Expand Down
41 changes: 41 additions & 0 deletions docs/local_full_stack_setup/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,47 @@ cp .env.example .env
make start
```

### Load the postgres database from an sql dump

This section outlines the steps for loading a database dump from an sql file into
a local postgres database. This is useful if, for example, you want the latest
production data locally.

Assumptions:

- You've created a `.env` and exported the variables to your local terminal.
- You've run `make build` and `make start`
- You have no open connections to the database.
- You have `psql` installed.

1. Export the postgres db variables from the .env

```shell
export $(cat .env | xargs)
```

2. Drop and recreate the navigator database.

```shell
psql --host=0.0.0.0 --port=5432 --dbname=postgres --username=navigator --password
```

_Note: enter the password from the `.env` when prompted._

When in the postgres shell run:

```shell
postgres=# drop database navigator;
postgres=# create database navigator;
postgres=# \q
```

3. Load the sql dump into the postgres database.

```shell
psql --host=0.0.0.0 --port=5432 --dbname=postgres --username=navigator --password -f ${path_to_sql_dump}/sql_dump.sql
```

### Authenticate with Vespa

```shell
Expand Down
56 changes: 28 additions & 28 deletions makefile-local.defs
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
# definitions for local development
# Definitions for local development

poetry_environment: create_venv
poetry install

check_dev_environment:
ifneq (,$(wildcard ./.env))
echo "Dev environment already configured."
exit 1
install_trunk:
$(eval trunk_installed=$(shell trunk --version > /dev/null 2>&1 ; echo $$? ))
ifneq (${trunk_installed},0)
$(eval OS_NAME=$(shell uname -s | tr A-Z a-z))
curl https://get.trunk.io -fsSL | bash
endif

configure_pyright:
trunk actions run configure-pyright-with-pyenv
uninstall_trunk:
sudo rm -if `which trunk`
rm -ifr ${HOME}/.cache/trunk

dev_install: install_trunk check_dev_environment
# Sets up a local dev environment
configure_pyright:
trunk actions run configure-pyright

create_env:
# Copy .env
cp .env.example .env

setup: install_trunk create_env ## Sets up a local dev environment using Poetry
# Install pip
pip install --upgrade pip
# Install poetry

# Install poetry & dependencies
pip install "poetry==1.8.1"
poetry install

make poetry_environment
make configure_pyright

create_venv:
-pyenv deactivate
pyenv virtualenv 3.10 backend
pyenv activate backend

install_trunk:
$(eval trunk_installed=$(shell trunk --version > /dev/null 2>&1 ; echo $$? ))
ifneq (${trunk_installed},0)
$(eval OS_NAME=$(shell uname -s | tr A-Z a-z))
curl https://get.trunk.io -fsSL | bash
endif
# Sets up a local dev environment backed by a pyenv virtualenv.
# The virtualenv name and the Python version constraint are read from
# pyproject.toml; the virtualenv is created only when `pyenv versions --bare`
# does not already list one with that name. The base interpreter is the first
# installed pyenv version whose name starts with the pyproject Python version.
# Note: the $(eval ...) calls are expanded by make before the shell runs, so
# the lookups happen whether or not the shell `if` branch is taken.
setup_with_pyenv: install_trunk create_env ## Sets up a local dev environment using Pyenv
	$(eval venv_name=$(shell grep 'venv =' pyproject.toml | cut -d '"' -f 2 ))
	if [ -n "$(venv_name)" ] && ! pyenv versions --bare | grep -q "^$(venv_name)$$"; then \
		$(eval python_version=$(shell grep 'python =' pyproject.toml | cut -d '"' -f 2 | sed 's/^\^//')) \
		$(eval pyenv_version=$(shell pyenv versions --bare | grep -m 1 "^$(python_version)" )) \
		pyenv virtualenv $(pyenv_version) $(venv_name); \
	fi
	@eval "$$(pyenv init -)" && \
		pyenv activate $(venv_name) && \
		poetry install

uninstall_trunk:
sudo rm -if `which trunk`
rm -ifr ${HOME}/.cache/trunk
make configure_pyright
10 changes: 5 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "navigator_backend"
version = "1.19.12"
version = "1.19.15"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand All @@ -10,7 +10,7 @@ python = "^3.10"
Authlib = "^0.15.5"
bcrypt = "^3.2.0"
boto3 = "^1.26"
cpr_sdk = { version = "1.9.3", extras = ["vespa"] }
cpr_sdk = { version = "1.9.5", extras = ["vespa"] }
fastapi = "^0.104.1"
fastapi-health = "^0.4.0"
fastapi-pagination = { extras = ["sqlalchemy"], version = "^0.12.19" }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
schema document_passage {

field text_block_not_stemmed type string {
indexing: input text_block | summary | index
stemming: none
}

document document_passage {

field search_weights_ref type reference<search_weights> {
Expand Down Expand Up @@ -134,14 +139,55 @@ schema document_passage {
summary concepts {}
}

document-summary search_summary_with_tokens {
summary family_name {}
summary family_description {}
summary family_import_id {}
summary family_slug {}
summary family_category {}
summary family_publication_ts {}
summary family_geography {}
summary family_geographies {}
summary family_source {}
summary document_import_id {}
summary document_slug {}
summary document_languages {}
summary document_content_type {}
summary document_cdn_object {}
summary document_source_url {}
summary corpus_import_id {}
summary corpus_type_name {}
summary metadata {}
summary text_block {}
summary text_block_id {}
summary text_block_type {}
summary text_block_page {}
summary text_block_coords {}
summary concepts {}
summary text_block_tokens {
source: text_block
tokens
}
}

rank-profile exact inherits default {
function text_score() {
expression: attribute(passage_weight) * fieldMatch(text_block)
}
first-phase {
expression: text_score()
}
match-features: text_score()
match-features: text_score() fieldMatch(text_block)
}

rank-profile exact_not_stemmed inherits default {
function text_score() {
expression: attribute(passage_weight) * fieldMatch(text_block_not_stemmed)
}
first-phase {
expression: text_score()
}
match-features: text_score() fieldMatch(text_block)
}

rank-profile hybrid_no_closeness inherits default {
Expand All @@ -151,7 +197,7 @@ schema document_passage {
first-phase {
expression: text_score()
}
match-features: text_score()
match-features: text_score() bm25(text_block)
}

rank-profile hybrid inherits default {
Expand All @@ -164,6 +210,20 @@ schema document_passage {
first-phase {
expression: text_score()
}
match-features: text_score()
match-features: text_score() bm25(text_block) closeness(text_embedding)
}

rank-profile hybrid_custom_weight inherits default {
inputs {
query(query_embedding) tensor<float>(x[768])
query(bm25_weight) double
}
function text_score() {
expression: attribute(passage_weight) * (query(bm25_weight) * bm25(text_block) + closeness(text_embedding))
}
first-phase {
expression: text_score()
}
match-features: text_score() bm25(text_block) closeness(text_embedding)
}
}
Loading

0 comments on commit 38720b1

Please sign in to comment.