diff --git a/.github/workflows/pr_testing.yml b/.github/workflows/pr_testing.yml index d718f6bfb..88b6ec4ba 100644 --- a/.github/workflows/pr_testing.yml +++ b/.github/workflows/pr_testing.yml @@ -46,6 +46,11 @@ on: type: boolean required: false default: false + run-s3: + description: "Run S3 Datasets Tests" + type: boolean + required: false + default: false # Limit CI to cancel previous runs in the same PR concurrency: @@ -108,8 +113,6 @@ jobs: # Output to GitHub Actions expected format echo "matrix=$joined" >> $GITHUB_OUTPUT - - run-python-tests: name: Main Python Tests needs: [get-msg, get-py-ver-matrix] @@ -148,7 +151,7 @@ jobs: run: uv run ruff check . - name: Run Tests - run: uv run pytest tests/ -m "not (snowflake or mysql or postgres or sf_masked)" -rs + run: uv run pytest tests/ -m "not (snowflake or mysql or postgres or sf_masked or s3)" -rs run-defog-daily-update: name: Run DEFOG Daily Update @@ -232,3 +235,18 @@ jobs: python-versions: ${{ github.event_name == 'workflow_dispatch' && needs.get-py-ver-matrix.outputs.matrix || '["3.10", "3.11", "3.12"]' }} + + run-s3-tests: + name: S3 datasets Tests + needs: [get-msg, get-py-ver-matrix] + if: | + (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run all]')) || + (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run s3]')) || + (github.event_name == 'workflow_dispatch' && (inputs.run-all || inputs.run-s3)) + uses: ./.github/workflows/s3_testing.yml + secrets: + READ_LLM_FIXTURES_ROLE: ${{ secrets.READ_LLM_FIXTURES_ROLE }} + with: + python-versions: ${{ github.event_name == 'workflow_dispatch' + && needs.get-py-ver-matrix.outputs.matrix + || '["3.10", "3.11", "3.12"]' }} diff --git a/.github/workflows/s3_testing.yml b/.github/workflows/s3_testing.yml new file mode 100644 index 000000000..21285036a --- /dev/null +++ b/.github/workflows/s3_testing.yml @@ -0,0 +1,55 @@ +name: Run S3 Tests + +on: + workflow_call: + inputs: + python-versions: + 
description: "JSON string of Python versions" + type: string + required: true + secrets: + READ_LLM_FIXTURES_ROLE: + required: true + +jobs: + s3-tests: + name: S3 Tests (Python ${{ matrix.python-version }}) + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + strategy: + matrix: + python-version: ${{ fromJSON(inputs.python-versions) }} + + steps: + - name: Configure AWS Credentials (OIDC) + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.READ_LLM_FIXTURES_ROLE }} + aws-region: us-east-2 + + - uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + id: setup-python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.4.23" + + - name: Create virtual environment + run: uv venv + + - name: Install dependencies + run: uv pip install -e ".[boto3]" + + - name: Confirm all connectors are installed + run: uv run python -c "import boto3; print('Boto3 installed')" + + - name: Run S3 Tests + run: uv run pytest -m s3 tests/ -rs diff --git a/pyproject.toml b/pyproject.toml index 74502bbe0..a1ee52a9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dev-dependencies = [ "pytest", "ruff==0.6.7", "pytest-repeat", + "boto3", "pydough[snowflake]", "pydough[mysql]", "pydough[postgres]", diff --git a/pytest.ini b/pytest.ini index af2436d3a..94c12569f 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,3 +6,4 @@ markers = postgres: marks tests that require PostgresSQL credentials server: marks tests that require api mock server sf_masked: marks tests that require Snowflake Masked credentials + s3: marks tests that require custom datasets from s3 diff --git a/tests/conftest.py b/tests/conftest.py index 2dcc763c5..dcc74b605 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,9 +10,11 @@ from collections.abc import Callable from functools import cache +import boto3 import 
httpx import pandas as pd import pytest +from botocore.exceptions import ClientError import pydough import pydough.pydough_operators as pydop @@ -202,21 +204,14 @@ def sample_graph_names(request) -> str: @pytest.fixture(scope="session") -def get_test_graph_by_name() -> graph_fetcher: +def get_custom_datasets_graph() -> graph_fetcher: """ - Returns a known test graph requested if the graph location was included in test_graph_location. + Returns the graph for the given custom dataset name. """ - test_graph_location: dict[str, str] = { - "synthea": "synthea_graph.json", - "world_development_indicators": "world_development_indicators_graph.json", - "keywords": "reserved_words_graph.json", - "keywords_case_insensitive": "reserved_words_graph.json", - } @cache def impl(name: str) -> GraphMetadata: - file_name: str = test_graph_location[name] - path: str = f"{os.path.dirname(__file__)}/test_metadata/{file_name}" + path: str = f"{os.path.dirname(__file__)}/test_metadata/{name}_graph.json" return pydough.parse_json_metadata_from_file(file_path=path, graph_name=name) return impl @@ -644,44 +639,223 @@ def sqlite_cryptbank_connection() -> DatabaseContext: @pytest.fixture(scope="session") -def sqlite_custom_datasets_connection() -> DatabaseContext: +def sqlite_custom_datasets_connection() -> Callable[[str], DatabaseContext]: """ - Returns the SQLITE database connection with all the custom datasets attached. + This fixture is used to connect to the sqlite database of the custom datasets. + Returns a DatabaseContext for the given custom database name. 
""" - gen_data_path: str = "tests/gen_data" - # Dataset tuple format: (schema_name, db_file_name, init_sql_file_name) - SQLite_datasets: list[tuple[str, str, str]] = [ - ("synthea", "synthea.db", "init_synthea_sqlite.sql"), - ("wdi", "world_development_indicators.db", "init_world_indicators_sqlite.sql"), - ("keywords", "reserved_words.db", "init_reserved_words_sqlite.sql"), - ] + custom_datasets_dir: str = "tests/gen_data" + # Setup the directory to be the main PyDough directory. + base_dir: str = os.path.dirname(os.path.dirname(__file__)) + + # Construct the full path to the datasets directory + full_dir_path: str = os.path.join(base_dir, custom_datasets_dir) + + @cache + def _impl(database_name: str) -> DatabaseContext: + connection: sqlite3.Connection + + file_path: str = os.path.join(full_dir_path, f"{database_name}.db") + + if not os.path.exists(file_path): + init_sql: str = f"{full_dir_path}/init_{database_name}_sqlite.sql" + + if not os.path.exists(init_sql): + raise PyDoughTestingException( + f"Cannot find database file '{file_path}' or " + f"initialization script '{init_sql}'" + ) + + subprocess.run(f"sqlite3 {file_path} < {init_sql}", shell=True, check=True) + + connection = sqlite3.connect(":memory:") + connection.execute(f"ATTACH DATABASE '{file_path}' AS {database_name}") + + return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE) + + return _impl + + +S3_DATASETS = ["synthea", "world_development_indicators"] +""" + Contains the names of all the custom datasets that will be used for testing. + This includes the datasets downloaded from S3 and those initialized with a .sql file. +""" +S3_DATASETS_SCRIPTS = { + "world_development_indicators": "init_world_indicators_sqlite", +} +""" + Maps the datasets that need to be built with a sql script to the name of + the script file. These datasets are NOT downloaded from S3. +""" + + +def get_s3_client() -> boto3.Session.client: + """ + Generates an S3 client with the established credentials.
For CI, assumes OIDC + credentials provided by GitHub Actions. + + Returns: + The client of the created boto3 session + """ + session: boto3.Session + if is_ci(): + # Running in GitHub Actions CI — OIDC role will be assumed automatically + # Assumes the CI workflow has run aws-actions/configure-aws-credentials + session = boto3.Session() + else: + # Local development — use credentials stored in environment + # variables or ~/.aws/credentials + session = boto3.Session( + aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + region_name="us-east-2", + ) + + return session.client("s3") + + +def get_s3_datasets( + s3_client: boto3.Session.client, + data_folder: str, + metadata_folder: str, + datasets: list[str], + scripts: dict[str, str], +) -> None: + """ + Sets up the data and metadata for s3 datasets testing. This includes + downloading the data (.db file) and metadata (.json file) from the llm-fixtures + bucket and placing them in data_folder and metadata_folder respectively.
It also + includes executing the init scripts available in S3_DATASETS_SCRIPTS; + when a script is executed the metadata must be created manually for testing. + """ + + bucket: str = "llm-fixtures" + + for dataset in datasets: + db_file: str = f"{data_folder}/{dataset}.db" + exists_db_file: bool = os.path.exists(db_file) - # List of shell commands required to re-create all the db files - commands: list[str] = [f"cd {gen_data_path}"] - # Collect all db_file_names into the rm command - rm_command: str = "rm -fv " + " ".join( - db_file for (_, db_file, _) in SQLite_datasets + # Database setup + if not exists_db_file: + if dataset in scripts: + # Set up the database with its init script, + # assuming the metadata is already available in the metadata folder + init_sql = f"{data_folder}/{scripts[dataset]}.sql" + subprocess.run( + f"sqlite3 {db_file} < {init_sql}", shell=True, check=True + ) + + else: + # Download from s3 + + key_data: str = f"data/{dataset}.db" + + try: + s3_client.download_file(bucket, key_data, db_file) + except ClientError as e: + if e.response["Error"]["Code"] == "404": + print(f"The file {key_data} does not exist in bucket {bucket}.") + else: + raise + + # Download metadata from S3 + if dataset not in scripts: + local_metadata_path: str = f"{metadata_folder}/{dataset}_graph.json" + key_metadata: str = f"metadata/{dataset}.json" + try: + s3_client.download_file(bucket, key_metadata, local_metadata_path) + except ClientError as e: + if e.response["Error"]["Code"] == "404": + print(f"The file {key_metadata} does not exist in bucket {bucket}.") + else: + raise + + +def remove_s3_custom_metadata( + metadata_folder: str, datasets: list[str], scripts: dict[str, str] +) -> None: + """ + Removes the metadata files of the datasets that are not built from a script. + """ + metadata_path: str + + for dataset in datasets: + if dataset not in scripts: + # delete the metadata only for datasets from s3 + metadata_path = f"{metadata_folder}/{dataset}_graph.json" + try: + os.remove(metadata_path) + except
FileNotFoundError: + print(f"Error: File '{metadata_path}' not found.") + except Exception as e: + print(f"An error occurred: {e}") + + +@pytest.fixture(scope="session") +def s3_datasets_setup(): + """ + Sets up all s3 datasets for testing by downloading them or running the init + script. After the tests are done it removes the metadata files. + """ + data_folder: str = "./tests/gen_data" + metadata_folder: str = "./tests/test_metadata" + + # Create the client + s3_client: boto3.Session.client = get_s3_client() + + get_s3_datasets( + s3_client, + data_folder, + metadata_folder, + S3_DATASETS, + S3_DATASETS_SCRIPTS, ) - commands.append(rm_command) - # Add one sqlite3 command per dataset - for _, db_file, init_sql in SQLite_datasets: - commands.append(f"sqlite3 {db_file} < {init_sql}") - # Get the shell commands required to re-create all the db files - shell_cmd: str = "; ".join(commands) + yield + + remove_s3_custom_metadata(metadata_folder, S3_DATASETS, S3_DATASETS_SCRIPTS) + + +@pytest.fixture(scope="session") +def get_s3_datasets_graph(s3_datasets_setup) -> graph_fetcher: + """ + Returns the graph for the given s3 dataset name. + """ + + @cache + def impl(name: str) -> GraphMetadata: + path: str = f"{os.path.dirname(__file__)}/test_metadata/{name}_graph.json" + return pydough.parse_json_metadata_from_file(file_path=path, graph_name=name) + + return impl + + +@pytest.fixture(scope="session") +def sqlite_s3_datasets_connection( + s3_datasets_setup, +) -> Callable[[str], DatabaseContext]: + """ + This fixture is used to connect to the sqlite database of the s3 datasets. + Returns a DatabaseContext for the given S3 database_name. + """ + s3_datasets_dir: str = "tests/gen_data" # Setup the directory to be the main PyDough directory. base_dir: str = os.path.dirname(os.path.dirname(__file__)) - # Setup the world development indicators database.
- subprocess.run(shell_cmd, shell=True, check=True) - # Central in-memory connection - connection: sqlite3.Connection = sqlite3.connect(":memory:") - # Use (schema_name, db_file_name info) on SQLite_datasets to ATTACH DBs - for schema, db_file, _ in SQLite_datasets: - path: str = os.path.join(base_dir, gen_data_path, db_file) - connection.execute(f"ATTACH DATABASE '{path}' AS {schema}") + # Construct the full path to the datasets directory + full_dir_path: str = os.path.join(base_dir, s3_datasets_dir) - return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE) + @cache + def _impl(database_name: str) -> DatabaseContext: + connection: sqlite3.Connection + + file_path: str = os.path.join(full_dir_path, f"{database_name}.db") + connection = sqlite3.connect(file_path) + + return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE) + + return _impl SF_ENVS = ["SF_USERNAME", "SF_PASSWORD", "SF_ACCOUNT"] diff --git a/tests/gen_data/init_reserved_words_sqlite.sql b/tests/gen_data/init_keywords_sqlite.sql similarity index 100% rename from tests/gen_data/init_reserved_words_sqlite.sql rename to tests/gen_data/init_keywords_sqlite.sql diff --git a/tests/test_metadata/reserved_words_graph.json b/tests/test_metadata/keywords_graph.json similarity index 100% rename from tests/test_metadata/reserved_words_graph.json rename to tests/test_metadata/keywords_graph.json diff --git a/tests/test_metadata/synthea_graph.json b/tests/test_metadata/synthea_graph.json deleted file mode 100644 index 5ee9607a8..000000000 --- a/tests/test_metadata/synthea_graph.json +++ /dev/null @@ -1,1695 +0,0 @@ -[ - { - "name": "synthea", - "version": "V2", - "collections": [ - { - "name": "all_prevalences", - "type": "simple table", - "table path": "synthea.all_prevalences", - "unique properties": [ - "ITEM" - ], - "properties": [ - { - "name": "ITEM", - "type": "table column", - "column name": "ITEM", - "data type": "string", - "description": "Name of the 
item/condition/medication measured for prevalence.", - "sample values": [ - "Viral sinusitis (disorder)", - "Streptococcal sore throat (disorder)", - "Laceration of thigh" - ], - "synonyms": [] - }, - { - "name": "POPULATION_TYPE", - "type": "table column", - "column name": "`POPULATION TYPE`", - "data type": "string", - "description": "Population category used to compute prevalence.", - "sample values": [ - "LIVING" - ], - "synonyms": [] - }, - { - "name": "OCCURRENCES", - "type": "table column", - "column name": "OCCURRENCES", - "data type": "numeric", - "description": "Number of occurrences observed.", - "sample values": [ - 868, - 487, - 117 - ], - "synonyms": [] - }, - { - "name": "POPULATION_COUNT", - "type": "table column", - "column name": "`POPULATION COUNT`", - "data type": "numeric", - "description": "Population size used as denominator.", - "sample values": [ - 1000 - ], - "synonyms": [] - }, - { - "name": "PREVALENCE_RATE", - "type": "table column", - "column name": "`PREVALENCE RATE`", - "data type": "numeric", - "description": "Rate of occurrence (0-1).", - "sample values": [ - 0.868, - 0.487, - 0.117 - ], - "synonyms": [] - }, - { - "name": "PREVALENCE_PERCENTAGE", - "type": "table column", - "column name": "`PREVALENCE PERCENTAGE`", - "data type": "numeric", - "description": "Rate of occurrence as a percentage (0-100).", - "sample values": [ - 86.8, - 48.7, - 11.7 - ], - "synonyms": [] - } - ], - "description": "Aggregated prevalence metrics for simulated conditions, procedures, and medications across the synthetic population, including counts and rates.", - "synonyms": [] - }, - { - "name": "allergies", - "type": "simple table", - "table path": "synthea.allergies", - "unique properties": [ - "PATIENT", - "ENCOUNTER", - "CODE" - ], - "properties": [ - { - "name": "START", - "type": "table column", - "column name": "START", - "data type": "string", - "description": "Start timestamp/date of the record.", - "sample values": [ - "2/5/31", - "1/8/88", - 
"12/7/61" - ], - "synonyms": [] - }, - { - "name": "STOP", - "type": "table column", - "column name": "STOP", - "data type": "string", - "description": "End timestamp/date of the record.", - "sample values": [ - "5/26/08" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "273d3878-e990-4093-a261-9593886ba01c", - "0f67780a-2a08-46c6-8a4a-c585fb33a80a" - ], - "synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "numeric", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - 232347008, - 91935009, - 91934008 - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Dander (animal) allergy", - "Allergy to peanuts", - "Allergy to nut" - ], - "synonyms": [] - } - ], - "description": "Allergy/intolerance records linked to a specific encounter and patient, with SNOMED code and description.", - "synonyms": [] - }, - { - "name": "careplans", - "type": "simple table", - "table path": "synthea.careplans", - "unique properties": [ - [ - "ID", - "START", - "STOP", - "PATIENT", - "ENCOUNTER", - "CODE", - "DESCRIPTION", - "REASONCODE", - "REASONDESCRIPTION" - ] - ], - "properties": [ - { - "name": "ID", - "type": "table column", - "column 
name": "ID", - "data type": "string", - "description": "Unique identifier.", - "sample values": [ - "02b14bb0-500f-44e1-acb5-a20e9d92c747", - "a5ad694c-7746-490a-9d7e-df96884877c0", - "20ea9719-0399-41df-91d6-2d4b14444962" - ], - "synonyms": [] - }, - { - "name": "START", - "type": "table column", - "column name": "START", - "data type": "datetime", - "description": "Start timestamp/date of the record.", - "sample values": [ - "1931-02-05", - "1987-12-27", - "1988-01-08" - ], - "synonyms": [] - }, - { - "name": "STOP", - "type": "table column", - "column name": "STOP", - "data type": "datetime", - "description": "End timestamp/date of the record.", - "sample values": [ - "2008-10-20", - "2008-05-25", - "2012-07-23" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "273d3878-e990-4093-a261-9593886ba01c", - "0ad03aae-20cd-408e-a235-56b1fb96fa98" - ], - "synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "numeric", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - 326051000000105.0, - 58332002.0, - 409002.0 - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Self care", - "Allergy education", - "Food allergy diet" - ], - 
"synonyms": [] - }, - { - "name": "REASONCODE", - "type": "table column", - "column name": "REASONCODE", - "data type": "numeric", - "description": "Code indicating the reason for the event/record.", - "sample values": [ - 72892002, - 301011002, - 283371005 - ], - "synonyms": [] - }, - { - "name": "REASONDESCRIPTION", - "type": "table column", - "column name": "REASONDESCRIPTION", - "data type": "string", - "description": "Text describing the reason for the event/record.", - "sample values": [ - "Normal pregnancy", - "Escherichia coli urinary tract infection", - "Laceration of forearm" - ], - "synonyms": [] - } - ], - "description": "Care plan activities spanning a period for a patient and encounter, including reason codes.", - "synonyms": [] - }, - { - "name": "claims", - "type": "simple table", - "table path": "synthea.claims", - "unique properties": [ - "ID" - ], - "properties": [ - { - "name": "ID", - "type": "table column", - "column name": "ID", - "data type": "string", - "description": "Unique identifier.", - "sample values": [ - "0a66b603-1e91-4a39-8114-efaaa43b7ca3", - "58a729c2-8d75-4bcc-8ce2-e9114632df95", - "a9e7974e-23f6-4caf-bd59-10c507b3d7f4" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "BILLABLEPERIOD", - "type": "table column", - "column name": "BILLABLEPERIOD", - "data type": "datetime", - "description": "Billing period start date for the claim.", - "sample values": [ - "2017-10-18", - "2008-03-07", - "2009-02-22" - ], - "synonyms": [] - }, - { - "name": "ORGANIZATION", - "type": "table column", - "column name": "ORGANIZATION", - "data type": "string", - "description": "Submitting or responsible organization.", - 
"sample values": [ - "temp organization" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "b2efa4a2-c481-4804-a96d-429da47b47de", - "38684507-7e9d-4fae-8838-60d751e450fe" - ], - "synonyms": [] - }, - { - "name": "DIAGNOSIS", - "type": "table column", - "column name": "DIAGNOSIS", - "data type": "string", - "description": "Primary diagnosis code/text on the claim.", - "sample values": [ - "Viral sinusitis (disorder)", - "Normal pregnancy", - "Escherichia coli urinary tract infection" - ], - "synonyms": [] - }, - { - "name": "TOTAL", - "type": "table column", - "column name": "TOTAL", - "data type": "numeric", - "description": "Total amount for the claim.", - "sample values": [ - 100 - ], - "synonyms": [] - } - ], - "description": "Billing claims associated with encounters and patients, including diagnosis and total amount.", - "synonyms": [] - }, - { - "name": "conditions", - "type": "simple table", - "table path": "synthea.conditions", - "unique properties": [ - [ - "START", - "STOP", - "PATIENT", - "ENCOUNTER", - "CODE", - "DESCRIPTION" - ] - ], - "properties": [ - { - "name": "START", - "type": "table column", - "column name": "START", - "data type": "datetime", - "description": "Start timestamp/date of the record.", - "sample values": [ - "1931-01-25", - "1946-05-05", - "2009-02-12" - ], - "synonyms": [] - }, - { - "name": "STOP", - "type": "table column", - "column name": "STOP", - "data type": "datetime", - "description": "End timestamp/date of the record.", - "sample values": [ - "2009-03-03", - "2008-10-20", - "2008-05-25" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - 
"5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "b2efa4a2-c481-4804-a96d-429da47b47de", - "273d3878-e990-4093-a261-9593886ba01c" - ], - "synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "numeric", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - 367498001, - 38341003, - 444814009 - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Seasonal allergic rhinitis", - "Hypertension", - "Viral sinusitis (disorder)" - ], - "synonyms": [] - } - ], - "description": "Diagnosed conditions for a patient and encounter, including code and textual description.", - "synonyms": [] - }, - { - "name": "encounters", - "type": "simple table", - "table path": "synthea.encounters", - "unique properties": [ - "ID" - ], - "properties": [ - { - "name": "ID", - "type": "table column", - "column name": "ID", - "data type": "string", - "description": "Unique identifier.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "b2efa4a2-c481-4804-a96d-429da47b47de", - "38684507-7e9d-4fae-8838-60d751e450fe" - ], - "synonyms": [] - }, - { - "name": "DATE", - "type": "table column", - "column name": "DATE", - "data type": "datetime", - "description": "Event date/time.", - "sample values": [ - "2008-03-07", - "2009-02-22", - "2009-05-01" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": 
"string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "numeric", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - 185349003, - 185345009, - 308646001 - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Outpatient Encounter", - "Encounter for symptom", - "Death Certification" - ], - "synonyms": [] - }, - { - "name": "REASONCODE", - "type": "table column", - "column name": "REASONCODE", - "data type": "numeric", - "description": "Code indicating the reason for the event/record.", - "sample values": [ - 444814009, - 72892002, - 301011002 - ], - "synonyms": [] - }, - { - "name": "REASONDESCRIPTION", - "type": "table column", - "column name": "REASONDESCRIPTION", - "data type": "string", - "description": "Text describing the reason for the event/record.", - "sample values": [ - "Viral sinusitis (disorder)", - "Normal pregnancy", - "Escherichia coli urinary tract infection" - ], - "synonyms": [] - } - ], - "description": "Clinical encounters (visits) for patients, including date, visit code/description, and reason codes.", - "synonyms": [] - }, - { - "name": "immunizations", - "type": "simple table", - "table path": "synthea.immunizations", - "unique properties": [ - "DATE", - "PATIENT", - "ENCOUNTER", - "CODE" - ], - "properties": [ - { - "name": "DATE", - "type": "table column", - "column name": "DATE", - "data type": "datetime", - "description": "Event date/time.", - "sample values": [ - "2008-03-07", - "2009-05-01", - "2010-04-30" - ], - 
"synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "38684507-7e9d-4fae-8838-60d751e450fe", - "294cfdf7-dffc-46cc-8b7f-ce3377b52385" - ], - "synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "numeric", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - 140, - 113 - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Influenza seasonal injectable preservative free", - "Td (adult) preservative free" - ], - "synonyms": [] - } - ], - "description": "Immunization events administered to a patient during an encounter.", - "synonyms": [] - }, - { - "name": "medications", - "type": "simple table", - "table path": "synthea.medications", - "unique properties": [ - "START", - "PATIENT", - "ENCOUNTER", - "CODE" - ], - "properties": [ - { - "name": "START", - "type": "table column", - "column name": "START", - "data type": "datetime", - "description": "Start timestamp/date of the record.", - "sample values": [ - "1970-05-10", - "2009-02-22", - "1988-01-08" - ], - "synonyms": [] - }, - { - "name": "STOP", - "type": "table column", - "column name": "STOP", - "data type": "datetime", - "description": "End timestamp/date of the record.", - "sample 
values": [ - "2009-03-03", - "2002-07-28", - "2008-05-25" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "b2efa4a2-c481-4804-a96d-429da47b47de", - "273d3878-e990-4093-a261-9593886ba01c" - ], - "synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "numeric", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - 834101, - 824184, - 141918 - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Penicillin V Potassium 500 MG", - "Amoxicillin 250 MG / Clavulanate 125 MG [Augmentin]", - "Terfenadine 60 MG Oral Tablet" - ], - "synonyms": [] - }, - { - "name": "REASONCODE", - "type": "table column", - "column name": "REASONCODE", - "data type": "numeric", - "description": "Code indicating the reason for the event/record.", - "sample values": [ - 43878008, - 444814009, - 301011002 - ], - "synonyms": [] - }, - { - "name": "REASONDESCRIPTION", - "type": "table column", - "column name": "REASONDESCRIPTION", - "data type": "string", - "description": "Text describing the reason for the event/record.", - "sample values": [ - "Streptococcal sore throat (disorder)", - "Viral sinusitis (disorder)", - "Escherichia coli urinary tract infection" 
- ], - "synonyms": [] - } - ], - "description": "Medications prescribed or administered to a patient, with start/stop, codes, and reasons.", - "synonyms": [] - }, - { - "name": "observations", - "type": "simple table", - "table path": "synthea.observations", - "unique properties": [ - [ - "DATE", - "PATIENT", - "ENCOUNTER", - "CODE", - "DESCRIPTION", - "VALUE", - "UNITS" - ] - ], - "properties": [ - { - "name": "DATE", - "type": "table column", - "column name": "DATE", - "data type": "datetime", - "description": "Event date/time.", - "sample values": [ - "2008-03-07", - "2009-05-01", - "2010-04-30" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "38684507-7e9d-4fae-8838-60d751e450fe", - "294cfdf7-dffc-46cc-8b7f-ce3377b52385" - ], - "synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "string", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - "8302-2", - "29463-7", - "39156-5" - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Body Height", - "Body Weight", - "Body Mass Index" - ], - "synonyms": [] - }, - { - "name": "VALUE", - "type": "table column", - "column name": "VALUE", - "data type": "numeric", - 
"description": "Measured numeric value.", - "sample values": [ - 174.71, - 102.33, - 33.53 - ], - "synonyms": [] - }, - { - "name": "UNITS", - "type": "table column", - "column name": "UNITS", - "data type": "string", - "description": "Units of measure for VALUE.", - "sample values": [ - "cm", - "kg", - "kg/m2" - ], - "synonyms": [] - } - ], - "description": "Clinical observations and measurements (e.g., vitals, labs) for a patient during an encounter.", - "synonyms": [] - }, - { - "name": "patients", - "type": "simple table", - "table path": "synthea.patients", - "unique properties": [ - "patient" - ], - "properties": [ - { - "name": "patient", - "type": "table column", - "column name": "patient", - "data type": "string", - "description": "Patient field.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "304ef151-bcfb-4f05-94f3-a7598efac77e", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "birthdate", - "type": "table column", - "column name": "birthdate", - "data type": "datetime", - "description": "Date of birth.", - "sample values": [ - "1927-06-18", - "1961-01-13", - "1961-03-08" - ], - "synonyms": [] - }, - { - "name": "deathdate", - "type": "table column", - "column name": "deathdate", - "data type": "datetime", - "description": "Date of death (if any).", - "sample values": [ - "2011-02-19", - "2010-07-05", - "2009-08-05" - ], - "synonyms": [] - }, - { - "name": "ssn", - "type": "table column", - "column name": "ssn", - "data type": "string", - "description": "Social Security Number (synthetic).", - "sample values": [ - "999-47-5449", - "999-66-9307", - "999-57-1876" - ], - "synonyms": [] - }, - { - "name": "drivers", - "type": "table column", - "column name": "drivers", - "data type": "string", - "description": "Driver license number (synthetic).", - "sample values": [ - "S99956925", - "S99934260", - "S99971477" - ], - "synonyms": [] - }, - { - "name": "passport", - "type": "table column", - "column name": 
"passport", - "data type": "string", - "description": "Passport number (synthetic).", - "sample values": [ - "X76969056X", - "X19125077X", - "X20762697X" - ], - "synonyms": [] - }, - { - "name": "prefix", - "type": "table column", - "column name": "prefix", - "data type": "string", - "description": "Name prefix (e.g., Mr., Ms.).", - "sample values": [ - "Mr.", - "Mrs.", - "Ms." - ], - "synonyms": [] - }, - { - "name": "first", - "type": "table column", - "column name": "first", - "data type": "string", - "description": "Given name.", - "sample values": [ - "Shane", - "Courtney", - "Darin" - ], - "synonyms": [] - }, - { - "name": "last", - "type": "table column", - "column name": "last", - "data type": "string", - "description": "Family name.", - "sample values": [ - "Schumm", - "Lindgren", - "Weissnat" - ], - "synonyms": [] - }, - { - "name": "suffix", - "type": "table column", - "column name": "suffix", - "data type": "string", - "description": "Name suffix (e.g., Jr.).", - "sample values": [], - "synonyms": [] - }, - { - "name": "maiden", - "type": "table column", - "column name": "maiden", - "data type": "string", - "description": "Maiden name.", - "sample values": [ - "Schowalter", - "Cremin", - "McLaughlin" - ], - "synonyms": [] - }, - { - "name": "marital", - "type": "table column", - "column name": "marital", - "data type": "string", - "description": "Marital status.", - "sample values": [ - "M", - "S" - ], - "synonyms": [] - }, - { - "name": "race", - "type": "table column", - "column name": "race", - "data type": "string", - "description": "Race (synthetic value).", - "sample values": [ - "white", - "asian", - "hispanic" - ], - "synonyms": [] - }, - { - "name": "ethnicity", - "type": "table column", - "column name": "ethnicity", - "data type": "string", - "description": "Ethnicity (synthetic value).", - "sample values": [ - "italian", - "french", - "chinese" - ], - "synonyms": [] - }, - { - "name": "gender", - "type": "table column", - "column name": 
"gender", - "data type": "string", - "description": "Gender.", - "sample values": [ - "M", - "F" - ], - "synonyms": [] - }, - { - "name": "birthplace", - "type": "table column", - "column name": "birthplace", - "data type": "string", - "description": "Place of birth.", - "sample values": [ - "Pittsfield MA US", - "Orange MA US", - "Lynn MA US" - ], - "synonyms": [] - }, - { - "name": "address", - "type": "table column", - "column name": "address", - "data type": "string", - "description": "Postal address.", - "sample values": [ - "8962 Conroy Shore Springfield MA 01109 US", - "379 Margaretta Extension Apt. 934 Brockton MA 02302 US", - "31047 McCullough Cliffs Methuen Town MA 01844 US" - ], - "synonyms": [] - } - ], - "description": "Synthetic patient master records with demographics and identifiers.", - "synonyms": [] - }, - { - "name": "procedures", - "type": "simple table", - "table path": "synthea.procedures", - "unique properties": [ - [ - "DATE", - "PATIENT", - "ENCOUNTER", - "CODE", - "DESCRIPTION", - "REASONCODE", - "REASONDESCRIPTION" - ] - ], - "properties": [ - { - "name": "DATE", - "type": "table column", - "column name": "DATE", - "data type": "datetime", - "description": "Event date/time.", - "sample values": [ - "2008-03-07", - "2009-05-01", - "2010-04-30" - ], - "synonyms": [] - }, - { - "name": "PATIENT", - "type": "table column", - "column name": "PATIENT", - "data type": "string", - "description": "Patient identifier linking to patients.patient.", - "sample values": [ - "5f615f2b-efb7-4032-8c88-dea2dc2eacbb", - "1d0e18c5-b47c-4196-b1d4-93b141098ae1", - "d3167a7a-9259-46f5-9a4c-4a8822dbcd1c" - ], - "synonyms": [] - }, - { - "name": "ENCOUNTER", - "type": "table column", - "column name": "ENCOUNTER", - "data type": "string", - "description": "Encounter identifier linking to encounters.ID.", - "sample values": [ - "adfcd316-7421-4e5c-a3c5-515b5c16d35b", - "38684507-7e9d-4fae-8838-60d751e450fe", - "294cfdf7-dffc-46cc-8b7f-ce3377b52385" - ], - 
"synonyms": [] - }, - { - "name": "CODE", - "type": "table column", - "column name": "CODE", - "data type": "numeric", - "description": "Coded value (often SNOMED/ICD/LOINC) representing the clinical concept.", - "sample values": [ - 428191000124101, - 252160004, - 237001001 - ], - "synonyms": [] - }, - { - "name": "DESCRIPTION", - "type": "table column", - "column name": "DESCRIPTION", - "data type": "string", - "description": "Human-readable description of the coded concept.", - "sample values": [ - "Documentation of current medications", - "Standard pregnancy test", - "Augmentation of labor" - ], - "synonyms": [] - }, - { - "name": "REASONCODE", - "type": "table column", - "column name": "REASONCODE", - "data type": "numeric", - "description": "Code indicating the reason for the event/record.", - "sample values": [ - 72892002, - 283371005, - 10509002 - ], - "synonyms": [] - }, - { - "name": "REASONDESCRIPTION", - "type": "table column", - "column name": "REASONDESCRIPTION", - "data type": "string", - "description": "Text describing the reason for the event/record.", - "sample values": [ - "Normal pregnancy", - "Laceration of forearm", - "Acute bronchitis (disorder)" - ], - "synonyms": [] - } - ], - "description": "Procedures performed on a patient during an encounter, with codes and reasons.", - "synonyms": [] - } - ], - "relationships": [ - { - "type": "simple join", - "name": "patient", - "parent collection": "allergies", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from allergies to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "allergies", - "original parent": "allergies", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of allergies.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": 
"encounter", - "parent collection": "allergies", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from allergies to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "allergies", - "original parent": "allergies", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of allergies.encounter.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "careplans", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from careplans to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "careplans", - "original parent": "careplans", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of careplans.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "encounter", - "parent collection": "careplans", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from careplans to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "careplans", - "original parent": "careplans", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of careplans.encounter.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "claims", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from claims to patients on 
PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "claims", - "original parent": "claims", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of claims.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "encounter", - "parent collection": "claims", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from claims to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "claims", - "original parent": "claims", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of claims.encounter.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "conditions", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from conditions to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "conditions", - "original parent": "conditions", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of conditions.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "encounter", - "parent collection": "conditions", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from conditions to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "conditions", - "original parent": "conditions", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many 
relationship of conditions.encounter.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "encounters", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from encounters to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "encounters", - "original parent": "encounters", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of encounters.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "immunizations", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from immunizations to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "immunizations", - "original parent": "immunizations", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of immunizations.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "encounter", - "parent collection": "immunizations", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from immunizations to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "immunizations", - "original parent": "immunizations", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of immunizations.encounter.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "medications", - "child collection": "patients", - 
"singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from medications to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "medications", - "original parent": "medications", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of medications.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "encounter", - "parent collection": "medications", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from medications to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "medications", - "original parent": "medications", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of medications.encounter.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "observations", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from observations to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "observations", - "original parent": "observations", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of observations.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "encounter", - "parent collection": "observations", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from observations to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] 
- }, - { - "type": "reverse", - "name": "observations", - "original parent": "observations", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of observations.encounter.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "patient", - "parent collection": "procedures", - "child collection": "patients", - "singular": true, - "always matches": true, - "keys": { - "PATIENT": [ - "patient" - ] - }, - "description": "one-to-one link from procedures to patients on PATIENT \u2192 patient.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "procedures", - "original parent": "procedures", - "original property": "patient", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of procedures.patient.", - "synonyms": [] - }, - { - "type": "simple join", - "name": "encounter", - "parent collection": "procedures", - "child collection": "encounters", - "singular": true, - "always matches": true, - "keys": { - "ENCOUNTER": [ - "ID" - ] - }, - "description": "one-to-one link from procedures to encounters on ENCOUNTER \u2192 ID.", - "synonyms": [] - }, - { - "type": "reverse", - "name": "procedures", - "original parent": "procedures", - "original property": "encounter", - "singular": false, - "always matches": true, - "description": "Reverse one-to-many relationship of procedures.encounter.", - "synonyms": [] - } - ] - } -] diff --git a/tests/test_metadata/world_development_indicators_graph.json b/tests/test_metadata/world_development_indicators_graph.json index 71d908b62..b6f1b9f84 100644 --- a/tests/test_metadata/world_development_indicators_graph.json +++ b/tests/test_metadata/world_development_indicators_graph.json @@ -6,7 +6,7 @@ { "name": "Country", "type": "simple table", - "table path": "wdi.Country", + "table path": "main.Country", "unique properties": [ "CountryCode" ], @@ -421,7 +421,7 @@ { "name": "CountryNotes", "type": 
"simple table", - "table path": "wdi.CountryNotes", + "table path": "main.CountryNotes", "unique properties": [ "Countrycode", "Seriescode" @@ -473,7 +473,7 @@ { "name": "Footnotes", "type": "simple table", - "table path": "wdi.Footnotes", + "table path": "main.Footnotes", "unique properties": [ "Countrycode", "Seriescode", @@ -539,7 +539,7 @@ { "name": "Indicators", "type": "simple table", - "table path": "wdi.Indicators", + "table path": "main.Indicators", "unique properties": [ "CountryCode", "IndicatorCode", @@ -631,7 +631,7 @@ { "name": "Series", "type": "simple table", - "table path": "wdi.Series", + "table path": "main.Series", "unique properties": [ "SeriesCode" ], @@ -903,7 +903,7 @@ { "name": "SeriesNotes", "type": "simple table", - "table path": "wdi.SeriesNotes", + "table path": "main.SeriesNotes", "unique properties": [ "Seriescode", "Year" diff --git a/tests/test_pipeline_custom_datasets.py b/tests/test_pipeline_custom_datasets.py index cabc8a256..4ed2ba80d 100644 --- a/tests/test_pipeline_custom_datasets.py +++ b/tests/test_pipeline_custom_datasets.py @@ -14,105 +14,6 @@ @pytest.fixture( params=[ - pytest.param( - PyDoughPandasTest( - """ -result = ( - patients - .WHERE((gender == 'F') & (ethnicity == 'italian')) - .conditions - .PARTITION(name='condition_groups', by=DESCRIPTION) - .CALCULATE(condition_description=DESCRIPTION, occurrence_count=COUNT(conditions)) - .TOP_K(1, by=(occurrence_count.DESC(), condition_description.ASC())) - .CALCULATE(condition_description) -) - """, - "synthea", - lambda: pd.DataFrame( - { - "condition_description": ["Normal pregnancy"], - } - ), - "synthea_most_common_conditions", - ), - id="synthea_most_common_conditions", - ), - pytest.param( - PyDoughPandasTest( - """ -result = ( - world_development_indicators - .Country - .WHERE((IncomeGroup == 'Low income') & HAS(CountryNotes.WHERE(Series.SeriesCode == 'DT.DOD.DECT.CD'))) - .CALCULATE(country_code=CountryCode) -) - """, - "world_development_indicators", - lambda: 
pd.DataFrame( - { - "country_code": [ - "AFG", - "BDI", - "BEN", - "BFA", - "CAF", - "COM", - "ERI", - "ETH", - "GIN", - "GMB", - "GNB", - "HTI", - "KHM", - "LBR", - "MDG", - "MLI", - "MOZ", - "MWI", - "NER", - "NPL", - "RWA", - "SLE", - "SOM", - "TCD", - "TGO", - "TZA", - "UGA", - "ZAR", - "ZWE", - ], - } - ), - "wdi_low_income_country_with_series", - ), - id="wdi_low_income_country_with_series", - ), - pytest.param( - PyDoughPandasTest( - """ -result = ( - world_development_indicators - .Country - .WHERE(ShortName == 'Albania') - .Footnotes - .WHERE(Year == 'YR2012') - .CALCULATE(footnote_description=Description) -) - """, - "world_development_indicators", - lambda: pd.DataFrame( - { - "condition_description": [ - "As reported", - "Period: 2008-2012.Grouped consumption data.Growth rates are based on survey means of 2011 PPP$.Survey reference CPI years for the initial and final years are 2008 and 2012, respectively.", - "Source: Labour force survey. Coverage: Civilian. Coverage (unemployment): Not available. Age: 15-74. Coverage limitation: Excluding institutional population. Education: International Standard Classification of Education, 1997 version.", - ] - } - ), - "wdi_albania_footnotes_1978", - ), - id="wdi_albania_footnotes_1978", - ), pytest.param( PyDoughPandasTest( r""" @@ -295,6 +196,24 @@ ), id="keywords_quoted_table_name", ), + pytest.param( + PyDoughPandasTest( + """ +result = keywords.CALCULATE( + max_len=MAX(partition_.integer) +).calculate_.WHERE( + where_ == max_len +).CALCULATE(key=where_, len=length) + """, + "keywords", + lambda: pd.DataFrame({"key": [3], "len": [7]}), + "keywords_function_quoted_name", + ), + id="keywords_function_quoted_name", + marks=pytest.mark.skip( + "FIX: (issue #458): Invalid composed SQL alias where column_name is quoted." 
+ ), + ), ], ) def custom_datasets_test_data(request) -> PyDoughPandasTest: @@ -307,7 +226,7 @@ def custom_datasets_test_data(request) -> PyDoughPandasTest: def test_pipeline_until_relational_custom_datasets( custom_datasets_test_data: PyDoughPandasTest, - get_test_graph_by_name: graph_fetcher, + get_custom_datasets_graph: graph_fetcher, get_plan_test_filename: Callable[[str], str], update_tests: bool, ) -> None: @@ -317,13 +236,13 @@ def test_pipeline_until_relational_custom_datasets( """ file_path: str = get_plan_test_filename(custom_datasets_test_data.test_name) custom_datasets_test_data.run_relational_test( - get_test_graph_by_name, file_path, update_tests + get_custom_datasets_graph, file_path, update_tests ) def test_pipeline_until_sql_custom_datasets( custom_datasets_test_data: PyDoughPandasTest, - get_test_graph_by_name: graph_fetcher, + get_custom_datasets_graph: graph_fetcher, empty_context_database: DatabaseContext, get_sql_test_filename: Callable[[str, DatabaseDialect], str], update_tests: bool, @@ -336,7 +255,7 @@ def test_pipeline_until_sql_custom_datasets( custom_datasets_test_data.test_name, empty_context_database.dialect ) custom_datasets_test_data.run_sql_test( - get_test_graph_by_name, + get_custom_datasets_graph, file_path, update_tests, empty_context_database, @@ -346,13 +265,15 @@ def test_pipeline_until_sql_custom_datasets( @pytest.mark.execute def test_pipeline_e2e_custom_datasets( custom_datasets_test_data: PyDoughPandasTest, - get_test_graph_by_name: graph_fetcher, - sqlite_custom_datasets_connection: DatabaseContext, + get_custom_datasets_graph: graph_fetcher, + sqlite_custom_datasets_connection: Callable[[str], DatabaseContext], ): """ Test executing the the custom queries with the custom datasets against the refsol DataFrame. 
""" custom_datasets_test_data.run_e2e_test( - get_test_graph_by_name, sqlite_custom_datasets_connection, coerce_types=True + get_custom_datasets_graph, + sqlite_custom_datasets_connection(custom_datasets_test_data.graph_name.lower()), + coerce_types=True, ) diff --git a/tests/test_pipeline_mysql.py b/tests/test_pipeline_mysql.py index d4cb3848d..adc0fb13d 100644 --- a/tests/test_pipeline_mysql.py +++ b/tests/test_pipeline_mysql.py @@ -563,19 +563,15 @@ def test_pipeline_e2e_mysql_defog( @pytest.mark.execute def test_pipeline_e2e_mysql_custom_datasets( custom_datasets_test_data: PyDoughPandasTest, # noqa: F811 - get_test_graph_by_name: graph_fetcher, + get_custom_datasets_graph: graph_fetcher, mysql_conn_db_context: Callable[[str], DatabaseContext], ): """ Test executing the the custom queries with the custom datasets against the refsol DataFrame. """ - # Just run the "keywords" tests - if custom_datasets_test_data.graph_name.lower() == "keywords": - custom_datasets_test_data.run_e2e_test( - get_test_graph_by_name, - mysql_conn_db_context(custom_datasets_test_data.graph_name.lower()), - coerce_types=True, - ) - else: - pytest.skip("Skipping non-keywords custom dataset tests for MySQL.") + custom_datasets_test_data.run_e2e_test( + get_custom_datasets_graph, + mysql_conn_db_context(custom_datasets_test_data.graph_name.lower()), + coerce_types=True, + ) diff --git a/tests/test_pipeline_postgres.py b/tests/test_pipeline_postgres.py index 063f0b7c8..6aa002f0d 100644 --- a/tests/test_pipeline_postgres.py +++ b/tests/test_pipeline_postgres.py @@ -562,17 +562,14 @@ def test_pipeline_e2e_postgres_defog( @pytest.mark.execute def test_pipeline_e2e_postgres_custom_datasets( custom_datasets_test_data: PyDoughPandasTest, # noqa: F811 - get_test_graph_by_name: graph_fetcher, + get_custom_datasets_graph: graph_fetcher, postgres_conn_db_context: DatabaseContext, ): """ Test executing the the custom queries with the custom datasets against the refsol DataFrame. 
""" - # Just run the "keywords" tests - if custom_datasets_test_data.graph_name.lower() == "keywords": - custom_datasets_test_data.run_e2e_test( - get_test_graph_by_name, postgres_conn_db_context, coerce_types=True - ) - else: - pytest.skip("Skipping non-keywords custom dataset tests for Postgres.") + + custom_datasets_test_data.run_e2e_test( + get_custom_datasets_graph, postgres_conn_db_context, coerce_types=True + ) diff --git a/tests/test_pipeline_s3_datasets.py b/tests/test_pipeline_s3_datasets.py new file mode 100644 index 000000000..d8b24a183 --- /dev/null +++ b/tests/test_pipeline_s3_datasets.py @@ -0,0 +1,181 @@ +""" +Integration tests for the PyDough workflow with custom questions on diverse +s3 datasets. +""" + +from collections.abc import Callable + +import pandas as pd +import pytest + +from pydough.database_connectors import DatabaseContext, DatabaseDialect +from tests.testing_utilities import PyDoughPandasTest, graph_fetcher + + +@pytest.fixture( + params=[ + pytest.param( + PyDoughPandasTest( + """ +result = ( + patients + .WHERE((gender == 'F') & (ethnicity == 'italian')) + .conditions + .PARTITION(name='condition_groups', by=DESCRIPTION) + .CALCULATE(condition_description=DESCRIPTION, occurrence_count=COUNT(conditions)) + .TOP_K(1, by=(occurrence_count.DESC(), condition_description.ASC())) + .CALCULATE(condition_description) +) + """, + "synthea", + lambda: pd.DataFrame( + { + "condition_description": ["Viral sinusitis (disorder)"], + } + ), + "synthea_most_common_conditions", + ), + id="synthea_most_common_conditions", + ), + pytest.param( + PyDoughPandasTest( + """ +result = ( + world_development_indicators + .Country + .WHERE((IncomeGroup == 'Low income') & HAS(CountryNotes.WHERE(Series.SeriesCode == 'DT.DOD.DECT.CD'))) + .CALCULATE(country_code=CountryCode) +) + """, + "world_development_indicators", + lambda: pd.DataFrame( + { + "country_code": [ + "AFG", + "BDI", + "BEN", + "BFA", + "CAF", + "COM", + "ERI", + "ETH", + "GIN", + "GMB", + "GNB", 
+ "HTI", + "KHM", + "LBR", + "MDG", + "MLI", + "MOZ", + "MWI", + "NER", + "NPL", + "RWA", + "SLE", + "SOM", + "TCD", + "TGO", + "TZA", + "UGA", + "ZAR", + "ZWE", + ], + } + ), + "wdi_low_income_country_with_series", + ), + id="wdi_low_income_country_with_series", + ), + pytest.param( + PyDoughPandasTest( + """ +result = ( + world_development_indicators + .Country + .WHERE(ShortName == 'Albania') + .Footnotes + .WHERE(Year == 'YR2012') + .CALCULATE(footnote_description=Description) +) + """, + "world_development_indicators", + lambda: pd.DataFrame( + { + "condition_description": [ + "As reported", + "Period: 2008-2012.Grouped consumption data.Growth rates are based on survey means of 2011 PPP$.Survey reference CPI years for the initial and final years are 2008 and 2012, respectively.", + "Source: Labour force survey. Coverage: Civilian. Coverage (unemployment): Not available. Age: 15-74. Coverage limitation: Excluding institutional population. Education: International Standard Classification of Education, 1997 version.", + ] + } + ), + "wdi_albania_footnotes_1978", + ), + id="wdi_albania_footnotes_1978", + ), + ], +) +def s3_datasets_test_data(request) -> PyDoughPandasTest: + """ + Test data for e2e tests using epoch test data. Returns an instance of + PyDoughPandasTest containing information about the test. + """ + return request.param + + +@pytest.mark.s3 +def test_pipeline_until_relational_s3_datasets( + s3_datasets_test_data: PyDoughPandasTest, + get_s3_datasets_graph: graph_fetcher, + get_plan_test_filename: Callable[[str], str], + update_tests: bool, +) -> None: + """ + Tests the conversion of the PyDough queries on the s3 datasets into + relational plans. 
+ """ + file_path: str = get_plan_test_filename(s3_datasets_test_data.test_name) + s3_datasets_test_data.run_relational_test( + get_s3_datasets_graph, file_path, update_tests + ) + + +@pytest.mark.s3 +def test_pipeline_until_sql_s3_datasets( + s3_datasets_test_data: PyDoughPandasTest, + get_s3_datasets_graph: graph_fetcher, + empty_context_database: DatabaseContext, + get_sql_test_filename: Callable[[str, DatabaseDialect], str], + update_tests: bool, +): + """ + Tests the conversion of the PyDough queries on the s3 datasets into + SQL text. + """ + file_path: str = get_sql_test_filename( + s3_datasets_test_data.test_name, empty_context_database.dialect + ) + s3_datasets_test_data.run_sql_test( + get_s3_datasets_graph, + file_path, + update_tests, + empty_context_database, + ) + + +@pytest.mark.s3 +@pytest.mark.execute +def test_pipeline_e2e_s3_datasets( + s3_datasets_test_data: PyDoughPandasTest, + get_s3_datasets_graph: graph_fetcher, + sqlite_s3_datasets_connection: Callable[[str], DatabaseContext], +): + """ + Test executing the e2e queries with the s3 datasets against the + refsol DataFrame. + """ + s3_datasets_test_data.run_e2e_test( + get_s3_datasets_graph, + sqlite_s3_datasets_connection(s3_datasets_test_data.graph_name.lower()), + coerce_types=True, + ) diff --git a/tests/test_pipeline_sf.py b/tests/test_pipeline_sf.py index f7954fb46..808f8287c 100644 --- a/tests/test_pipeline_sf.py +++ b/tests/test_pipeline_sf.py @@ -574,22 +574,18 @@ def test_defog_e2e( @pytest.mark.execute def test_pipeline_e2e_snowflake_custom_datasets( custom_datasets_test_data: PyDoughPandasTest, # noqa: F811 - get_test_graph_by_name: graph_fetcher, + get_custom_datasets_graph: graph_fetcher, sf_conn_db_context: DatabaseContext, ): """ Test executing the the custom queries with the custom datasets against the refsol DataFrame. 
""" - # Just run the "keywords" tests - if custom_datasets_test_data.graph_name.lower() == "keywords": - custom_datasets_test_data.run_e2e_test( - get_test_graph_by_name, - sf_conn_db_context("DEFOG", custom_datasets_test_data.graph_name), - coerce_types=True, - ) - else: - pytest.skip("Skipping non-keywords custom dataset tests for Snowflake.") + custom_datasets_test_data.run_e2e_test( + get_custom_datasets_graph, + sf_conn_db_context("DEFOG", custom_datasets_test_data.graph_name), + coerce_types=True, + ) # NOTE: this should move and be part of tpch_custom_pipeline_test_data once the diff --git a/tests/test_plan_refsols/synthea_most_common_conditions.txt b/tests/test_plan_refsols/synthea_most_common_conditions.txt index ae03fdd14..854e29cc2 100644 --- a/tests/test_plan_refsols/synthea_most_common_conditions.txt +++ b/tests/test_plan_refsols/synthea_most_common_conditions.txt @@ -2,5 +2,5 @@ ROOT(columns=[('condition_description', DESCRIPTION)], orderings=[(n_rows):desc_ AGGREGATE(keys={'DESCRIPTION': DESCRIPTION}, aggregations={'n_rows': COUNT()}) JOIN(condition=t0.patient == t1.PATIENT, type=INNER, cardinality=PLURAL_ACCESS, reverse_cardinality=SINGULAR_FILTER, columns={'DESCRIPTION': t1.DESCRIPTION}) FILTER(condition=ethnicity == 'italian':string & gender == 'F':string, columns={'patient': patient}) - SCAN(table=synthea.patients, columns={'ethnicity': ethnicity, 'gender': gender, 'patient': patient}) - SCAN(table=synthea.conditions, columns={'DESCRIPTION': DESCRIPTION, 'PATIENT': PATIENT}) + SCAN(table=main.patients, columns={'ethnicity': ethnicity, 'gender': gender, 'patient': patient}) + SCAN(table=main.conditions, columns={'DESCRIPTION': DESCRIPTION, 'PATIENT': PATIENT}) diff --git a/tests/test_plan_refsols/wdi_albania_footnotes_1978.txt b/tests/test_plan_refsols/wdi_albania_footnotes_1978.txt index ff69d96c0..dd07a0abe 100644 --- a/tests/test_plan_refsols/wdi_albania_footnotes_1978.txt +++ b/tests/test_plan_refsols/wdi_albania_footnotes_1978.txt @@ -1,6 
+1,6 @@ ROOT(columns=[('footnote_description', Description)], orderings=[]) JOIN(condition=t0.CountryCode == t1.Countrycode, type=INNER, cardinality=PLURAL_FILTER, reverse_cardinality=SINGULAR_FILTER, columns={'Description': t1.Description}) FILTER(condition=ShortName == 'Albania':string, columns={'CountryCode': CountryCode}) - SCAN(table=wdi.Country, columns={'CountryCode': CountryCode, 'ShortName': ShortName}) + SCAN(table=main.Country, columns={'CountryCode': CountryCode, 'ShortName': ShortName}) FILTER(condition=Year == 'YR2012':string, columns={'Countrycode': Countrycode, 'Description': Description}) - SCAN(table=wdi.Footnotes, columns={'Countrycode': Countrycode, 'Description': Description, 'Year': Year}) + SCAN(table=main.Footnotes, columns={'Countrycode': Countrycode, 'Description': Description, 'Year': Year}) diff --git a/tests/test_plan_refsols/wdi_low_income_country_with_series.txt b/tests/test_plan_refsols/wdi_low_income_country_with_series.txt index eb6f2d707..e97818b10 100644 --- a/tests/test_plan_refsols/wdi_low_income_country_with_series.txt +++ b/tests/test_plan_refsols/wdi_low_income_country_with_series.txt @@ -1,8 +1,8 @@ ROOT(columns=[('country_code', CountryCode)], orderings=[]) JOIN(condition=t0.CountryCode == t1.Countrycode, type=SEMI, columns={'CountryCode': t0.CountryCode}) FILTER(condition=IncomeGroup == 'Low income':string, columns={'CountryCode': CountryCode}) - SCAN(table=wdi.Country, columns={'CountryCode': CountryCode, 'IncomeGroup': IncomeGroup}) + SCAN(table=main.Country, columns={'CountryCode': CountryCode, 'IncomeGroup': IncomeGroup}) JOIN(condition=t0.Seriescode == t1.SeriesCode, type=INNER, cardinality=SINGULAR_FILTER, reverse_cardinality=PLURAL_ACCESS, columns={'Countrycode': t0.Countrycode}) - SCAN(table=wdi.CountryNotes, columns={'Countrycode': Countrycode, 'Seriescode': Seriescode}) + SCAN(table=main.CountryNotes, columns={'Countrycode': Countrycode, 'Seriescode': Seriescode}) FILTER(condition=SeriesCode == 
'DT.DOD.DECT.CD':string, columns={'SeriesCode': SeriesCode}) - SCAN(table=wdi.Series, columns={'SeriesCode': SeriesCode}) + SCAN(table=main.Series, columns={'SeriesCode': SeriesCode}) diff --git a/tests/test_sql_refsols/synthea_most_common_conditions_ansi.sql b/tests/test_sql_refsols/synthea_most_common_conditions_ansi.sql index 19fdf1f21..d6e8b8406 100644 --- a/tests/test_sql_refsols/synthea_most_common_conditions_ansi.sql +++ b/tests/test_sql_refsols/synthea_most_common_conditions_ansi.sql @@ -1,7 +1,7 @@ SELECT conditions.description AS condition_description -FROM synthea.patients AS patients -JOIN synthea.conditions AS conditions +FROM main.patients AS patients +JOIN main.conditions AS conditions ON conditions.patient = patients.patient WHERE patients.ethnicity = 'italian' AND patients.gender = 'F' diff --git a/tests/test_sql_refsols/synthea_most_common_conditions_mysql.sql b/tests/test_sql_refsols/synthea_most_common_conditions_mysql.sql index 782f8ee93..686465e2b 100644 --- a/tests/test_sql_refsols/synthea_most_common_conditions_mysql.sql +++ b/tests/test_sql_refsols/synthea_most_common_conditions_mysql.sql @@ -1,7 +1,7 @@ SELECT conditions.description COLLATE utf8mb4_bin AS condition_description -FROM synthea.patients AS patients -JOIN synthea.conditions AS conditions +FROM main.patients AS patients +JOIN main.conditions AS conditions ON conditions.patient = patients.patient WHERE patients.ethnicity = 'italian' AND patients.gender = 'F' diff --git a/tests/test_sql_refsols/synthea_most_common_conditions_postgres.sql b/tests/test_sql_refsols/synthea_most_common_conditions_postgres.sql index 90c67f5da..352d86dc8 100644 --- a/tests/test_sql_refsols/synthea_most_common_conditions_postgres.sql +++ b/tests/test_sql_refsols/synthea_most_common_conditions_postgres.sql @@ -1,7 +1,7 @@ SELECT conditions.description AS condition_description -FROM synthea.patients AS patients -JOIN synthea.conditions AS conditions +FROM main.patients AS patients +JOIN main.conditions AS 
conditions ON conditions.patient = patients.patient WHERE patients.ethnicity = 'italian' AND patients.gender = 'F' diff --git a/tests/test_sql_refsols/synthea_most_common_conditions_snowflake.sql b/tests/test_sql_refsols/synthea_most_common_conditions_snowflake.sql index 90c67f5da..352d86dc8 100644 --- a/tests/test_sql_refsols/synthea_most_common_conditions_snowflake.sql +++ b/tests/test_sql_refsols/synthea_most_common_conditions_snowflake.sql @@ -1,7 +1,7 @@ SELECT conditions.description AS condition_description -FROM synthea.patients AS patients -JOIN synthea.conditions AS conditions +FROM main.patients AS patients +JOIN main.conditions AS conditions ON conditions.patient = patients.patient WHERE patients.ethnicity = 'italian' AND patients.gender = 'F' diff --git a/tests/test_sql_refsols/synthea_most_common_conditions_sqlite.sql b/tests/test_sql_refsols/synthea_most_common_conditions_sqlite.sql index 19fdf1f21..d6e8b8406 100644 --- a/tests/test_sql_refsols/synthea_most_common_conditions_sqlite.sql +++ b/tests/test_sql_refsols/synthea_most_common_conditions_sqlite.sql @@ -1,7 +1,7 @@ SELECT conditions.description AS condition_description -FROM synthea.patients AS patients -JOIN synthea.conditions AS conditions +FROM main.patients AS patients +JOIN main.conditions AS conditions ON conditions.patient = patients.patient WHERE patients.ethnicity = 'italian' AND patients.gender = 'F' diff --git a/tests/test_sql_refsols/wdi_albania_footnotes_1978_ansi.sql b/tests/test_sql_refsols/wdi_albania_footnotes_1978_ansi.sql index f48dbdba5..289f1a53a 100644 --- a/tests/test_sql_refsols/wdi_albania_footnotes_1978_ansi.sql +++ b/tests/test_sql_refsols/wdi_albania_footnotes_1978_ansi.sql @@ -1,7 +1,7 @@ SELECT footnotes.description AS footnote_description -FROM wdi.country AS country -JOIN wdi.footnotes AS footnotes +FROM main.country AS country +JOIN main.footnotes AS footnotes ON country.countrycode = footnotes.countrycode AND footnotes.year = 'YR2012' WHERE country.shortname = 
'Albania' diff --git a/tests/test_sql_refsols/wdi_albania_footnotes_1978_mysql.sql b/tests/test_sql_refsols/wdi_albania_footnotes_1978_mysql.sql index 483121416..db93bd850 100644 --- a/tests/test_sql_refsols/wdi_albania_footnotes_1978_mysql.sql +++ b/tests/test_sql_refsols/wdi_albania_footnotes_1978_mysql.sql @@ -1,7 +1,7 @@ SELECT Footnotes.description AS footnote_description -FROM wdi.Country AS Country -JOIN wdi.Footnotes AS Footnotes +FROM main.Country AS Country +JOIN main.Footnotes AS Footnotes ON Country.countrycode = Footnotes.countrycode AND Footnotes.year = 'YR2012' WHERE Country.shortname = 'Albania' diff --git a/tests/test_sql_refsols/wdi_albania_footnotes_1978_postgres.sql b/tests/test_sql_refsols/wdi_albania_footnotes_1978_postgres.sql index f48dbdba5..289f1a53a 100644 --- a/tests/test_sql_refsols/wdi_albania_footnotes_1978_postgres.sql +++ b/tests/test_sql_refsols/wdi_albania_footnotes_1978_postgres.sql @@ -1,7 +1,7 @@ SELECT footnotes.description AS footnote_description -FROM wdi.country AS country -JOIN wdi.footnotes AS footnotes +FROM main.country AS country +JOIN main.footnotes AS footnotes ON country.countrycode = footnotes.countrycode AND footnotes.year = 'YR2012' WHERE country.shortname = 'Albania' diff --git a/tests/test_sql_refsols/wdi_albania_footnotes_1978_snowflake.sql b/tests/test_sql_refsols/wdi_albania_footnotes_1978_snowflake.sql index f48dbdba5..289f1a53a 100644 --- a/tests/test_sql_refsols/wdi_albania_footnotes_1978_snowflake.sql +++ b/tests/test_sql_refsols/wdi_albania_footnotes_1978_snowflake.sql @@ -1,7 +1,7 @@ SELECT footnotes.description AS footnote_description -FROM wdi.country AS country -JOIN wdi.footnotes AS footnotes +FROM main.country AS country +JOIN main.footnotes AS footnotes ON country.countrycode = footnotes.countrycode AND footnotes.year = 'YR2012' WHERE country.shortname = 'Albania' diff --git a/tests/test_sql_refsols/wdi_albania_footnotes_1978_sqlite.sql 
b/tests/test_sql_refsols/wdi_albania_footnotes_1978_sqlite.sql index f48dbdba5..289f1a53a 100644 --- a/tests/test_sql_refsols/wdi_albania_footnotes_1978_sqlite.sql +++ b/tests/test_sql_refsols/wdi_albania_footnotes_1978_sqlite.sql @@ -1,7 +1,7 @@ SELECT footnotes.description AS footnote_description -FROM wdi.country AS country -JOIN wdi.footnotes AS footnotes +FROM main.country AS country +JOIN main.footnotes AS footnotes ON country.countrycode = footnotes.countrycode AND footnotes.year = 'YR2012' WHERE country.shortname = 'Albania' diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql index 0faaf80d5..6dd7b2a24 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_ansi.sql @@ -1,9 +1,9 @@ SELECT country.countrycode AS country_code -FROM wdi.country AS country -JOIN wdi.countrynotes AS countrynotes +FROM main.country AS country +JOIN main.countrynotes AS countrynotes ON country.countrycode = countrynotes.countrycode -JOIN wdi.series AS series +JOIN main.series AS series ON countrynotes.seriescode = series.seriescode AND series.seriescode = 'DT.DOD.DECT.CD' WHERE diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql index 64ea58619..49b7dd612 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_mysql.sql @@ -1,8 +1,8 @@ WITH _u_0 AS ( SELECT CountryNotes.countrycode AS _u_1 - FROM wdi.CountryNotes AS CountryNotes - JOIN wdi.Series AS Series + FROM main.CountryNotes AS CountryNotes + JOIN main.Series AS Series ON CountryNotes.seriescode = Series.seriescode AND Series.seriescode = 'DT.DOD.DECT.CD' GROUP BY @@ -10,7 +10,7 @@ WITH _u_0 AS ( ) SELECT Country.countrycode AS country_code -FROM wdi.Country 
AS Country +FROM main.Country AS Country LEFT JOIN _u_0 AS _u_0 ON Country.countrycode = _u_0._u_1 WHERE diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql index 9315d0277..fcb2cde21 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_postgres.sql @@ -1,8 +1,8 @@ WITH _u_0 AS ( SELECT countrynotes.countrycode AS _u_1 - FROM wdi.countrynotes AS countrynotes - JOIN wdi.series AS series + FROM main.countrynotes AS countrynotes + JOIN main.series AS series ON countrynotes.seriescode = series.seriescode AND series.seriescode = 'DT.DOD.DECT.CD' GROUP BY @@ -10,7 +10,7 @@ WITH _u_0 AS ( ) SELECT country.countrycode AS country_code -FROM wdi.country AS country +FROM main.country AS country LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = country.countrycode WHERE diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql index 9315d0277..fcb2cde21 100644 --- a/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_snowflake.sql @@ -1,8 +1,8 @@ WITH _u_0 AS ( SELECT countrynotes.countrycode AS _u_1 - FROM wdi.countrynotes AS countrynotes - JOIN wdi.series AS series + FROM main.countrynotes AS countrynotes + JOIN main.series AS series ON countrynotes.seriescode = series.seriescode AND series.seriescode = 'DT.DOD.DECT.CD' GROUP BY @@ -10,7 +10,7 @@ WITH _u_0 AS ( ) SELECT country.countrycode AS country_code -FROM wdi.country AS country +FROM main.country AS country LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = country.countrycode WHERE diff --git a/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql b/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql index 9315d0277..fcb2cde21 100644 
--- a/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql +++ b/tests/test_sql_refsols/wdi_low_income_country_with_series_sqlite.sql @@ -1,8 +1,8 @@ WITH _u_0 AS ( SELECT countrynotes.countrycode AS _u_1 - FROM wdi.countrynotes AS countrynotes - JOIN wdi.series AS series + FROM main.countrynotes AS countrynotes + JOIN main.series AS series ON countrynotes.seriescode = series.seriescode AND series.seriescode = 'DT.DOD.DECT.CD' GROUP BY @@ -10,7 +10,7 @@ WITH _u_0 AS ( ) SELECT country.countrycode AS country_code -FROM wdi.country AS country +FROM main.country AS country LEFT JOIN _u_0 AS _u_0 ON _u_0._u_1 = country.countrycode WHERE