Skip to content

qualify_columns: expand_stars does not expand SELECT * into individual columns #25

@karakanb

Description

@karakanb

qualify_columns() with expand_stars: true does not seem to rewrite SELECT * into the individual columns from the provided schema. The AST is returned unchanged.

sqlglot repro

# /// script
# requires-python = ">=3.9"
# dependencies = ["sqlglot>=26.0"]
# ///
from sqlglot import exp
from sqlglot.optimizer import optimize
from sqlglot.optimizer.qualify import qualify

query = "SELECT * FROM users"
schema = {"users": {"id": "INT64", "name": "TEXT", "email": "TEXT"}}
print(f"Query: {query}\n")

optimized = optimize(exp.maybe_parse(query, dialect="bigquery"), schema, dialect="bigquery", rules=(qualify,))
select = optimized.find(exp.Select)
col_names = [e.alias_or_name for e in select.expressions]
print(f"column names: {col_names}")

polyglot repro

use polyglot_sql::{self as pgsql, DialectType, MappingSchema, Schema};
use polyglot_sql::expressions::DataType;

fn main() {
    let query = "SELECT * FROM users";
    println!("Query: {query}\n");

    let exprs = pgsql::parse(query, DialectType::BigQuery).unwrap();

    // Build schema with known columns
    let mut schema = MappingSchema::with_dialect(DialectType::BigQuery);
    schema.add_table("users", &[
        ("id".into(), DataType::Int { length: None, integer_spelling: false }),
        ("name".into(), DataType::Text),
        ("email".into(), DataType::Text),
    ], None).unwrap();

    // qualify_columns has expand_stars=true by default
    let options = pgsql::optimizer::qualify_columns::QualifyColumnsOptions::new()
        .with_expand_stars(true)
        .with_dialect(DialectType::BigQuery);
    let qualified = pgsql::optimizer::qualify_columns::qualify_columns(
        exprs[0].clone(), &schema, &options,
    ).unwrap();

    let sql = pgsql::generate(&qualified, DialectType::BigQuery).unwrap();
    println!("after qualify_columns: {sql}");

    let col_names = pgsql::ast_transforms::get_column_names(&qualified);
    println!("get_column_names: {col_names:?}");
}

sqlglot rewrites SELECT * into SELECT users.id, users.name, users.email when a schema is provided. Please let me know if I am missing how to use the library here.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions