diff --git a/misc/python/materialize/mzcompose/__init__.py b/misc/python/materialize/mzcompose/__init__.py index bc6526c8bb0c0..5d95385d32664 100644 --- a/misc/python/materialize/mzcompose/__init__.py +++ b/misc/python/materialize/mzcompose/__init__.py @@ -107,6 +107,7 @@ def get_minimal_system_parameters( "enable_rbac_checks": "true", "enable_reduce_mfp_fusion": "true", "enable_refresh_every_mvs": "true", + "enable_repr_typecheck": "true", "enable_cluster_schedule_refresh": "true", "enable_sql_server_source": "true", "enable_statement_lifecycle_logging": "true", diff --git a/src/adapter/src/optimize/copy_to.rs b/src/adapter/src/optimize/copy_to.rs index 35532b8075e23..8086e75ac2903 100644 --- a/src/adapter/src/optimize/copy_to.rs +++ b/src/adapter/src/optimize/copy_to.rs @@ -28,6 +28,9 @@ use mz_storage_types::connections::Connection; use mz_storage_types::sinks::S3UploadInfo; use mz_transform::dataflow::DataflowMetainfo; use mz_transform::normalize_lets::normalize_lets; +use mz_transform::reprtypecheck::{ + SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, +}; use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use mz_transform::{StatisticsOracle, TransformCtx}; use timely::progress::Antichain; @@ -48,6 +51,8 @@ use crate::optimize::{ pub struct Optimizer { /// A typechecking context to use throughout the optimizer pipeline. typecheck_ctx: TypecheckContext, + /// A representation typechecking context to use throughout the optimizer pipeline. + repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. catalog: Arc, /// A snapshot of the cluster that will run the dataflows. 
@@ -75,6 +80,7 @@ impl Optimizer { ) -> Self { Self { typecheck_ctx: empty_context(), + repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, select_id, @@ -167,6 +173,7 @@ impl Optimize for Optimizer { let mut transform_ctx = TransformCtx::local( &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), Some(self.select_id), @@ -344,6 +351,7 @@ impl<'s> Optimize>> for Optimizer { &*stats, &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), ); diff --git a/src/adapter/src/optimize/index.rs b/src/adapter/src/optimize/index.rs index 848b312d0d955..423dd6c00e0fa 100644 --- a/src/adapter/src/optimize/index.rs +++ b/src/adapter/src/optimize/index.rs @@ -38,6 +38,9 @@ use mz_transform::TransformCtx; use mz_transform::dataflow::DataflowMetainfo; use mz_transform::normalize_lets::normalize_lets; use mz_transform::notice::{IndexAlreadyExists, IndexKeyEmpty}; +use mz_transform::reprtypecheck::{ + SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, +}; use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use crate::optimize::dataflows::{ @@ -51,6 +54,8 @@ use crate::optimize::{ pub struct Optimizer { /// A typechecking context to use throughout the optimizer pipeline. typecheck_ctx: TypecheckContext, + /// A representation typechecking context to use throughout the optimizer pipeline. + repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. catalog: Arc, /// A snapshot of the cluster that will run the dataflows. 
@@ -75,6 +80,7 @@ impl Optimizer { ) -> Self { Self { typecheck_ctx: empty_context(), + repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, exported_index_id, @@ -177,6 +183,7 @@ impl Optimize for Optimizer { &mz_transform::EmptyStatisticsOracle, // TODO: wire proper stats &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), ); diff --git a/src/adapter/src/optimize/materialized_view.rs b/src/adapter/src/optimize/materialized_view.rs index 5f92cb3c0a8da..411bc38c28070 100644 --- a/src/adapter/src/optimize/materialized_view.rs +++ b/src/adapter/src/optimize/materialized_view.rs @@ -42,6 +42,9 @@ use mz_sql::plan::HirRelationExpr; use mz_transform::TransformCtx; use mz_transform::dataflow::DataflowMetainfo; use mz_transform::normalize_lets::normalize_lets; +use mz_transform::reprtypecheck::{ + SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, +}; use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use timely::progress::Antichain; @@ -56,6 +59,8 @@ use crate::optimize::{ pub struct Optimizer { /// A typechecking context to use throughout the optimizer pipeline. typecheck_ctx: TypecheckContext, + /// A representation typechecking context to use throughout the optimizer pipeline. + repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. catalog: Arc, /// A snapshot of the cluster that will run the dataflows. 
@@ -115,6 +120,7 @@ impl Optimizer { ) -> Self { Self { typecheck_ctx: empty_context(), + repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, sink_id, @@ -197,6 +203,7 @@ impl Optimize for Optimizer { let mut transform_ctx = TransformCtx::local( &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), Some(self.view_id), @@ -286,6 +293,7 @@ impl Optimize for Optimizer { &mz_transform::EmptyStatisticsOracle, // TODO: wire proper stats &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), ); diff --git a/src/adapter/src/optimize/peek.rs b/src/adapter/src/optimize/peek.rs index 416a2c9a5fd85..89431fc6a3d92 100644 --- a/src/adapter/src/optimize/peek.rs +++ b/src/adapter/src/optimize/peek.rs @@ -25,6 +25,9 @@ use mz_sql::plan::HirRelationExpr; use mz_sql::session::metadata::SessionMetadata; use mz_transform::dataflow::DataflowMetainfo; use mz_transform::normalize_lets::normalize_lets; +use mz_transform::reprtypecheck::{ + SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, +}; use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use mz_transform::{StatisticsOracle, TransformCtx}; use timely::progress::Antichain; @@ -45,6 +48,8 @@ use crate::optimize::{ pub struct Optimizer { /// A typechecking context to use throughout the optimizer pipeline. typecheck_ctx: TypecheckContext, + /// A representation typechecking context to use throughout the optimizer pipeline. + repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. catalog: Arc, /// A snapshot of the cluster that will run the dataflows. 
@@ -75,6 +80,7 @@ impl Optimizer { ) -> Self { Self { typecheck_ctx: empty_context(), + repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, finishing, @@ -181,6 +187,7 @@ impl Optimize for Optimizer { let mut transform_ctx = TransformCtx::local( &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), Some(self.select_id), @@ -336,6 +343,7 @@ impl<'s> Optimize>> for Optimizer { &*stats, &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), ); diff --git a/src/adapter/src/optimize/subscribe.rs b/src/adapter/src/optimize/subscribe.rs index 8c06e0d08d9fa..ea6e140b315b7 100644 --- a/src/adapter/src/optimize/subscribe.rs +++ b/src/adapter/src/optimize/subscribe.rs @@ -26,6 +26,9 @@ use mz_sql::plan::SubscribeFrom; use mz_transform::TransformCtx; use mz_transform::dataflow::DataflowMetainfo; use mz_transform::normalize_lets::normalize_lets; +use mz_transform::reprtypecheck::{ + SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, +}; use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use timely::progress::Antichain; @@ -42,6 +45,8 @@ use crate::optimize::{ pub struct Optimizer { /// A typechecking context to use throughout the optimizer pipeline. typecheck_ctx: TypecheckContext, + /// A representation typechecking context to use throughout the optimizer pipeline. + repr_typecheck_ctx: ReprTypecheckContext, /// A snapshot of the catalog state. catalog: Arc, /// A snapshot of the cluster that will run the dataflows. 
@@ -95,6 +100,7 @@ impl Optimizer { ) -> Self { Self { typecheck_ctx: empty_context(), + repr_typecheck_ctx: empty_repr_context(), catalog, compute_instance, view_id, @@ -229,6 +235,7 @@ impl Optimize for Optimizer { let mut transform_ctx = TransformCtx::local( &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), Some(self.view_id), @@ -272,6 +279,7 @@ impl Optimize for Optimizer { &mz_transform::EmptyStatisticsOracle, // TODO: wire proper stats &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, Some(&self.metrics), ); diff --git a/src/adapter/src/optimize/view.rs b/src/adapter/src/optimize/view.rs index c9096fc6eda26..d281d37695877 100644 --- a/src/adapter/src/optimize/view.rs +++ b/src/adapter/src/optimize/view.rs @@ -26,6 +26,9 @@ use mz_sql::optimizer_metrics::OptimizerMetrics; use mz_sql::plan::HirRelationExpr; use mz_transform::TransformCtx; use mz_transform::dataflow::DataflowMetainfo; +use mz_transform::reprtypecheck::{ + SharedContext as ReprTypecheckContext, empty_context as empty_repr_context, +}; use mz_transform::typecheck::{SharedContext as TypecheckContext, empty_context}; use crate::optimize::dataflows::{ExprPrepStyle, prep_relation_expr}; @@ -37,6 +40,8 @@ use crate::optimize::{ pub struct Optimizer<'a> { /// A typechecking context to use throughout the optimizer pipeline. typecheck_ctx: TypecheckContext, + /// A representation typechecking context to use throughout the optimizer pipeline. + repr_typecheck_ctx: ReprTypecheckContext, /// Optimizer config. config: OptimizerConfig, /// Optimizer metrics. 
@@ -54,6 +59,7 @@ impl<'a> Optimizer<'a> { pub fn new(config: OptimizerConfig, metrics: Option) -> Self { Self { typecheck_ctx: empty_context(), + repr_typecheck_ctx: empty_repr_context(), config, metrics, expr_prep_style: None, @@ -71,6 +77,7 @@ impl<'a> Optimizer<'a> { ) -> Optimizer<'a> { Self { typecheck_ctx: empty_context(), + repr_typecheck_ctx: empty_repr_context(), config, metrics, expr_prep_style: Some(expr_prep_style), @@ -95,6 +102,7 @@ impl Optimize for Optimizer<'_> { let mut transform_ctx = TransformCtx::local( &self.config.features, &self.typecheck_ctx, + &self.repr_typecheck_ctx, &mut df_meta, self.metrics.as_ref(), None, diff --git a/src/expr/src/scalar/func/variadic.rs b/src/expr/src/scalar/func/variadic.rs index af646735dff28..578e5e2d215d6 100644 --- a/src/expr/src/scalar/func/variadic.rs +++ b/src/expr/src/scalar/func/variadic.rs @@ -29,7 +29,7 @@ use mz_repr::adt::range::{InvalidRangeError, Range, RangeBound, parse_range_boun use mz_repr::adt::system::Oid; use mz_repr::adt::timestamp::CheckedTimestamp; use mz_repr::role_id::RoleId; -use mz_repr::{ColumnName, Datum, Row, RowArena, SqlColumnType, SqlScalarType}; +use mz_repr::{ColumnName, Datum, ReprScalarType, Row, RowArena, SqlColumnType, SqlScalarType}; use serde::{Deserialize, Serialize}; use sha1::Sha1; use sha2::{Sha224, Sha256, Sha384, Sha512}; @@ -1321,8 +1321,11 @@ impl VariadicFunc { .nullable(true), ArrayCreate { elem_type } => { debug_assert!( - input_types.iter().all(|t| t.scalar_type.base_eq(elem_type)), - "Args to ArrayCreate should have types that are compatible with the elem_type" + input_types + .iter() + .all(|t| ReprScalarType::from(&t.scalar_type) + == ReprScalarType::from(elem_type)), + "Args to ArrayCreate should have types that are repr-compatible with the elem_type" ); match elem_type { SqlScalarType::Array(_) => elem_type.clone().nullable(false), diff --git a/src/repr/src/explain.rs b/src/repr/src/explain.rs index 2696b82a3b72a..e373d15542392 100644 --- 
a/src/repr/src/explain.rs +++ b/src/repr/src/explain.rs @@ -45,7 +45,7 @@ use crate::explain::dot::{DisplayDot, dot_string}; use crate::explain::json::{DisplayJson, json_string}; use crate::explain::text::{DisplayText, text_string}; use crate::optimize::OptimizerFeatureOverrides; -use crate::{GlobalId, SqlColumnType, SqlScalarType}; +use crate::{GlobalId, ReprColumnType, ReprScalarType, SqlColumnType, SqlScalarType}; pub mod dot; pub mod json; @@ -444,6 +444,12 @@ pub trait ExprHumanizer: fmt::Debug { /// compatibility is more important. fn humanize_scalar_type(&self, ty: &SqlScalarType, postgres_compat: bool) -> String; + /// Returns a human-readable name for the specified scalar type. + /// Calls `humanize_scalar_type` with the `SqlScalarType` representation of the specified type. + fn humanize_scalar_type_repr(&self, typ: &ReprScalarType, postgres_compat: bool) -> String { + self.humanize_scalar_type(&SqlScalarType::from_repr(typ), postgres_compat) + } + /// Returns a human-readable name for the specified column type. /// Used in, e.g., EXPLAIN and error msgs, in which case exact Postgres compatibility is less /// important than showing as much detail as possible. Also used in `pg_typeof`, where Postgres @@ -456,6 +462,12 @@ pub trait ExprHumanizer: fmt::Debug { ) } + /// Returns a human-readable name for the specified column type. + /// Calls `humanize_column_type` with the `SqlColumnType` representation of the specified type. + fn humanize_column_type_repr(&self, typ: &ReprColumnType, postgres_compat: bool) -> String { + self.humanize_column_type(&SqlColumnType::from_repr(typ), postgres_compat) + } + /// Returns a vector of column names for the relation identified by `id`. 
fn column_names_for_id(&self, id: GlobalId) -> Option>; diff --git a/src/repr/src/lib.rs b/src/repr/src/lib.rs index d7f4c8b687360..4dfc931e91359 100644 --- a/src/repr/src/lib.rs +++ b/src/repr/src/lib.rs @@ -54,8 +54,8 @@ pub use crate::global_id::GlobalId; pub use crate::relation::{ ColumnIndex, ColumnName, NotNullViolation, PropRelationDescDiff, ProtoColumnName, ProtoColumnType, ProtoRelationDesc, ProtoRelationType, RelationDesc, RelationDescBuilder, - RelationVersion, RelationVersionSelector, ReprColumnType, SqlColumnType, SqlRelationType, - UNKNOWN_COLUMN_NAME, VersionedRelationDesc, arb_relation_desc_diff, + RelationVersion, RelationVersionSelector, ReprColumnType, ReprRelationType, SqlColumnType, + SqlRelationType, UNKNOWN_COLUMN_NAME, VersionedRelationDesc, arb_relation_desc_diff, arb_relation_desc_projection, arb_row_for_relation, }; pub use crate::row::encode::{RowColumnarDecoder, RowColumnarEncoder, preserves_order}; @@ -66,7 +66,7 @@ pub use crate::row::{ }; pub use crate::scalar::{ ArrayRustType, AsColumnType, Datum, DatumType, PropArray, PropDatum, PropDict, PropList, - ProtoScalarType, ReprScalarType, ScalarBaseType, SqlScalarType, arb_datum, - arb_datum_for_column, arb_datum_for_scalar, arb_range_type, + ProtoScalarType, ReprScalarBaseType, ReprScalarType, SqlScalarBaseType, SqlScalarType, + arb_datum, arb_datum_for_column, arb_datum_for_scalar, arb_range_type, }; pub use crate::timestamp::{Timestamp, TimestampManipulation}; diff --git a/src/repr/src/optimize.rs b/src/repr/src/optimize.rs index fb2e5c64ac2fe..3ad66e4482fb4 100644 --- a/src/repr/src/optimize.rs +++ b/src/repr/src/optimize.rs @@ -131,6 +131,7 @@ optimizer_feature_flags!({ // See the feature flag of the same name. enable_dequadratic_eqprop_map: bool, enable_fast_path_plan_insights: bool, + enable_repr_typecheck: bool, }); /// A trait used to implement layered config construction. 
diff --git a/src/repr/src/relation.rs b/src/repr/src/relation.rs index 7dd7f06e00e11..d7e8969f1e82a 100644 --- a/src/repr/src/relation.rs +++ b/src/repr/src/relation.rs @@ -89,6 +89,13 @@ impl SqlColumnType { ); }; + if fields.len() != other_fields.len() { + bail!( + "Can't union types: {:?} and {:?}", + self.scalar_type, + other.scalar_type + ); + } let mut union_fields = Vec::with_capacity(fields.len()); for ((name, typ), (other_name, other_typ)) in fields.iter().zip_eq(other_fields.iter()) @@ -259,6 +266,94 @@ impl RustType for Vec { } } +/// The type of a relation. +#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash)] +pub struct ReprRelationType { + /// The type for each column, in order. + pub column_types: Vec, + /// Sets of indices that are "keys" for the collection. + /// + /// Each element in this list is a set of column indices, each with the + /// property that the collection contains at most one record with each + /// distinct set of values for each column. Alternately, for a specific set + /// of values assigned to the these columns there is at most one record. + /// + /// A collection can contain multiple sets of keys, although it is common to + /// have either zero or one sets of key indices. + #[serde(default)] + pub keys: Vec>, +} + +impl ReprRelationType { + /// Constructs a `ReprRelationType` representing the relation with no columns and + /// no keys. + pub fn empty() -> Self { + ReprRelationType::new(vec![]) + } + + /// Constructs a new `ReprRelationType` from specified column types. + /// + /// The `ReprRelationType` will have no keys. + pub fn new(column_types: Vec) -> Self { + ReprRelationType { + column_types, + keys: Vec::new(), + } + } + + /// Adds a new key for the relation. 
+ pub fn with_key(mut self, mut indices: Vec) -> Self { + indices.sort_unstable(); + if !self.keys.contains(&indices) { + self.keys.push(indices); + } + self + } + + pub fn with_keys(mut self, keys: Vec>) -> Self { + for key in keys { + self = self.with_key(key) + } + self + } + + /// Computes the number of columns in the relation. + pub fn arity(&self) -> usize { + self.column_types.len() + } + + /// Gets the index of the columns used when creating a default index. + pub fn default_key(&self) -> Vec { + if let Some(key) = self.keys.first() { + if key.is_empty() { + (0..self.column_types.len()).collect() + } else { + key.clone() + } + } else { + (0..self.column_types.len()).collect() + } + } + + /// Returns all the column types in order, for this relation. + pub fn columns(&self) -> &[ReprColumnType] { + &self.column_types + } +} + +impl From<&SqlRelationType> for ReprRelationType { + fn from(sql_relation_type: &SqlRelationType) -> Self { + ReprRelationType { + column_types: sql_relation_type + .column_types + .iter() + .map(ReprColumnType::from) + .collect(), + keys: sql_relation_type.keys.clone(), + } + } +} + #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize, Hash, MzReflect)] pub struct ReprColumnType { /// The underlying representation scalar type (e.g., Int32 or String) of this column. diff --git a/src/repr/src/scalar.rs b/src/repr/src/scalar.rs index 1955482bb4442..7f33492bf299a 100644 --- a/src/repr/src/scalar.rs +++ b/src/repr/src/scalar.rs @@ -1591,7 +1591,7 @@ impl fmt::Display for Datum<'_> { #[derive( Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Ord, PartialOrd, Hash, EnumKind, MzReflect, )] -#[enum_kind(ScalarBaseType, derive(PartialOrd, Ord, Hash))] +#[enum_kind(SqlScalarBaseType, derive(PartialOrd, Ord, Hash))] pub enum SqlScalarType { /// The type of [`Datum::True`] and [`Datum::False`]. 
Bool, @@ -3212,7 +3212,7 @@ impl SqlScalarType { && a.1.scalar_type.eq_inner(&b.1.scalar_type, structure_only) }) } - (s, o) => ScalarBaseType::from(s) == ScalarBaseType::from(o), + (s, o) => SqlScalarBaseType::from(s) == SqlScalarBaseType::from(o), } } @@ -3968,12 +3968,8 @@ impl Arbitrary for ReprScalarType { Just(ReprScalarType::Numeric).boxed(), Just(ReprScalarType::Date).boxed(), Just(ReprScalarType::Time).boxed(), - any::>() - .prop_map(|precision| ReprScalarType::Timestamp { precision }) - .boxed(), - any::>() - .prop_map(|precision| ReprScalarType::TimestampTz { precision }) - .boxed(), + Just(ReprScalarType::Timestamp).boxed(), + Just(ReprScalarType::TimestampTz).boxed(), Just(ReprScalarType::MzTimestamp).boxed(), Just(ReprScalarType::Interval).boxed(), Just(ReprScalarType::Bytes).boxed(), @@ -3995,12 +3991,8 @@ impl Arbitrary for ReprScalarType { Just(ReprScalarType::Int64).boxed(), Just(ReprScalarType::Date).boxed(), Just(ReprScalarType::Numeric).boxed(), - any::>() - .prop_map(|precision| ReprScalarType::Timestamp { precision }) - .boxed(), - any::>() - .prop_map(|precision| ReprScalarType::TimestampTz { precision }) - .boxed(), + Just(ReprScalarType::Timestamp).boxed(), + Just(ReprScalarType::TimestampTz).boxed(), ]); let range = range_leaf .prop_map(|inner_type| ReprScalarType::Range { @@ -4068,7 +4060,10 @@ impl Arbitrary for ReprScalarType { /// There is a direct correspondence between `Datum` variants and `ReprScalarType` /// variants: every `Datum` variant corresponds to exactly one `ReprScalarType` variant /// (with an exception for `Datum::Array`, which could be both an `Int2Vector` and an `Array`). 
-#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Ord, PartialOrd, Hash, MzReflect)] +#[derive( + Clone, Debug, EnumKind, PartialEq, Eq, Serialize, Deserialize, Ord, PartialOrd, Hash, MzReflect, +)] +#[enum_kind(ReprScalarBaseType, derive(PartialOrd, Ord, Hash))] pub enum ReprScalarType { Bool, Int16, @@ -4083,12 +4078,8 @@ pub enum ReprScalarType { Numeric, Date, Time, - Timestamp { - precision: Option, - }, - TimestampTz { - precision: Option, - }, + Timestamp, + TimestampTz, MzTimestamp, Interval, Bytes, @@ -4097,18 +4088,10 @@ pub enum ReprScalarType { Uuid, Array(Box), Int2Vector, // differs from Array enough to stick around - List { - element_type: Box, - }, - Record { - fields: Box<[ReprColumnType]>, - }, - Map { - value_type: Box, - }, - Range { - element_type: Box, - }, + List { element_type: Box }, + Record { fields: Box<[ReprColumnType]> }, + Map { value_type: Box }, + Range { element_type: Box }, MzAclItem, AclItem, } @@ -4128,12 +4111,8 @@ impl From<&SqlScalarType> for ReprScalarType { SqlScalarType::Numeric { max_scale: _ } => ReprScalarType::Numeric, SqlScalarType::Date => ReprScalarType::Date, SqlScalarType::Time => ReprScalarType::Time, - SqlScalarType::Timestamp { precision } => ReprScalarType::Timestamp { - precision: *precision, - }, - SqlScalarType::TimestampTz { precision } => ReprScalarType::TimestampTz { - precision: *precision, - }, + SqlScalarType::Timestamp { precision: _ } => ReprScalarType::Timestamp, + SqlScalarType::TimestampTz { precision: _ } => ReprScalarType::TimestampTz, SqlScalarType::Interval => ReprScalarType::Interval, SqlScalarType::PgLegacyChar => ReprScalarType::UInt8, SqlScalarType::PgLegacyName => ReprScalarType::String, @@ -4214,12 +4193,8 @@ impl SqlScalarType { ReprScalarType::Numeric => SqlScalarType::Numeric { max_scale: None }, ReprScalarType::Date => SqlScalarType::Date, ReprScalarType::Time => SqlScalarType::Time, - ReprScalarType::Timestamp { precision } => SqlScalarType::Timestamp { - precision: 
*precision, - }, - ReprScalarType::TimestampTz { precision } => SqlScalarType::TimestampTz { - precision: *precision, - }, + ReprScalarType::Timestamp => SqlScalarType::Timestamp { precision: None }, + ReprScalarType::TimestampTz => SqlScalarType::TimestampTz { precision: None }, ReprScalarType::MzTimestamp => SqlScalarType::MzTimestamp, ReprScalarType::Interval => SqlScalarType::Interval, ReprScalarType::Bytes => SqlScalarType::Bytes, @@ -5006,6 +4981,19 @@ mod tests { } } + proptest! { + #![proptest_config(ProptestConfig::with_cases(10000))] + #[mz_ore::test] + #[cfg_attr(miri, ignore)] + fn sql_type_base_eq_implies_repr_type_eq(sql_type1 in any::(), sql_type2 in any::()) { + let repr_type1 = ReprScalarType::from(&sql_type1); + let repr_type2 = ReprScalarType::from(&sql_type2); + if sql_type1.base_eq(&sql_type2) { + assert_eq!(repr_type1, repr_type2); + } + } + } + proptest! { #[mz_ore::test] #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `decContextDefault` on OS `linux` diff --git a/src/sql/src/func.rs b/src/sql/src/func.rs index e7707094344bf..ffbdeabd86051 100644 --- a/src/sql/src/func.rs +++ b/src/sql/src/func.rs @@ -21,7 +21,7 @@ use mz_ore::collections::CollectionExt; use mz_ore::str::StrExt; use mz_pgrepr::oid; use mz_repr::role_id::RoleId; -use mz_repr::{ColumnName, Datum, ScalarBaseType, SqlRelationType, SqlScalarType}; +use mz_repr::{ColumnName, Datum, SqlRelationType, SqlScalarBaseType, SqlScalarType}; use crate::ast::{SelectStatement, Statement}; use crate::catalog::{CatalogType, TypeCategory, TypeReference}; @@ -856,9 +856,9 @@ impl From for ParamType { } } -impl From for ParamType { - fn from(s: ScalarBaseType) -> ParamType { - use ScalarBaseType::*; +impl From for ParamType { + fn from(s: SqlScalarBaseType) -> ParamType { + use SqlScalarBaseType::*; let s = match s { Array | List | Map | Record | Range => { panic!("use polymorphic parameters rather than {:?}", s); @@ -938,8 +938,8 @@ impl From for ReturnType { } } 
-impl From for ReturnType { - fn from(s: ScalarBaseType) -> ReturnType { +impl From for ReturnType { + fn from(s: SqlScalarBaseType) -> ReturnType { ParamType::from(s).into() } } @@ -1802,7 +1802,7 @@ macro_rules! privilege_fn { /// Correlates a built-in function name to its implementations. pub static PG_CATALOG_BUILTINS: LazyLock> = LazyLock::new(|| { use ParamType::*; - use ScalarBaseType::*; + use SqlScalarBaseType::*; let mut builtins = builtins! { // Literal OIDs collected from PG 13 using a version of this query // ```sql @@ -3537,7 +3537,7 @@ pub static INFORMATION_SCHEMA_BUILTINS: LazyLock> = pub static MZ_CATALOG_BUILTINS: LazyLock> = LazyLock::new(|| { use ParamType::*; - use ScalarBaseType::*; + use SqlScalarBaseType::*; builtins! { "constant_time_eq" => Scalar { params!(Bytes, Bytes) => BinaryFunc::from(func::ConstantTimeEqBytes) => Bool, oid::FUNC_CONSTANT_TIME_EQ_BYTES_OID; @@ -3952,7 +3952,7 @@ pub static MZ_CATALOG_BUILTINS: LazyLock> = LazyLoc pub static MZ_INTERNAL_BUILTINS: LazyLock> = LazyLock::new(|| { use ParamType::*; - use ScalarBaseType::*; + use SqlScalarBaseType::*; builtins! { "aclitem_grantor" => Scalar { params!(AclItem) => UnaryFunc::AclItemGrantor(func::AclItemGrantor) => Oid, oid::FUNC_ACL_ITEM_GRANTOR_OID; @@ -4339,7 +4339,7 @@ pub static MZ_INTERNAL_BUILTINS: LazyLock> = LazyLo pub static MZ_UNSAFE_BUILTINS: LazyLock> = LazyLock::new(|| { use ParamType::*; - use ScalarBaseType::*; + use SqlScalarBaseType::*; builtins! { "mz_all" => Aggregate { params!(Any) => AggregateFunc::All => Bool, oid::FUNC_MZ_ALL_OID; @@ -4459,7 +4459,7 @@ fn array_to_string( pub static OP_IMPLS: LazyLock> = LazyLock::new(|| { use BinaryFunc as BF; use ParamType::*; - use ScalarBaseType::*; + use SqlScalarBaseType::*; builtins! 
{ // Literal OIDs collected from PG 13 using a version of this query // ```sql diff --git a/src/sql/src/plan/statement/ddl.rs b/src/sql/src/plan/statement/ddl.rs index 5b48d71182efd..01c0c947b1c13 100644 --- a/src/sql/src/plan/statement/ddl.rs +++ b/src/sql/src/plan/statement/ddl.rs @@ -4890,6 +4890,7 @@ pub fn unplan_create_cluster( enable_dequadratic_eqprop_map: _, enable_eq_classes_withholding_errors: _, enable_fast_path_plan_insights: _, + enable_repr_typecheck: _, } = optimizer_feature_overrides; // The ones from above that don't occur below are not wired up to cluster features. let features_extracted = ClusterFeatureExtracted { diff --git a/src/sql/src/plan/statement/dml.rs b/src/sql/src/plan/statement/dml.rs index 5749b937cc8ce..acc3d9350eced 100644 --- a/src/sql/src/plan/statement/dml.rs +++ b/src/sql/src/plan/statement/dml.rs @@ -564,6 +564,7 @@ impl TryFrom for ExplainConfig { enable_dequadratic_eqprop_map: Default::default(), enable_eq_classes_withholding_errors: Default::default(), enable_fast_path_plan_insights: Default::default(), + enable_repr_typecheck: Default::default(), }, }) } diff --git a/src/sql/src/plan/typeconv.rs b/src/sql/src/plan/typeconv.rs index 1e1883ca8a5b8..e1a7703ab8e22 100644 --- a/src/sql/src/plan/typeconv.rs +++ b/src/sql/src/plan/typeconv.rs @@ -18,7 +18,9 @@ use dynfmt::{Format, SimpleCurlyFormat}; use itertools::Itertools; use mz_expr::func::{CastArrayToJsonb, CastListToJsonb}; use mz_expr::{VariadicFunc, func}; -use mz_repr::{ColumnName, Datum, ScalarBaseType, SqlColumnType, SqlRelationType, SqlScalarType}; +use mz_repr::{ + ColumnName, Datum, SqlColumnType, SqlRelationType, SqlScalarBaseType, SqlScalarType, +}; use crate::catalog::TypeCategory; use crate::plan::error::PlanError; @@ -294,9 +296,9 @@ macro_rules! casts( }}; ); -static VALID_CASTS: LazyLock> = LazyLock::new( - || { - use ScalarBaseType::*; +static VALID_CASTS: LazyLock> = + LazyLock::new(|| { + use SqlScalarBaseType::*; use UnaryFunc::*; casts! 
{ @@ -865,8 +867,7 @@ static VALID_CASTS: LazyLock for OptimizerFeatures { @@ -2230,6 +2236,7 @@ impl From<&super::SystemVars> for OptimizerFeatures { enable_dequadratic_eqprop_map: vars.enable_dequadratic_eqprop_map(), enable_eq_classes_withholding_errors: vars.enable_eq_classes_withholding_errors(), enable_fast_path_plan_insights: vars.enable_fast_path_plan_insights(), + enable_repr_typecheck: vars.enable_repr_typecheck(), } } } diff --git a/src/transform/src/fusion/filter.rs b/src/transform/src/fusion/filter.rs index 0daf063794be9..37360844646a5 100644 --- a/src/transform/src/fusion/filter.rs +++ b/src/transform/src/fusion/filter.rs @@ -15,7 +15,7 @@ //! use mz_expr::{MirRelationExpr, MirScalarExpr}; //! use mz_repr::{SqlColumnType, Datum, SqlRelationType, SqlScalarType}; //! use mz_repr::optimize::OptimizerFeatures; -//! use mz_transform::{typecheck, Transform, TransformCtx}; +//! use mz_transform::{reprtypecheck, typecheck, Transform, TransformCtx}; //! use mz_transform::dataflow::DataflowMetainfo; //! //! use mz_transform::fusion::filter::Filter; @@ -36,8 +36,9 @@ //! //! let features = OptimizerFeatures::default(); //! let typecheck_ctx = typecheck::empty_context(); +//! let repr_typecheck_ctx = reprtypecheck::empty_context(); //! let mut df_meta = DataflowMetainfo::default(); -//! let mut transform_ctx = TransformCtx::local(&features, &typecheck_ctx, &mut df_meta, None, None); +//! let mut transform_ctx = TransformCtx::local(&features, &typecheck_ctx, &repr_typecheck_ctx, &mut df_meta, None, None); //! //! // Filter.transform() will deduplicate any predicates //! 
Filter.transform(&mut expr, &mut transform_ctx); diff --git a/src/transform/src/lib.rs b/src/transform/src/lib.rs index 1a89315de4090..b05a5103ae9bc 100644 --- a/src/transform/src/lib.rs +++ b/src/transform/src/lib.rs @@ -54,6 +54,7 @@ use crate::reduce_elision::ReduceElision; use crate::reduce_reduction::ReduceReduction; use crate::reduction_pushdown::ReductionPushdown; use crate::redundant_join::RedundantJoin; +use crate::reprtypecheck::{SharedContext as ReprSharedContext, Typecheck as ReprTypecheck}; use crate::semijoin_idempotence::SemijoinIdempotence; use crate::threshold_elision::ThresholdElision; use crate::typecheck::{SharedContext, Typecheck}; @@ -88,6 +89,7 @@ pub mod reduce_elision; pub mod reduce_reduction; pub mod reduction_pushdown; pub mod redundant_join; +pub mod reprtypecheck; pub mod semijoin_idempotence; pub mod threshold_elision; pub mod typecheck; @@ -122,6 +124,8 @@ pub struct TransformCtx<'a> { pub features: &'a OptimizerFeatures, /// Typechecking context. pub typecheck_ctx: &'a SharedContext, + /// Representation typechecking context. + pub repr_typecheck_ctx: &'a ReprSharedContext, /// Transforms can use this field to communicate information outside the result plans. pub df_meta: &'a mut DataflowMetainfo, /// Metrics for the optimizer. 
@@ -142,6 +146,7 @@ impl<'a> TransformCtx<'a> { pub fn local( features: &'a OptimizerFeatures, typecheck_ctx: &'a SharedContext, + repr_typecheck_ctx: &'a ReprSharedContext, df_meta: &'a mut DataflowMetainfo, metrics: Option<&'a OptimizerMetrics>, global_id: Option, @@ -152,6 +157,7 @@ impl<'a> TransformCtx<'a> { global_id, features, typecheck_ctx, + repr_typecheck_ctx, df_meta, metrics, last_hash: Default::default(), @@ -167,6 +173,7 @@ impl<'a> TransformCtx<'a> { stats: &'a dyn StatisticsOracle, features: &'a OptimizerFeatures, typecheck_ctx: &'a SharedContext, + repr_typecheck_ctx: &'a ReprSharedContext, df_meta: &'a mut DataflowMetainfo, metrics: Option<&'a OptimizerMetrics>, ) -> Self { @@ -177,6 +184,7 @@ impl<'a> TransformCtx<'a> { features, df_meta, typecheck_ctx, + repr_typecheck_ctx, metrics, last_hash: Default::default(), } @@ -186,6 +194,10 @@ impl<'a> TransformCtx<'a> { Arc::clone(self.typecheck_ctx) } + fn repr_typecheck(&self) -> ReprSharedContext { + Arc::clone(self.repr_typecheck_ctx) + } + /// Lets self know the id of the object that is being optimized. pub fn set_global_id(&mut self, global_id: GlobalId) { self.global_id = Some(global_id); @@ -732,8 +744,9 @@ impl Optimizer { /// Builds a logical optimizer that only performs logical transformations. #[deprecated = "Create an Optimize instance and call `optimize` instead."] pub fn logical_optimizer(ctx: &mut TransformCtx) -> Self { - let transforms: Vec> = vec![ + let transforms: Vec> = transforms![ Box::new(Typecheck::new(ctx.typecheck()).strict_join_equivalences()), + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).strict_join_equivalences()); if ctx.features.enable_repr_typecheck, // 1. 
Structure-agnostic cleanup Box::new(normalize()), Box::new(NonNullRequirements::default()), @@ -786,6 +799,7 @@ impl Optimizer { .disallow_new_globals() .strict_join_equivalences(), ), + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, ]; Self { name: "logical", @@ -807,6 +821,7 @@ impl Optimizer { .disallow_new_globals() .strict_join_equivalences(), ), + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, // Considerations for the relationship between JoinImplementation and other transforms: // - there should be a run of LiteralConstraints before JoinImplementation lifts away // the Filters from the Gets; @@ -874,6 +889,7 @@ impl Optimizer { .disallow_new_globals() .disallow_dummy(), ), + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, ]; Self { name: "physical", @@ -890,12 +906,16 @@ impl Optimizer { pub fn logical_cleanup_pass(ctx: &mut TransformCtx, allow_new_globals: bool) -> Self { let mut typechecker = Typecheck::new(ctx.typecheck()).strict_join_equivalences(); + let mut repr_typechecker = + ReprTypecheck::new(ctx.repr_typecheck()).strict_join_equivalences(); if !allow_new_globals { typechecker = typechecker.disallow_new_globals(); + repr_typechecker = repr_typechecker.disallow_new_globals(); } - let transforms: Vec> = vec![ + let transforms: Vec> = transforms![ Box::new(typechecker), + Box::new(repr_typechecker); if ctx.features.enable_repr_typecheck, // Delete unnecessary maps. 
Box::new(fusion::Fusion), Box::new(Fixpoint { @@ -927,6 +947,7 @@ impl Optimizer { .disallow_new_globals() .strict_join_equivalences(), ), + Box::new(ReprTypecheck::new(ctx.repr_typecheck()).disallow_new_globals().strict_join_equivalences()); if ctx.features.enable_repr_typecheck, ]; Self { name: "logical_cleanup", diff --git a/src/transform/src/predicate_pushdown.rs b/src/transform/src/predicate_pushdown.rs index 060a0a4d78d9f..2113d18e18995 100644 --- a/src/transform/src/predicate_pushdown.rs +++ b/src/transform/src/predicate_pushdown.rs @@ -31,7 +31,7 @@ //! use mz_ore::id_gen::IdGen; //! use mz_repr::{SqlColumnType, Datum, SqlRelationType, SqlScalarType}; //! use mz_repr::optimize::OptimizerFeatures; -//! use mz_transform::{typecheck, Transform, TransformCtx}; +//! use mz_transform::{reprtypecheck, typecheck,Transform, TransformCtx}; //! use mz_transform::dataflow::DataflowMetainfo; //! //! use mz_transform::predicate_pushdown::PredicatePushdown; @@ -65,8 +65,9 @@ //! //! let features = OptimizerFeatures::default(); //! let typecheck_ctx = typecheck::empty_context(); +//! let repr_typecheck_ctx = reprtypecheck::empty_context(); //! let mut df_meta = DataflowMetainfo::default(); -//! let mut transform_ctx = TransformCtx::local(&features, &typecheck_ctx, &mut df_meta, None, None); +//! let mut transform_ctx = TransformCtx::local(&features, &typecheck_ctx, &repr_typecheck_ctx, &mut df_meta, None, None); //! //! PredicatePushdown::default().transform(&mut expr, &mut transform_ctx); //! diff --git a/src/transform/src/reprtypecheck.rs b/src/transform/src/reprtypecheck.rs new file mode 100644 index 0000000000000..7171bc3d9c9d5 --- /dev/null +++ b/src/transform/src/reprtypecheck.rs @@ -0,0 +1,1723 @@ +// Copyright Materialize, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +//! Check that the visible type of each query has not been changed + +use std::collections::BTreeMap; +use std::fmt::Write; +use std::sync::{Arc, Mutex}; + +use itertools::Itertools; +use mz_expr::explain::{HumanizedExplain, HumanizerMode}; +use mz_expr::{ + AggregateExpr, ColumnOrder, Id, JoinImplementation, LocalId, MirRelationExpr, MirScalarExpr, + RECURSION_LIMIT, non_nullable_columns, +}; +use mz_ore::soft_panic_or_log; +use mz_ore::stack::{CheckedRecursion, RecursionGuard, RecursionLimitError}; +use mz_repr::explain::{DummyHumanizer, ExprHumanizer}; +use mz_repr::{ + ColumnName, ReprColumnType, ReprRelationType, ReprScalarBaseType, ReprScalarType, Row, + SqlColumnType, +}; + +/// Typechecking contexts as shared by various typechecking passes. +/// +/// We use an `Arc`-wrapped `Mutex` so that one context can be shared by multiple typechecker passes. +/// Shared contexts help catch consistency issues. +pub type SharedContext = Arc>; + +/// Generates an empty shared context (a new `Arc`-wrapped `Mutex` around an empty map) +pub fn empty_context() -> SharedContext { + Arc::new(Mutex::new(BTreeMap::new())) +} + +/// The possible forms of inconsistency/errors discovered during typechecking. +/// +/// Every variant except `Recursion` has a `source` field identifying the MIR term that is home +/// to the error (though not necessarily the root cause of the error).
+#[derive(Clone, Debug)] +pub enum TypeError<'a> { + /// Unbound identifiers (local or global) + Unbound { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The (unbound) identifier referenced + id: Id, + /// The type `id` was expected to have + typ: ReprRelationType, + }, + /// Dereference of a non-existent column + NoSuchColumn { + /// Expression with the bug + source: &'a MirRelationExpr, + /// Scalar expression that references an invalid column + expr: &'a MirScalarExpr, + /// The invalid column referenced + col: usize, + }, + /// A single column type does not match + MismatchColumn { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The column type we found (`sub` type) + got: ReprColumnType, + /// The column type we expected (`sup` type) + expected: ReprColumnType, + /// The difference between these types + diffs: Vec, + /// An explanatory message + message: String, + }, + /// Relation column types do not match + MismatchColumns { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The column types we found (`sub` type) + got: Vec, + /// The column types we expected (`sup` type) + expected: Vec, + /// The difference between these types + diffs: Vec, + /// An explanatory message + message: String, + }, + /// A constant row does not have the correct type + BadConstantRow { + /// Expression with the bug + source: &'a MirRelationExpr, + /// A constant row + got: Row, + /// The expected type (which that row does not have) + expected: Vec, + // TODO(mgree) with a good way to get the type of a Datum, we could give a diff here + }, + /// Projection of a non-existent column + BadProject { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The column projected + got: Vec, + /// The input columns (which don't have that column) + input_type: Vec, + }, + /// An equivalence class in a join was malformed + BadJoinEquivalence { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The join 
equivalences + got: Vec, + /// The problem with the join equivalences + message: String, + }, + /// TopK grouping by non-existent column + BadTopKGroupKey { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The bad column reference in the group key + k: usize, + /// The input columns (which don't have that column) + input_type: Vec, + }, + /// TopK ordering by non-existent column + BadTopKOrdering { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The ordering used + order: ColumnOrder, + /// The input columns (which don't work for that ordering) + input_type: Vec, + }, + /// LetRec bindings are malformed + BadLetRecBindings { + /// Expression with the bug + source: &'a MirRelationExpr, + }, + /// Local identifiers are shadowed + Shadowing { + /// Expression with the bug + source: &'a MirRelationExpr, + /// The id that was shadowed + id: Id, + }, + /// Recursion depth exceeded + Recursion { + /// The error that aborted recursion + error: RecursionLimitError, + }, + /// A dummy value was found + DisallowedDummy { + /// The expression with the dummy value + source: &'a MirRelationExpr, + }, +} + +impl<'a> From for TypeError<'a> { + fn from(error: RecursionLimitError) -> Self { + TypeError::Recursion { error } + } +} + +type Context = BTreeMap>; + +/// Characterizes differences between relation types +/// +/// Each constructor indicates a reason why some type `sub` was not a subtype of another type `sup` +#[derive(Clone, Debug, Hash)] +pub enum ReprRelationTypeDifference { + /// `sub` and `sup` don't have the same number of columns + Length { + /// Length of `sub` + len_sub: usize, + /// Length of `sup` + len_sup: usize, + }, + /// `sub` and `sup` differ at the indicated column + Column { + /// The column at which `sub` and `sup` differ + col: usize, + /// The difference between `sub` and `sup` + diff: ReprColumnTypeDifference, + }, +} + +/// Characterizes differences between individual column types +/// +/// Each constructor 
indicates a reason why some type `sub` was not a subtype of another type `sup`. +/// There may be multiple reasons, e.g., `sub` may be missing fields and have fields of different types +#[derive(Clone, Debug, Hash)] +pub enum ReprColumnTypeDifference { + /// The `ReprScalarBaseType` of `sub` doesn't match that of `sup` (also reported for records with differing field counts) + NotSubtype { + /// Would-be subtype + sub: ReprScalarType, + /// Would-be supertype + sup: ReprScalarType, + }, + /// `sub` was nullable but `sup` was not + Nullability { + /// Would-be subtype + sub: ReprColumnType, + /// Would-be supertype + sup: ReprColumnType, + }, + /// Both `sub` and `sup` are a list, map, array, or range, but `sub`'s element type differed from `sup`'s + ElementType { + /// The type constructor (list, array, etc.) + ctor: String, + /// The difference in the element type + element_type: Box, + }, + /// `sub` and `sup` are both records, but `sub` is missing fields present in `sup` + RecordMissingFields { + /// The missing fields + missing: Vec, + }, + /// `sub` and `sup` are both records, but some fields in `sub` are not subtypes of fields in `sup` + RecordFields { + /// The differences, by field + fields: Vec, + }, +} + +impl ReprRelationTypeDifference { + /// Returns the same type difference, but ignoring nullability + /// + /// Returns `None` when _all_ of the differences are due to nullability + pub fn ignore_nullability(self) -> Option { + use ReprRelationTypeDifference::*; + + match self { + Length { .. } => Some(self), + Column { col, diff } => diff.ignore_nullability().map(|diff| Column { col, diff }), + } + } +} + +impl ReprColumnTypeDifference { + /// Returns the same type difference, but ignoring nullability + /// + /// Returns `None` when _all_ of the differences are due to nullability + pub fn ignore_nullability(self) -> Option { + use ReprColumnTypeDifference::*; + + match self { + Nullability { .. } => None, + NotSubtype { .. } | RecordMissingFields { .. 
} => Some(self), + ElementType { ctor, element_type } => { + element_type + .ignore_nullability() + .map(|element_type| ElementType { + ctor, + element_type: Box::new(element_type), + }) + } + RecordFields { fields } => { + let fields = fields + .into_iter() + .flat_map(|diff| diff.ignore_nullability()) + .collect::>(); + + if fields.is_empty() { + None + } else { + Some(RecordFields { fields }) + } + } + } + } +} + +/// Returns a list of differences that make `sub` not a subtype of `sup` +/// +/// This function returns an empty list when `sub` is a subtype of `sup` +pub fn relation_subtype_difference( + sub: &[ReprColumnType], + sup: &[ReprColumnType], +) -> Vec { + let mut diffs = Vec::new(); + + if sub.len() != sup.len() { + diffs.push(ReprRelationTypeDifference::Length { + len_sub: sub.len(), + len_sup: sup.len(), + }); + + // TODO(mgree) we could do an edit-distance computation to report more errors + return diffs; + } + + diffs.extend( + sub.iter() + .zip_eq(sup.iter()) + .enumerate() + .flat_map(|(col, (sub_ty, sup_ty))| { + column_subtype_difference(sub_ty, sup_ty) + .into_iter() + .map(move |diff| ReprRelationTypeDifference::Column { col, diff }) + }), + ); + + diffs +} + +/// Returns a list of differences that make `sub` not a subtype of `sup` +/// +/// This function returns an empty list when `sub` is a subtype of `sup` +pub fn column_subtype_difference( + sub: &ReprColumnType, + sup: &ReprColumnType, +) -> Vec { + let mut diffs = scalar_subtype_difference(&sub.scalar_type, &sup.scalar_type); + + if sub.nullable && !sup.nullable { + diffs.push(ReprColumnTypeDifference::Nullability { + sub: sub.clone(), + sup: sup.clone(), + }); + } + + diffs +} + +/// Returns a list of differences that make `sub` not a subtype of `sup` +/// +/// This function returns an empty list when `sub` is a subtype of `sup` +pub fn scalar_subtype_difference( + sub: &ReprScalarType, + sup: &ReprScalarType, +) -> Vec { + use ReprScalarType::*; + + let mut diffs = Vec::new(); + + 
match (sub, sup) { + ( + List { + element_type: sub_elt, + .. + }, + List { + element_type: sup_elt, + .. + }, + ) + | ( + Map { + value_type: sub_elt, + .. + }, + Map { + value_type: sup_elt, + .. + }, + ) + | ( + Range { + element_type: sub_elt, + .. + }, + Range { + element_type: sup_elt, + .. + }, + ) + | (Array(sub_elt), Array(sup_elt)) => { + let ctor = format!("{:?}", ReprScalarBaseType::from(sub)); + diffs.extend( + scalar_subtype_difference(sub_elt, sup_elt) + .into_iter() + .map(|diff| ReprColumnTypeDifference::ElementType { + ctor: ctor.clone(), + element_type: Box::new(diff), + }), + ); + } + ( + Record { + fields: sub_fields, .. + }, + Record { + fields: sup_fields, .. + }, + ) => { + if sub_fields.len() != sup_fields.len() { + diffs.push(ReprColumnTypeDifference::NotSubtype { + sub: sub.clone(), + sup: sup.clone(), + }); + return diffs; + } + + for (sub_ty, sup_ty) in sub_fields.iter().zip_eq(sup_fields.iter()) { + diffs.extend(column_subtype_difference(sub_ty, sup_ty)); + } + } + (_, _) => { + if ReprScalarBaseType::from(sub) != ReprScalarBaseType::from(sup) { + diffs.push(ReprColumnTypeDifference::NotSubtype { + sub: sub.clone(), + sup: sup.clone(), + }) + } + } + }; + + diffs +} + +/// Unions `other` into `typ`, returning a list of differences on failure +/// +/// This function returns an empty list when `typ` and `other` are a union +pub fn scalar_union( + typ: &mut ReprScalarType, + other: &ReprScalarType, +) -> Vec { + use ReprScalarType::*; + + let mut diffs = Vec::new(); + + // precomputing to appease the borrow checker + let ctor = ReprScalarBaseType::from(&*typ); + match (typ, other) { + ( + List { + element_type: typ_elt, + }, + List { + element_type: other_elt, + }, + ) + | ( + Map { + value_type: typ_elt, + }, + Map { + value_type: other_elt, + }, + ) + | ( + Range { + element_type: typ_elt, + }, + Range { + element_type: other_elt, + }, + ) + | (Array(typ_elt), Array(other_elt)) => { + let res = scalar_union(typ_elt.as_mut(), 
other_elt.as_ref()); + diffs.extend( + res.into_iter() + .map(|diff| ReprColumnTypeDifference::ElementType { + ctor: format!("{ctor:?}"), + element_type: Box::new(diff), + }), + ); + } + ( + Record { fields: typ_fields }, + Record { + fields: other_fields, + }, + ) => { + if typ_fields.len() != other_fields.len() { + diffs.push(ReprColumnTypeDifference::NotSubtype { + sub: ReprScalarType::Record { + fields: typ_fields.clone(), + }, + sup: other.clone(), + }); + return diffs; + } + + for (typ_ty, other_ty) in typ_fields.iter_mut().zip_eq(other_fields.iter()) { + diffs.extend(column_union(typ_ty, other_ty)); + } + } + (typ, _) => { + if ctor != ReprScalarBaseType::from(other) { + diffs.push(ReprColumnTypeDifference::NotSubtype { + sub: typ.clone(), + sup: other.clone(), + }) + } + } + }; + + diffs +} + +/// Unions `other` into `typ`, returning a list of differences on failure +/// +/// This function returns an empty list when the union succeeds; `typ` then becomes nullable if `other` is nullable +pub fn column_union( + typ: &mut ReprColumnType, + other: &ReprColumnType, +) -> Vec { + let diffs = scalar_union(&mut typ.scalar_type, &other.scalar_type); + + if diffs.is_empty() { + typ.nullable |= other.nullable; + } + + diffs +} + +/// Returns true when it is safe to treat a `sub` row as a `sup` row +/// +/// In particular, the core types must be equal, and if a column in `sup` is nullable, that column should also be nullable in `sub` +/// Conversely, it is okay to treat a known non-nullable column as nullable: `sub` may be nullable when `sup` is not +pub fn is_subtype_of(sub: &[ReprColumnType], sup: &[ReprColumnType]) -> bool { + if sub.len() != sup.len() { + return false; + } + + sub.iter().zip_eq(sup.iter()).all(|(got, known)| { + (!known.nullable || got.nullable) && got.scalar_type == known.scalar_type + }) +} + +/// Check that the visible type of each query has not been changed +#[derive(Debug)] +pub struct Typecheck { + /// The known types of the queries so far + ctx: SharedContext, + /// Whether or not
this is the first run of the transform + disallow_new_globals: bool, + /// Whether or not to be strict about join equivalences having the same nullability + strict_join_equivalences: bool, + /// Whether or not to disallow dummy values + disallow_dummy: bool, + /// Recursion guard for checked recursion + recursion_guard: RecursionGuard, +} + +impl CheckedRecursion for Typecheck { + fn recursion_guard(&self) -> &RecursionGuard { + &self.recursion_guard + } +} + +impl Typecheck { + /// Creates a typechecking consistency checking pass using a given shared context + pub fn new(ctx: SharedContext) -> Self { + Self { + ctx, + disallow_new_globals: false, + strict_join_equivalences: false, + disallow_dummy: false, + recursion_guard: RecursionGuard::with_limit(RECURSION_LIMIT), + } + } + + /// New non-transient global IDs will be treated as an error + /// + /// Only turn this on after the context has been appropriately populated by, e.g., an earlier run + pub fn disallow_new_globals(mut self) -> Self { + self.disallow_new_globals = true; + self + } + + /// Equivalence classes in joins must not only agree on scalar type, but also on nullability + /// + /// Only turn this on before `JoinImplementation` + pub fn strict_join_equivalences(mut self) -> Self { + self.strict_join_equivalences = true; + + self + } + + /// Disallow dummy values + pub fn disallow_dummy(mut self) -> Self { + self.disallow_dummy = true; + self + } + + /// Returns the type of a relation expression or a type error. + /// + /// This function is careful to check validity, not just find out the type. + /// + /// It should be linear in the size of the AST. + /// + /// ??? should we also compute keys and return a `ReprRelationType`? + /// ggevay: Checking keys would have the same problem as checking nullability: key inference + /// is very heuristic (even more so than nullability inference), so it's almost impossible to + /// reliably keep it stable across transformations. 
+ pub fn typecheck<'a>( + &self, + expr: &'a MirRelationExpr, + ctx: &Context, + ) -> Result, TypeError<'a>> { + use MirRelationExpr::*; + + self.checked_recur(|tc| match expr { + Constant { typ, rows } => { + if let Ok(rows) = rows { + for (row, _id) in rows { + let datums = row.unpack(); + + // correct length + if datums.len() != typ.column_types.len() { + return Err(TypeError::BadConstantRow { + source: expr, + got: row.clone(), + expected: typ.column_types.iter().map(ReprColumnType::from).collect(), + }); + } + + // correct types + if datums + .iter() + .zip_eq(typ.column_types.iter()) + .any(|(d, ty)| d != &mz_repr::Datum::Dummy && !d.is_instance_of_sql(ty)) + { + return Err(TypeError::BadConstantRow { + source: expr, + got: row.clone(), + expected: typ.column_types.iter().map(ReprColumnType::from).collect(), + }); + } + + if self.disallow_dummy && datums.iter().any(|d| d == &mz_repr::Datum::Dummy) { + return Err(TypeError::DisallowedDummy { + source: expr, + }); + } + } + } + + Ok(typ.column_types.iter().map(ReprColumnType::from).collect_vec()) + } + Get { typ, id, .. 
} => { + if let Id::Global(_global_id) = id { + if !ctx.contains_key(id) { + // TODO(mgree) pass QueryContext through to check these types + return Ok(typ.column_types.iter().map(ReprColumnType::from).collect_vec()); + } + } + + let ctx_typ = ctx.get(id).ok_or_else(|| TypeError::Unbound { + source: expr, + id: id.clone(), + typ: ReprRelationType::from(typ), + })?; + + let column_types = typ.column_types.iter().map(ReprColumnType::from).collect_vec(); + + // covariant: the ascribed type must be a subtype of the actual type in the context + let diffs = relation_subtype_difference(&column_types, ctx_typ).into_iter().flat_map(|diff| diff.ignore_nullability()).collect::>(); + + if !diffs.is_empty() { + return Err(TypeError::MismatchColumns { + source: expr, + got: column_types, + expected: ctx_typ.clone(), + diffs, + message: "annotation did not match context type".to_string(), + }); + } + + Ok(column_types) + } + Project { input, outputs } => { + let t_in = tc.typecheck(input, ctx)?; + + for x in outputs { + if *x >= t_in.len() { + return Err(TypeError::BadProject { + source: expr, + got: outputs.clone(), + input_type: t_in, + }); + } + } + + Ok(outputs.iter().map(|col| t_in[*col].clone()).collect()) + } + Map { input, scalars } => { + let mut t_in = tc.typecheck(input, ctx)?; + + for scalar_expr in scalars.iter() { + t_in.push(tc.typecheck_scalar(scalar_expr, expr, &t_in)?); + + if self.disallow_dummy && scalar_expr.contains_dummy() { + return Err(TypeError::DisallowedDummy { + source: expr, + }); + } + } + + Ok(t_in) + } + FlatMap { input, func, exprs } => { + let mut t_in = tc.typecheck(input, ctx)?; + + let mut t_exprs = Vec::with_capacity(exprs.len()); + for scalar_expr in exprs { + t_exprs.push(tc.typecheck_scalar(scalar_expr, expr, &t_in)?); + + if self.disallow_dummy && scalar_expr.contains_dummy() { + return Err(TypeError::DisallowedDummy { + source: expr, + }); + } + } + // TODO(mgree) check t_exprs agrees with `func`'s input type + + let t_out = 
func.output_type().column_types.iter().map(ReprColumnType::from).collect_vec(); + + // FlatMap extends the existing columns + t_in.extend(t_out); + Ok(t_in) + } + Filter { input, predicates } => { + let mut t_in = tc.typecheck(input, ctx)?; + + // Set as nonnull any columns where null values would cause + // any predicate to evaluate to null. + for column in non_nullable_columns(predicates) { + t_in[column].nullable = false; + } + + for scalar_expr in predicates { + let t = tc.typecheck_scalar(scalar_expr, expr, &t_in)?; + + // filter condition must be boolean + // ignoring nullability: null is treated as false + // NB this behavior is slightly different from columns_match (for which we would set nullable to false in the expected type) + if t.scalar_type != ReprScalarType::Bool { + let sub = t.scalar_type.clone(); + + return Err(TypeError::MismatchColumn { + source: expr, + got: t, + expected: ReprColumnType { + scalar_type: ReprScalarType::Bool, + nullable: true, + }, + diffs: vec![ReprColumnTypeDifference::NotSubtype { sub, sup: ReprScalarType::Bool }], + message: "expected boolean condition".to_string(), + }); + } + + if self.disallow_dummy && scalar_expr.contains_dummy() { + return Err(TypeError::DisallowedDummy { + source: expr, + }); + } + } + + Ok(t_in) + } + Join { + inputs, + equivalences, + implementation, + } => { + let mut t_in_global = Vec::new(); + let mut t_in_local = vec![Vec::new(); inputs.len()]; + + for (i, input) in inputs.iter().enumerate() { + let input_t = tc.typecheck(input, ctx)?; + t_in_global.extend(input_t.clone()); + t_in_local[i] = input_t; + } + + for eq_class in equivalences { + let mut t_exprs: Vec = Vec::with_capacity(eq_class.len()); + + let mut all_nullable = true; + + for scalar_expr in eq_class { + // Note: the equivalences have global column references + let t_expr = tc.typecheck_scalar(scalar_expr, expr, &t_in_global)?; + + if !t_expr.nullable { + all_nullable = false; + } + + if let Some(t_first) = t_exprs.get(0) { + let 
diffs = scalar_subtype_difference(&t_expr.scalar_type, &t_first.scalar_type); + if !diffs.is_empty() { + return Err(TypeError::MismatchColumn { + source: expr, + got: t_expr, + expected: t_first.clone(), + diffs, + message: "equivalence class members have different scalar types".to_string(), + }); + } + + // equivalences may or may not match on nullability + // before JoinImplementation runs, nullability should match. + // but afterwards, some nulls may appear that are actually being filtered out elsewhere + if self.strict_join_equivalences { + if t_expr.nullable != t_first.nullable { + let sub = t_expr.clone(); + let sup = t_first.clone(); + + let err = TypeError::MismatchColumn { + source: expr, + got: t_expr.clone(), + expected: t_first.clone(), + diffs: vec![ReprColumnTypeDifference::Nullability { sub, sup }], + message: "equivalence class members have different nullability (and join equivalence checking is strict)".to_string(), + }; + + // TODO(mgree) this imprecision should be resolved, but we need to fix the optimizer + ::tracing::debug!("{err}"); + } + } + } + + if self.disallow_dummy && scalar_expr.contains_dummy() { + return Err(TypeError::DisallowedDummy { + source: expr, + }); + } + + t_exprs.push(t_expr); + } + + if self.strict_join_equivalences && all_nullable { + let err = TypeError::BadJoinEquivalence { + source: expr, + got: t_exprs, + message: "all expressions were nullable (and join equivalence checking is strict)".to_string(), + }; + + // TODO(mgree) this imprecision should be resolved, but we need to fix the optimizer + ::tracing::debug!("{err}"); + } + } + + // check that the join implementation is consistent + match implementation { + JoinImplementation::Differential((start_idx, first_key, _), others) => { + if let Some(key) = first_key { + for k in key { + let _ = tc.typecheck_scalar(k, expr, &t_in_local[*start_idx])?; + } + } + + for (idx, key, _) in others { + for k in key { + let _ = tc.typecheck_scalar(k, expr, &t_in_local[*idx])?; + } + 
} + } + JoinImplementation::DeltaQuery(plans) => { + for plan in plans { + for (idx, key, _) in plan { + for k in key { + let _ = tc.typecheck_scalar(k, expr, &t_in_local[*idx])?; + } + } + } + } + JoinImplementation::IndexedFilter(_coll_id, _idx_id, key, consts) => { + let typ: Vec = key + .iter() + .map(|k| tc.typecheck_scalar(k, expr, &t_in_global)) + .collect::, TypeError>>()?; + + for row in consts { + let datums = row.unpack(); + + // correct length + if datums.len() != typ.len() { + return Err(TypeError::BadConstantRow { + source: expr, + got: row.clone(), + expected: typ, + }); + } + + // correct types + if datums + .iter() + .zip_eq(typ.iter()) + .any(|(d, ty)| d != &mz_repr::Datum::Dummy && !d.is_instance_of(ty)) + { + return Err(TypeError::BadConstantRow { + source: expr, + got: row.clone(), + expected: typ, + }); + } + } + } + JoinImplementation::Unimplemented => (), + } + + Ok(t_in_global) + } + Reduce { + input, + group_key, + aggregates, + monotonic: _, + expected_group_size: _, + } => { + let t_in = tc.typecheck(input, ctx)?; + + let mut t_out = group_key + .iter() + .map(|scalar_expr| tc.typecheck_scalar(scalar_expr, expr, &t_in)) + .collect::, _>>()?; + + if self.disallow_dummy && group_key.iter().any(|scalar_expr| scalar_expr.contains_dummy()) { + return Err(TypeError::DisallowedDummy { + source: expr, + }); + } + + for agg in aggregates { + t_out.push(tc.typecheck_aggregate(agg, expr, &t_in)?); + } + + Ok(t_out) + } + TopK { + input, + group_key, + order_key, + limit: _, + offset: _, + monotonic: _, + expected_group_size: _, + } => { + let t_in = tc.typecheck(input, ctx)?; + + for &k in group_key { + if k >= t_in.len() { + return Err(TypeError::BadTopKGroupKey { + source: expr, + k, + input_type: t_in, + }); + } + } + + for order in order_key { + if order.column >= t_in.len() { + return Err(TypeError::BadTopKOrdering { + source: expr, + order: order.clone(), + input_type: t_in, + }); + } + } + + Ok(t_in) + } + Negate { input } => 
tc.typecheck(input, ctx), + Threshold { input } => tc.typecheck(input, ctx), + Union { base, inputs } => { + let mut t_base = tc.typecheck(base, ctx)?; + + for input in inputs { + let t_input = tc.typecheck(input, ctx)?; + + let len_sub = t_base.len(); + let len_sup = t_input.len(); + if len_sub != len_sup { + return Err(TypeError::MismatchColumns { + source: expr, + got: t_base.clone(), + expected: t_input, + diffs: vec![ReprRelationTypeDifference::Length { + len_sub, + len_sup, + }], + message: "Union branches have different numbers of columns".to_string(), + }); + } + + for (base_col, input_col) in t_base.iter_mut().zip_eq(t_input) { + let diffs = column_union(base_col, &input_col); + if !diffs.is_empty() { + return Err(TypeError::MismatchColumn { + source: expr, + got: input_col, + expected: base_col.clone(), + diffs, + message: + "couldn't compute union of column types in Union" + .to_string(), + }); + } + + } + } + + Ok(t_base) + } + Let { id, value, body } => { + let t_value = tc.typecheck(value, ctx)?; + + let binding = Id::Local(*id); + if ctx.contains_key(&binding) { + return Err(TypeError::Shadowing { + source: expr, + id: binding, + }); + } + + let mut body_ctx = ctx.clone(); + body_ctx.insert(Id::Local(*id), t_value); + + tc.typecheck(body, &body_ctx) + } + LetRec { ids, values, body, limits: _ } => { + if ids.len() != values.len() { + return Err(TypeError::BadLetRecBindings { source: expr }); + } + + // temporary hack: steal info from the Gets inside to learn the expected types + // if no get occurs in any definition or the body, that means that relation is dead code (which is okay) + let mut ctx = ctx.clone(); + // calling tc.collect_recursive_variable_types(expr, ...) 
triggers a panic due to nested letrecs with shadowing IDs + for inner_expr in values.iter().chain(std::iter::once(body.as_ref())) { + tc.collect_recursive_variable_types(inner_expr, ids, &mut ctx)?; + } + + for (id, value) in ids.iter().zip_eq(values.iter()) { + let typ = tc.typecheck(value, &ctx)?; + + let id = Id::Local(id.clone()); + if let Some(ctx_typ) = ctx.get_mut(&id) { + for (base_col, input_col) in ctx_typ.iter_mut().zip_eq(typ) { + // we expect an EXACT match, but don't care about nullability + let diffs = column_union(base_col, &input_col); + if !diffs.is_empty() { + return Err(TypeError::MismatchColumn { + source: expr, + got: input_col, + expected: base_col.clone(), + diffs, + message: + "couldn't compute union of column types in LetRec" + .to_string(), + }) + } + } + } else { + // dead code: no `Get` references this relation anywhere. we record the type anyway + ctx.insert(id, typ); + } + } + + tc.typecheck(body, &ctx) + } + ArrangeBy { input, keys } => { + let t_in = tc.typecheck(input, ctx)?; + + for key in keys { + for k in key { + let _ = tc.typecheck_scalar(k, expr, &t_in)?; + } + } + + Ok(t_in) + } + }) + } + + /// Traverses a term to collect the types of given ids. + /// + /// LetRec doesn't have type info stored in it. Until we change the MIR to track that information explicitly, we have to rebuild it from looking at the term. + fn collect_recursive_variable_types<'a>( + &self, + expr: &'a MirRelationExpr, + ids: &[LocalId], + ctx: &mut Context, + ) -> Result<(), TypeError<'a>> { + use MirRelationExpr::*; + + self.checked_recur(|tc| { + match expr { + Get { + id: Id::Local(id), + typ, + .. 
+ } => { + if !ids.contains(id) { + return Ok(()); + } + + let id = Id::Local(id.clone()); + if let Some(ctx_typ) = ctx.get_mut(&id) { + let typ = typ + .column_types + .iter() + .map(ReprColumnType::from) + .collect_vec(); + + if ctx_typ.len() != typ.len() { + let diffs = relation_subtype_difference(&typ, ctx_typ); + + return Err(TypeError::MismatchColumns { + source: expr, + got: typ, + expected: ctx_typ.clone(), + diffs, + message: "environment and type annotation did not match" + .to_string(), + }); + } + + for (base_col, input_col) in ctx_typ.iter_mut().zip_eq(typ) { + let diffs = column_union(base_col, &input_col); + if !diffs.is_empty() { + return Err(TypeError::MismatchColumn { + source: expr, + got: input_col, + expected: base_col.clone(), + diffs, + message: + "couldn't compute union of column types in Get and context" + .to_string(), + }); + } + } + } else { + ctx.insert( + id, + typ.column_types + .iter() + .map(ReprColumnType::from) + .collect_vec(), + ); + } + } + Get { + id: Id::Global(..), .. + } + | Constant { .. } => (), + Let { id, value, body } => { + tc.collect_recursive_variable_types(value, ids, ctx)?; + + // we've shadowed the id + if ids.contains(id) { + return Err(TypeError::Shadowing { + source: expr, + id: Id::Local(*id), + }); + } + + tc.collect_recursive_variable_types(body, ids, ctx)?; + } + LetRec { + ids: inner_ids, + values, + body, + limits: _, + } => { + for inner_id in inner_ids { + if ids.contains(inner_id) { + return Err(TypeError::Shadowing { + source: expr, + id: Id::Local(*inner_id), + }); + } + } + + for value in values { + tc.collect_recursive_variable_types(value, ids, ctx)?; + } + + tc.collect_recursive_variable_types(body, ids, ctx)?; + } + Project { input, .. } + | Map { input, .. } + | FlatMap { input, .. } + | Filter { input, .. } + | Reduce { input, .. } + | TopK { input, .. } + | Negate { input } + | Threshold { input } + | ArrangeBy { input, .. 
} => {
+                    tc.collect_recursive_variable_types(input, ids, ctx)?;
+                }
+                Join { inputs, .. } => {
+                    for input in inputs {
+                        tc.collect_recursive_variable_types(input, ids, ctx)?;
+                    }
+                }
+                Union { base, inputs } => {
+                    tc.collect_recursive_variable_types(base, ids, ctx)?;
+
+                    for input in inputs {
+                        tc.collect_recursive_variable_types(input, ids, ctx)?;
+                    }
+                }
+            }
+
+            Ok(())
+        })
+    }
+
+    /// Computes the column type of the scalar expression `expr`.
+    ///
+    /// `column_types` are the column types of the relation the expression is evaluated
+    /// against; `Column` references index into this slice. `source` is the enclosing
+    /// relation expression and is only used to fill the `source` field of errors.
+    ///
+    /// Errors produced here:
+    /// * `NoSuchColumn` when a column reference is out of bounds for `column_types`;
+    /// * `BadConstantRow` when an `Ok` literal row does not hold exactly one datum, or
+    ///   that datum is neither `Datum::Dummy` nor an instance of the annotated type;
+    /// * `MismatchColumn` when an `If` condition is not `Bool`, or when the types of
+    ///   the two branches cannot be unioned.
+    ///
+    /// Errors from nested expressions are propagated unchanged.
+    ///
+    /// NOTE(review): recursion goes through `checked_recur`, which presumably enforces
+    /// a recursion limit (cf. `TypeError::Recursion`) -- confirm.
+    fn typecheck_scalar<'a>(
+        &self,
+        expr: &'a MirScalarExpr,
+        source: &'a MirRelationExpr,
+        column_types: &[ReprColumnType],
+    ) -> Result<ReprColumnType, TypeError<'a>> {
+        use MirScalarExpr::*;
+
+        self.checked_recur(|tc| match expr {
+            Column(i, _) => match column_types.get(*i) {
+                Some(ty) => Ok(ty.clone()),
+                None => Err(TypeError::NoSuchColumn {
+                    source,
+                    expr,
+                    col: *i,
+                }),
+            },
+            Literal(row, typ) => {
+                let typ = ReprColumnType::from(typ);
+                // Only `Ok` rows are inspected; an `Err` literal is accepted with its
+                // annotated type.
+                if let Ok(row) = row {
+                    let datums = row.unpack();
+
+                    if datums.len() != 1
+                        || (datums[0] != mz_repr::Datum::Dummy && !datums[0].is_instance_of(&typ))
+                    {
+                        return Err(TypeError::BadConstantRow {
+                            source,
+                            got: row.clone(),
+                            expected: vec![typ],
+                        });
+                    }
+                }
+
+                Ok(typ)
+            }
+            CallUnmaterializable(func) => Ok(ReprColumnType::from(&func.output_type())),
+            // For the `Call*` forms, `output_type` is queried with `SqlColumnType`s, so
+            // argument types are converted from, and the result back to, repr types.
+            CallUnary { expr, func } => {
+                let typ_in = tc.typecheck_scalar(expr, source, column_types)?;
+                let typ_out = func.output_type(SqlColumnType::from_repr(&typ_in));
+                Ok(ReprColumnType::from(&typ_out))
+            }
+            CallBinary { expr1, expr2, func } => {
+                let typ_in1 = tc.typecheck_scalar(expr1, source, column_types)?;
+                let typ_in2 = tc.typecheck_scalar(expr2, source, column_types)?;
+                let typ_out = func.output_type(
+                    SqlColumnType::from_repr(&typ_in1),
+                    SqlColumnType::from_repr(&typ_in2),
+                );
+                Ok(ReprColumnType::from(&typ_out))
+            }
+            CallVariadic { exprs, func } => Ok(ReprColumnType::from(
+                &func.output_type(
+                    exprs
+                        .iter()
+                        .map(|e| {
+                            tc.typecheck_scalar(e, source, column_types)
+                                .map(|typ| SqlColumnType::from_repr(&typ))
+                        })
+                        // Collecting into `Result` stops at the first argument error.
+                        .collect::<Result<Vec<_>, TypeError>>()?,
+                ),
+            )),
+            If { cond, then, els } => {
+                let cond_type = tc.typecheck_scalar(cond, source, column_types)?;
+
+                // condition must be boolean
+                // ignoring nullability: null is treated as false
+                // NB this behavior is slightly different from columns_match (for which we would set nullable to false in the expected type)
+                if cond_type.scalar_type != ReprScalarType::Bool {
+                    let sub = cond_type.scalar_type.clone();
+
+                    return Err(TypeError::MismatchColumn {
+                        source,
+                        got: cond_type,
+                        expected: ReprColumnType {
+                            scalar_type: ReprScalarType::Bool,
+                            nullable: true,
+                        },
+                        diffs: vec![ReprColumnTypeDifference::NotSubtype {
+                            sub,
+                            sup: ReprScalarType::Bool,
+                        }],
+                        message: "expected boolean condition".to_string(),
+                    });
+                }
+
+                let mut then_type = tc.typecheck_scalar(then, source, column_types)?;
+                let else_type = tc.typecheck_scalar(els, source, column_types)?;
+
+                // `column_union` updates `then_type` in place; a non-empty result means
+                // the two branch types could not be unioned.
+                let diffs = column_union(&mut then_type, &else_type);
+                if !diffs.is_empty() {
+                    return Err(TypeError::MismatchColumn {
+                        source,
+                        got: then_type,
+                        expected: else_type,
+                        diffs,
+                        message: "couldn't compute union of column types for If".to_string(),
+                    });
+                }
+
+                Ok(then_type)
+            }
+        })
+    }
+
+    /// Typecheck an `AggregateExpr`
+    ///
+    /// The type of the aggregate's argument (`expr.expr`) is computed with
+    /// `typecheck_scalar` against `column_types`; the result is the output type that
+    /// `expr.func` reports for that argument type. `source` is only forwarded to
+    /// `typecheck_scalar`, which uses it to label errors.
+    ///
+    /// Whether the argument type is acceptable for the aggregate function is not
+    /// checked yet (see the TODO below).
+    pub fn typecheck_aggregate<'a>(
+        &self,
+        expr: &'a AggregateExpr,
+        source: &'a MirRelationExpr,
+        column_types: &[ReprColumnType],
+    ) -> Result<ReprColumnType, TypeError<'a>> {
+        self.checked_recur(|tc| {
+            let t_in = tc.typecheck_scalar(&expr.expr, source, column_types)?;
+
+            // TODO check that t_in is actually acceptable for `func`
+
+            Ok(ReprColumnType::from(
+                &expr.func.output_type(SqlColumnType::from_repr(&t_in)),
+            ))
+        })
+    }
+}
+
+/// Detailed type error logging, with failures in CI and a logged error in production for severe errors
+///
+/// type_error(severity, ...) emits the message at debug level; if `severity` is `true`, it is instead passed to `soft_panic_or_log!` (visible in Sentry)
+macro_rules!
type_error {
+    ($severity:expr, $($arg:tt)+) => {{
+        if $severity {
+            // severe: soft panic or log an error
+            soft_panic_or_log!($($arg)+);
+        } else {
+            // not severe: debug-level trace only
+            ::tracing::debug!($($arg)+);
+        }
+    }}
+}
+
+impl crate::Transform for Typecheck {
+    /// The name of this transform.
+    fn name(&self) -> &'static str {
+        "Typecheck"
+    }
+
+    /// Typechecks `relation` and reports any problem through `type_error!`.
+    ///
+    /// `relation` is only read (typechecked and pretty-printed). The function contains
+    /// no early error return: type errors are reported, not propagated, and the result
+    /// is `Ok(())`.
+    ///
+    /// If `transform_ctx.global_id` is set, the type recorded for that id in the shared
+    /// context (if any) is the `expected` type:
+    /// * typechecks, known id: the computed type is compared against `expected`; the
+    ///   context entry is left as it was;
+    /// * typechecks, unknown id: the computed type is recorded for the global id;
+    /// * fails to typecheck: always reported as severe.
+    ///
+    /// Panics if the context mutex is poisoned (`expect("typecheck ctx")`).
+    fn actually_perform_transform(
+        &self,
+        relation: &mut MirRelationExpr,
+        transform_ctx: &mut crate::TransformCtx,
+    ) -> Result<(), crate::TransformError> {
+        let mut typecheck_ctx = self.ctx.lock().expect("typecheck ctx");
+
+        // Previously recorded type for this global id, if there is one.
+        let expected = transform_ctx
+            .global_id
+            .map_or_else(|| None, |id| typecheck_ctx.get(&Id::Global(id)));
+
+        if let Some(id) = transform_ctx.global_id {
+            // NOTE(review): `transform_ctx.global_id.is_some()` is always true inside
+            // this `if let`; the condition is redundant.
+            if self.disallow_new_globals
+                && expected.is_none()
+                && transform_ctx.global_id.is_some()
+                && !id.is_transient()
+            {
+                type_error!(
+                    false, // not severe
+                    "type warning: new non-transient global id {id}\n{}",
+                    relation.pretty()
+                );
+            }
+        }
+
+        let got = self.typecheck(relation, &typecheck_ctx);
+
+        let humanizer = mz_repr::explain::DummyHumanizer;
+
+        match (got, expected) {
+            (Ok(got), Some(expected)) => {
+                // `expected` is `Some` only when `global_id` is set, so this unwrap holds.
+                let id = transform_ctx.global_id.unwrap();
+
+                // contravariant: global types can be updated
+                let diffs = relation_subtype_difference(expected, &got);
+                if !diffs.is_empty() {
+                    // SEVERE only if got and expected have true differences, not just nullability
+                    let severity = diffs
+                        .iter()
+                        .any(|diff| diff.clone().ignore_nullability().is_some());
+
+                    let err = TypeError::MismatchColumns {
+                        source: relation,
+                        got,
+                        expected: expected.clone(),
+                        diffs,
+                        message: format!(
+                            "a global id {id}'s type changed (was `expected` which should be a subtype of `got`) "
+                        ),
+                    };
+
+                    type_error!(severity, "type error in known global id {id}:\n{err}");
+                }
+            }
+            (Ok(got), None) => {
+                // First time this global id is seen: remember its type.
+                if let Some(id) = transform_ctx.global_id {
+                    typecheck_ctx.insert(Id::Global(id), got);
+                }
+            }
+            (Err(err), _) => {
+                // Build the optional "expected type" line and a label for the binding.
+                let (expected, binding) = match expected {
+                    Some(expected) => {
+                        let id = transform_ctx.global_id.unwrap();
+                        (
+                            format!("expected 
type {}\n", columns_pretty(expected, &humanizer)), + format!("known global id {id}"), + ) + } + None => ("".to_string(), "transient query".to_string()), + }; + + type_error!( + true, // SEVERE: the transformed code is inconsistent + "type error in {binding}:\n{err}\n{expected}{}", + relation.pretty() + ); + } + } + + Ok(()) + } +} + +/// Prints a type prettily with a given `ExprHumanizer` +pub fn columns_pretty(cols: &[ReprColumnType], humanizer: &H) -> String +where + H: ExprHumanizer, +{ + let mut s = String::with_capacity(2 + 3 * cols.len()); + + s.push('('); + + let mut it = cols.iter().peekable(); + while let Some(col) = it.next() { + s.push_str(&humanizer.humanize_column_type_repr(col, false)); + + if it.peek().is_some() { + s.push_str(", "); + } + } + + s.push(')'); + + s +} + +impl ReprRelationTypeDifference { + /// Pretty prints a type difference + /// + /// Always indents two spaces + pub fn humanize(&self, h: &H, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result + where + H: ExprHumanizer, + { + use ReprRelationTypeDifference::*; + match self { + Length { len_sub, len_sup } => { + writeln!( + f, + " number of columns do not match ({len_sub} != {len_sup})" + ) + } + Column { col, diff } => { + writeln!(f, " column {col} differs:")?; + diff.humanize(4, h, f) + } + } + } +} + +impl ReprColumnTypeDifference { + /// Pretty prints a type difference at a given indentation level + pub fn humanize( + &self, + indent: usize, + h: &H, + f: &mut std::fmt::Formatter<'_>, + ) -> std::fmt::Result + where + H: ExprHumanizer, + { + use ReprColumnTypeDifference::*; + + // indent + write!(f, "{:indent$}", "")?; + + match self { + NotSubtype { sub, sup } => { + let sub = h.humanize_scalar_type_repr(sub, false); + let sup = h.humanize_scalar_type_repr(sup, false); + + writeln!(f, "{sub} is a not a subtype of {sup}") + } + Nullability { sub, sup } => { + let sub = h.humanize_column_type_repr(sub, false); + let sup = h.humanize_column_type_repr(sup, false); + + writeln!(f, 
"{sub} is nullable but {sup} is not") + } + ElementType { ctor, element_type } => { + writeln!(f, "{ctor} element types differ:")?; + + element_type.humanize(indent + 2, h, f) + } + RecordMissingFields { missing } => { + write!(f, "missing column fields:")?; + for col in missing { + write!(f, " {col}")?; + } + f.write_char('\n') + } + RecordFields { fields } => { + writeln!(f, "{} record fields differ:", fields.len())?; + + for (i, diff) in fields.iter().enumerate() { + writeln!(f, "{:indent$} field {i}:", "")?; + diff.humanize(indent + 4, h, f)?; + } + Ok(()) + } + } + } +} + +/// Wrapper struct for a `Display` instance for `TypeError`s with a given `ExprHumanizer` +#[allow(missing_debug_implementations)] +pub struct TypeErrorHumanizer<'a, 'b, H> +where + H: ExprHumanizer, +{ + err: &'a TypeError<'a>, + humanizer: &'b H, +} + +impl<'a, 'b, H> TypeErrorHumanizer<'a, 'b, H> +where + H: ExprHumanizer, +{ + /// Create a `Display`-shim struct for a given `TypeError`/`ExprHumanizer` pair + pub fn new(err: &'a TypeError, humanizer: &'b H) -> Self { + Self { err, humanizer } + } +} + +impl<'a, 'b, H> std::fmt::Display for TypeErrorHumanizer<'a, 'b, H> +where + H: ExprHumanizer, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.err.humanize(self.humanizer, f) + } +} + +impl<'a> std::fmt::Display for TypeError<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + TypeErrorHumanizer { + err: self, + humanizer: &DummyHumanizer, + } + .fmt(f) + } +} + +impl<'a> TypeError<'a> { + /// The source of the type error + pub fn source(&self) -> Option<&'a MirRelationExpr> { + use TypeError::*; + match self { + Unbound { source, .. } + | NoSuchColumn { source, .. } + | MismatchColumn { source, .. } + | MismatchColumns { source, .. } + | BadConstantRow { source, .. } + | BadProject { source, .. } + | BadJoinEquivalence { source, .. } + | BadTopKGroupKey { source, .. } + | BadTopKOrdering { source, .. 
}
+            | BadLetRecBindings { source }
+            | Shadowing { source, .. }
+            | DisallowedDummy { source, .. } => Some(source),
+            Recursion { .. } => None,
+        }
+    }
+
+    /// Writes a human-readable description of this error to `f`.
+    ///
+    /// When the error carries a source term (see `source`), the pretty-printed MIR
+    /// term is written first. Types are rendered with `humanizer`, and each detailed
+    /// difference is printed via its own `humanize` method.
+    fn humanize<H>(&self, humanizer: &H, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
+    where
+        H: ExprHumanizer,
+    {
+        if let Some(source) = self.source() {
+            writeln!(f, "In the MIR term:\n{}\n", source.pretty())?;
+        }
+
+        // The source was already printed above, so every arm ignores it (`source: _`).
+        use TypeError::*;
+        match self {
+            Unbound { source: _, id, typ } => {
+                let typ = columns_pretty(&typ.column_types, humanizer);
+                writeln!(f, "{id} is unbound\ndeclared type {typ}")?
+            }
+            NoSuchColumn {
+                source: _,
+                expr,
+                col,
+            } => writeln!(f, "{expr} references non-existent column {col}")?,
+            MismatchColumn {
+                source: _,
+                got,
+                expected,
+                diffs,
+                message,
+            } => {
+                let got = humanizer.humanize_column_type_repr(got, false);
+                let expected = humanizer.humanize_column_type_repr(expected, false);
+                writeln!(
+                    f,
+                    "mismatched column types: {message}\n got {got}\nexpected {expected}"
+                )?;
+
+                // Column-level differences are printed at indentation 2.
+                for diff in diffs {
+                    diff.humanize(2, humanizer, f)?;
+                }
+            }
+            MismatchColumns {
+                source: _,
+                got,
+                expected,
+                diffs,
+                message,
+            } => {
+                let got = columns_pretty(got, humanizer);
+                let expected = columns_pretty(expected, humanizer);
+
+                writeln!(
+                    f,
+                    "mismatched relation types: {message}\n got {got}\nexpected {expected}"
+                )?;
+
+                for diff in diffs {
+                    diff.humanize(humanizer, f)?;
+                }
+            }
+            BadConstantRow {
+                source: _,
+                got,
+                expected,
+            } => {
+                let expected = columns_pretty(expected, humanizer);
+
+                writeln!(
+                    f,
+                    "bad constant row\n got {got}\nexpected row of type {expected}"
+                )?
+            }
+            BadProject {
+                source: _,
+                got,
+                input_type,
+            } => {
+                let input_type = columns_pretty(input_type, humanizer);
+
+                writeln!(
+                    f,
+                    "projection of non-existant columns {got:?} from type {input_type}"
+                )?
+            }
+            BadJoinEquivalence {
+                source: _,
+                got,
+                message,
+            } => {
+                let got = columns_pretty(got, humanizer);
+
+                writeln!(f, "bad join equivalence {got}: {message}")?
+ } + BadTopKGroupKey { + source: _, + k, + input_type, + } => { + let input_type = columns_pretty(input_type, humanizer); + + writeln!( + f, + "TopK group key component references invalid column {k} in columns: {input_type}" + )? + } + BadTopKOrdering { + source: _, + order, + input_type, + } => { + let col = order.column; + let num_cols = input_type.len(); + let are = if num_cols == 1 { "is" } else { "are" }; + let s = if num_cols == 1 { "" } else { "s" }; + let input_type = columns_pretty(input_type, humanizer); + + // TODO(cloud#8196) + let mode = HumanizedExplain::new(false); + let order = mode.expr(order, None); + + writeln!( + f, + "TopK ordering {order} references invalid column {col}\nthere {are} {num_cols} column{s}: {input_type}" + )? + } + BadLetRecBindings { source: _ } => { + writeln!(f, "LetRec ids and definitions don't line up")? + } + Shadowing { source: _, id } => writeln!(f, "id {id} is shadowed")?, + DisallowedDummy { source: _ } => writeln!(f, "contains a dummy value")?, + Recursion { error } => writeln!(f, "{error}")?, + } + + Ok(()) + } +} diff --git a/src/transform/src/typecheck.rs b/src/transform/src/typecheck.rs index 9f7eadf965591..dabcf03b9587b 100644 --- a/src/transform/src/typecheck.rs +++ b/src/transform/src/typecheck.rs @@ -22,7 +22,7 @@ use mz_expr::{ use mz_ore::soft_panic_or_log; use mz_ore::stack::{CheckedRecursion, RecursionGuard, RecursionLimitError}; use mz_repr::explain::{DummyHumanizer, ExprHumanizer}; -use mz_repr::{ColumnName, Row, ScalarBaseType, SqlColumnType, SqlRelationType, SqlScalarType}; +use mz_repr::{ColumnName, Row, SqlColumnType, SqlRelationType, SqlScalarBaseType, SqlScalarType}; /// Typechecking contexts as shared by various typechecking passes. 
/// @@ -366,7 +366,7 @@ pub fn scalar_subtype_difference( }, ) | (Array(sub_elt), Array(sup_elt)) => { - let ctor = format!("{:?}", ScalarBaseType::from(sub)); + let ctor = format!("{:?}", SqlScalarBaseType::from(sub)); diffs.extend( scalar_subtype_difference(sub_elt, sup_elt) .into_iter() @@ -405,7 +405,7 @@ pub fn scalar_subtype_difference( } (_, _) => { // TODO(mgree) confirm that we don't want to allow numeric subtyping - if ScalarBaseType::from(sub) != ScalarBaseType::from(sup) { + if SqlScalarBaseType::from(sub) != SqlScalarBaseType::from(sup) { diffs.push(SqlColumnTypeDifference::NotSubtype { sub: sub.clone(), sup: sup.clone(), diff --git a/src/transform/tests/test_runner.rs b/src/transform/tests/test_runner.rs index 2ccba4a88435a..2cf048eaeeacf 100644 --- a/src/transform/tests/test_runner.rs +++ b/src/transform/tests/test_runner.rs @@ -34,7 +34,7 @@ mod tests { use mz_transform::dataflow::{ DataflowMetainfo, optimize_dataflow_demand_inner, optimize_dataflow_filters_inner, }; - use mz_transform::{Optimizer, Transform, TransformCtx, typecheck}; + use mz_transform::{Optimizer, Transform, TransformCtx, reprtypecheck, typecheck}; use proc_macro2::TokenTree; use crate::explain::Explainable; @@ -51,10 +51,12 @@ mod tests { fn full_transform_list() -> Vec> { let features = OptimizerFeatures::default(); let typecheck_ctx = typecheck::empty_context(); + let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, &typecheck_ctx, + &repr_typecheck_ctx, &mut df_meta, None, Some(TEST_GLOBAL_ID), @@ -185,10 +187,12 @@ mod tests { ) -> Result { let features = OptimizerFeatures::default(); let typecheck_ctx = typecheck::empty_context(); + let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, &typecheck_ctx, + &repr_typecheck_ctx, &mut df_meta, None, Some(TEST_GLOBAL_ID), 
@@ -369,10 +373,12 @@ mod tests { if test_type == TestType::Opt { let features = OptimizerFeatures::default(); let typecheck_ctx = typecheck::empty_context(); + let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, &typecheck_ctx, + &repr_typecheck_ctx, &mut df_meta, None, Some(TEST_GLOBAL_ID), @@ -410,10 +416,12 @@ mod tests { if test_type == TestType::Opt { let features = OptimizerFeatures::default(); let typecheck_ctx = typecheck::empty_context(); + let repr_typecheck_ctx = reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = TransformCtx::local( &features, &typecheck_ctx, + &repr_typecheck_ctx, &mut df_meta, None, Some(TEST_GLOBAL_ID), diff --git a/src/transform/tests/test_transforms.rs b/src/transform/tests/test_transforms.rs index 3aca43e434887..451a0a024b4db 100644 --- a/src/transform/tests/test_transforms.rs +++ b/src/transform/tests/test_transforms.rs @@ -18,7 +18,6 @@ use mz_repr::explain::{ExplainConfig, PlanRenderingContext}; use mz_repr::optimize::{OptimizerFeatures, OverrideFrom}; use mz_transform::analysis::annotate_plan; use mz_transform::dataflow::DataflowMetainfo; -use mz_transform::typecheck::TypeErrorHumanizer; const TEST_GLOBAL_ID: GlobalId = GlobalId::Transient(1234567); @@ -118,8 +117,8 @@ fn handle_typecheck( }; // Apply the transformation, returning early on TransformError. 
- use mz_transform::typecheck::{Typecheck, columns_pretty}; - let ctx = mz_transform::typecheck::empty_context(); + use mz_transform::reprtypecheck::{Typecheck, columns_pretty}; + let ctx = mz_transform::reprtypecheck::empty_context(); let tc = Typecheck::new(std::sync::Arc::clone(&ctx)); @@ -129,7 +128,9 @@ fn handle_typecheck( Ok(typ) => format!("{}\n", columns_pretty(&typ, catalog).trim()), Err(err) => format!( "{}\n", - TypeErrorHumanizer::new(&err, catalog).to_string().trim(), + mz_transform::reprtypecheck::TypeErrorHumanizer::new(&err, catalog) + .to_string() + .trim(), ), } } @@ -266,10 +267,12 @@ fn apply_transform( features.enable_dequadratic_eqprop_map = true; features.enable_eq_classes_withholding_errors = true; let typecheck_ctx = mz_transform::typecheck::empty_context(); + let repr_typecheck_ctx = mz_transform::reprtypecheck::empty_context(); let mut df_meta = DataflowMetainfo::default(); let mut transform_ctx = mz_transform::TransformCtx::local( &features, &typecheck_ctx, + &repr_typecheck_ctx, &mut df_meta, None, Some(TEST_GLOBAL_ID), diff --git a/src/transform/tests/test_transforms/typecheck.spec b/src/transform/tests/test_transforms/typecheck.spec index b2879073feeb6..229c71f66407b 100644 --- a/src/transform/tests/test_transforms/typecheck.spec +++ b/src/transform/tests/test_transforms/typecheck.spec @@ -222,11 +222,10 @@ Return Get l1 -mismatched column types: couldn't compute union of column types in let rec: Can't union types: Bool and Int64 +mismatched column types: couldn't compute union of column types in LetRec got Int64 expected Bool? Bool is a not a subtype of Int64 - Bool? is nullable but Int64 is not ---- ---- @@ -413,7 +412,7 @@ Reduce group_by=[#0] aggregates=[max(#1), min(#1), sum(distinct #1)] monotonic e - ("a", 2) - ("a", 4) ---- -(String, Int64, Int64, Numeric { max_scale: Some(NumericMaxScale(0)) }) +(String, Int64, Int64, Numeric { max_scale: None }) # empty output type (no keys!) 
typecheck diff --git a/test/sqllogictest/types.slt b/test/sqllogictest/types.slt index a13429e58413c..cd552d678961f 100644 --- a/test/sqllogictest/types.slt +++ b/test/sqllogictest/types.slt @@ -1056,3 +1056,26 @@ Source materialize.public.t1 Target cluster: quickstart EOF + +# regression test from a randomly generated query that triggered a bug in column type unions + +statement ok +CREATE TABLE "t-2"("c-0-uint8" uint8 not null, "c-1-mz_timestamp" mz_timestamp, "c-2-uint8" uint8, "c-3-uint4" uint4 not null, "c-4-numeric(38,3)" numeric(38, 3)) + +query T multiline +EXPLAIN OPTIMIZED PLAN AS VERBOSE TEXT FOR SELECT "t-2"."c-1-mz_timestamp", (((TIME '2:28:18.662888') + (INTERVAL '-42 years -29 days 63 seconds')) - (cast((TIME '15:42:7.524476') as interval))) - (TIME '22:19:14.412239'), "t-2"."c-4-numeric(38,3)", 92.17399200019122::numeric, list_cat(('{42, 30, 32, -8, -77}'::int list), (list_prepend(((~(1812081241::int)) # (bit_length(mz_version()))), (list_cat(((('{43, -79, -82, -61, -68, -4, -74, 33, 58, 89, 75, 26, 32, -87, 86, 84, -72, -19, 32, -74}'::int list) || (list_prepend((643833376::int), ('{-54, 76, -11, 93, -7}'::int list)))) || (list_cat((list_append((list_prepend(((cast((TIMESTAMP '65400-10-12') as date)) - (cast(((cast(((DATE '91277-6-19') - ((TIME '12:39:7.558847') - (TIME '17:9:41.957019'))) as date)) - (cast((cast((cast((((TIME '15:22:18.671002') - ((TIME '22:33:56.971004') - (((TIME '0:8:33.902669') + ((TIMESTAMP '99999-12-31') - (TIMESTAMP '32881-5-7'))) - ((cast(("t-2"."c-1-mz_timestamp") as timestamp)) - (TIMESTAMP '10707-6-4'))))) + (((DATE '69879-12-21') + (TIME '16:37:0.805740')) - (TIMESTAMP '99999-12-31'))) as interval)) as time)) as interval))) as date))), ('{88, -27, 35, -5, 7, -65, -36, 31, -62, -15, -97, -62, 29, -55, 38, 41, -20, -25, 86, -46}'::int list))), (bit_length('bar')))), (list_prepend(((DATE '97905-11-25') - (cast((TIMESTAMP '32380-9-2') as date))), (list_cat((list_append((list_append((list_cat(('{60}'::int list), 
('{-51}'::int list))), ((DATE '38037-4-24') - (cast(((cast((((TIMESTAMP '50016-6-26') + (cast(((TIME '21:19:52.524361') - (INTERVAL '35 days 80 seconds')) as interval))) + ((TIME '7:21:3.901932') - ((cast((((DATE '99999-12-31') + (TIME '2:32:20.53752')) - (cast((mz_now()) as timestamp))) as time)) + ((TIMESTAMP '45837-10-25') - (TIMESTAMP '66847-1-26'))))) as date)) + ((cast((INTERVAL '1 MINUTE') as time)) - ((TIME '15:13:11.375670') - (INTERVAL '1 MINUTE')))) as date))))), (-24090::int))), (list_append(('{83, 82, -97, 99, -59}'::int list), (char_length((("t-2"."c-4-numeric(38,3)") + (("t-2"."c-4-numeric(38,3)") - (("t-2"."c-4-numeric(38,3)") + ("t-2"."c-4-numeric(38,3)")))) || (cast(('{"key0": "94"}'::jsonb) as text))))))))))))), (list_append((list_append(('{60}'::int list), ((-127::int) / (-54::int)))), (bit_length('hJ0o3JZ70D'))))))))), list_append(('{97}'::int list), (4::int)), mod((-664621.2288346319::float4), (1.0::float4)), '{key0 => -73}'::map[text=>text], AVG("t-2"."c-2-uint8") OVER (PARTITION BY "t-2"."c-0-uint8" ORDER BY "t-2"."c-3-uint4") FROM "t-2" WHERE True UNION ALL SELECT "t-2"."c-1-mz_timestamp", (((TIME '2:28:18.662888') + (INTERVAL '-42 years -29 days 63 seconds')) - (cast((TIME '15:42:7.524476') as interval))) - (TIME '22:19:14.412239'), "t-2"."c-4-numeric(38,3)", 92.17399200019122::numeric, list_cat(('{42, 30, 32, -8, -77}'::int list), (list_prepend(((~(1812081241::int)) # (bit_length(mz_version()))), (list_cat(((('{43, -79, -82, -61, -68, -4, -74, 33, 58, 89, 75, 26, 32, -87, 86, 84, -72, -19, 32, -74}'::int list) || (list_prepend((643833376::int), ('{-54, 76, -11, 93, -7}'::int list)))) || (list_cat((list_append((list_prepend(((cast((TIMESTAMP '65400-10-12') as date)) - (cast(((cast(((DATE '91277-6-19') - ((TIME '12:39:7.558847') - (TIME '17:9:41.957019'))) as date)) - (cast((cast((cast((((TIME '15:22:18.671002') - ((TIME '22:33:56.971004') - (((TIME '0:8:33.902669') + ((TIMESTAMP '99999-12-31') - (TIMESTAMP '32881-5-7'))) - 
((cast(("t-2"."c-1-mz_timestamp") as timestamp)) - (TIMESTAMP '10707-6-4'))))) + (((DATE '69879-12-21') + (TIME '16:37:0.805740')) - (TIMESTAMP '99999-12-31'))) as interval)) as time)) as interval))) as date))), ('{88, -27, 35, -5, 7, -65, -36, 31, -62, -15, -97, -62, 29, -55, 38, 41, -20, -25, 86, -46}'::int list))), (bit_length('bar')))), (list_prepend(((DATE '97905-11-25') - (cast((TIMESTAMP '32380-9-2') as date))), (list_cat((list_append((list_append((list_cat(('{60}'::int list), ('{-51}'::int list))), ((DATE '38037-4-24') - (cast(((cast((((TIMESTAMP '50016-6-26') + (cast(((TIME '21:19:52.524361') - (INTERVAL '35 days 80 seconds')) as interval))) + ((TIME '7:21:3.901932') - ((cast((((DATE '99999-12-31') + (TIME '2:32:20.53752')) - (cast((mz_now()) as timestamp))) as time)) + ((TIMESTAMP '45837-10-25') - (TIMESTAMP '66847-1-26'))))) as date)) + ((cast((INTERVAL '1 MINUTE') as time)) - ((TIME '15:13:11.375670') - (INTERVAL '1 MINUTE')))) as date))))), (-24090::int))), (list_append(('{83, 82, -97, 99, -59}'::int list), (char_length((("t-2"."c-4-numeric(38,3)") + (("t-2"."c-4-numeric(38,3)") - (("t-2"."c-4-numeric(38,3)") + ("t-2"."c-4-numeric(38,3)")))) || (cast(('{"key0": "94"}'::jsonb) as text))))))))))))), (list_append((list_append(('{60}'::int list), ((-127::int) / (-54::int)))), (bit_length('hJ0o3JZ70D'))))))))), list_append(('{97}'::int list), (4::int)), mod((-664621.2288346319::float4), (1.0::float4)), '{key0 => -73}'::map[text=>text], AVG("t-2"."c-2-uint8") OVER (PARTITION BY "t-2"."c-0-uint8" ORDER BY "t-2"."c-3-uint4") FROM "t-2" WHERE False LIMIT 35; +---- +Explained Query: + Finish limit=35 output=[#0..=#8] + Project (#3, #9, #4, #10, #7, #11..=#13, #8) + Map (record_get[1](#1), record_get[1](#2), record_get[4](#2), record_get[0](#1), record_get[0](#5), ([42, 30, 32, -8, -77] || (-1812081290 || (([43, -79, -82, -61, -68, -4, -74, 33, 58, 89, 75, 26, 32, -87, 86, 84, -72, -19, 32, -74, 643833376, -54, 76, -11, 93, -7] || ((((+65400-10-12 - 
timestamp_to_date((+91277-06-19 - time_to_interval(interval_to_time(time_to_interval(((15:22:18.671002 - (22:33:56.971004 - (00:08:33.902669 - (mz_timestamp_to_timestamp(#3{"c-1-mz_timestamp"}) - +10707-06-04 00:00:00)))) + -264026719:22:59.19426))))))) || [88, -27, 35, -5, 7, -65, -36, 31, -62, -15, -97, -62, 29, -55, 38, 41, -20, -25, 86, -46]) || 24) || (23932598 || ([60, -51, -4375302, -24090] || ([83, 82, -97, 99, -59] || char_length((numeric_to_text((#4{"c-4-numeric(38,3)"} + (#4{"c-4-numeric(38,3)"} - (#4{"c-4-numeric(38,3)"} + #4{"c-4-numeric(38,3)"})))) || "{\"key0\":\"94\"}"))))))) || [60, 2, 80]))), (record_get[1](#5) / bigint_to_numeric(case when (#6 = 0) then null else #6 end)), -11:32:00.273827, 92.17399200019122, [97, 4], -0.25, {key0: "-73"}) + FlatMap unnest_list(#0) + Project (#1) + Reduce group_by=[#0] aggregates=[fused_window_agg(row(row(row(#0, #1, #2, #3, #4), row(#2{"c-2-uint8"}, #2{"c-2-uint8"})), #3{"c-3-uint4"}))] + ReadStorage materialize.public.t-2 + +Source materialize.public.t-2 + +Target cluster: quickstart + +EOF