Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/adapter/src/coord/sequencer/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5328,6 +5328,7 @@ impl Coordinator {
OptimizerNoticeKind::IndexKeyEmpty => {
system_vars.enable_notices_for_index_empty_key()
}
OptimizerNoticeKind::CrossJoin => system_vars.enable_notices_for_cross_joins(),
};
if notice_enabled {
// We don't need to redact the notice parts because
Expand Down
6 changes: 6 additions & 0 deletions src/sql/src/session/vars/definitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1960,6 +1960,12 @@ feature_flags!(
default: true,
enable_for_item_parsing: true,
},
{
name: enable_notices_for_cross_joins,
desc: "emitting notices for cross joins (doesn't affect EXPLAIN)",
default: false,
enable_for_item_parsing: false,
},
{
name: enable_alter_swap,
desc: "the ALTER SWAP feature for objects",
Expand Down
48 changes: 48 additions & 0 deletions src/transform/src/dataflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use proptest_derive::Arbitrary;
use serde::{Deserialize, Serialize};

use crate::monotonic::MonotonicFlag;
use crate::notice::CrossJoin;
use crate::notice::RawOptimizerNotice;
use crate::{IndexOracle, Optimizer, TransformCtx, TransformError};

Expand Down Expand Up @@ -105,6 +106,8 @@ pub fn optimize_dataflow(

mz_repr::explain::trace_plan(dataflow);

gather_notices(dataflow, transform_ctx.df_meta);

Ok(())
}

Expand Down Expand Up @@ -1231,6 +1234,51 @@ impl IndexUsageContext {
}
}

/// Gathers optimization notices by traversing the MIR plan.
/// Note that some notices are collected elsewhere, e.g. inside `Transform`s or sequencing.
#[mz_ore::instrument(
target = "optimizer",
level = "debug",
fields(path.segment = "index_imports")
)]
fn gather_notices(dataflow: &DataflowDesc, dataflow_metainfo: &mut DataflowMetainfo) {
for build_desc in dataflow.objects_to_build.iter() {
// Gather `CrossJoin` notices.
// Some system objects also have cross joins. We don't want them to create noise.
if !build_desc.id.is_system() {
build_desc.plan.visit_pre(&mut |expr: &MirRelationExpr| {
match expr {
MirRelationExpr::Join { implementation, .. } => {
match implementation {
JoinImplementation::Differential(_start, order) => {
for (_, key, _) in order {
if key.is_empty() {
dataflow_metainfo.push_optimizer_notice_dedup(CrossJoin)
}
}
}
JoinImplementation::DeltaQuery(orders) => {
for order in orders {
for (_, key, _) in order {
if key.is_empty() {
dataflow_metainfo.push_optimizer_notice_dedup(CrossJoin)
}
}
}
}
JoinImplementation::IndexedFilter(..) => {
// no cross joins
}
JoinImplementation::Unimplemented => {}
}
}
_ => {}
}
});
}
}
}

/// Extra information about the dataflow. This is not going to be shipped, but has to be processed
/// in other ways, e.g., showing notices to the user, or saving meta-information to the catalog.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Arbitrary)]
Expand Down
3 changes: 3 additions & 0 deletions src/transform/src/notice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
//! the [`RawOptimizerNotice`] enum and other boilerplate code.

// Modules (one for each notice type).
mod cross_join;
mod index_already_exists;
mod index_key_empty;
mod index_too_wide_for_literal_constraints;

pub use cross_join::CrossJoin;
pub use index_already_exists::IndexAlreadyExists;
pub use index_key_empty::IndexKeyEmpty;
pub use index_too_wide_for_literal_constraints::IndexTooWideForLiteralConstraints;
Expand Down Expand Up @@ -359,6 +361,7 @@ raw_optimizer_notices![
IndexAlreadyExists => "An identical index already exists",
IndexTooWideForLiteralConstraints => "Index too wide for literal constraints",
IndexKeyEmpty => "Empty index key",
CrossJoin => "Cross join",
];

impl RawOptimizerNotice {
Expand Down
79 changes: 79 additions & 0 deletions src/transform/src/notice/cross_join.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright Materialize, Inc. and contributors. All rights reserved.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0.

//! Hosts [`CrossJoin`].

use std::collections::BTreeSet;
use std::fmt;

use mz_repr::GlobalId;
use mz_repr::explain::ExprHumanizer;

use crate::notice::{ActionKind, OptimizerNoticeApi};

/// A plan involves a cross join.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct CrossJoin;

impl OptimizerNoticeApi for CrossJoin {
fn dependencies(&self) -> BTreeSet<GlobalId> {
BTreeSet::new()
}

fn fmt_message(
&self,
f: &mut fmt::Formatter<'_>,
_humanizer: &dyn ExprHumanizer,
_redacted: bool,
) -> fmt::Result {
write!(
f,
"Cross join. \
The join will be completely skewed to one worker thread, \
which can lead to performance problems if an input relation to a cross join is large."
)
}

fn fmt_hint(
&self,
f: &mut fmt::Formatter<'_>,
_humanizer: &dyn ExprHumanizer,
_redacted: bool,
) -> fmt::Result {
write!(
f,
"If you encounter slow queries or slow hydrations where only one CPU core is being \
utilized for extended periods of time, try to eliminate the cross join by changing \
your query."
)
//////////////// todo: what else to suggest?
// - hints for how to find in EXPLAIN where the cross join is
// - point to https://materialize.com/docs/transform-data/dataflow-troubleshooting/#is-work-distributed-equally-across-workers
// - maybe use EXPLAIN ANALYZE to see whether the join inputs are big?
// - make a list of common things that can lead to cross joins:
// - obvious stuff, like typing out CROSS JOIN, or not giving a join condition at all (neither in ON or WHERE)
// - OR in join conditions (see https://github.com/MaterializeInc/database-issues/issues/5841)
// - inequality joins
// - (one side of the equality involving both join inputs)
// Maybe put all this in a dedicated docs page, and just link to that here.
}

fn fmt_action(
&self,
_f: &mut fmt::Formatter<'_>,
_humanizer: &dyn ExprHumanizer,
_redacted: bool,
) -> fmt::Result {
Ok(())
}

fn action_kind(&self, _humanizer: &dyn ExprHumanizer) -> ActionKind {
ActionKind::None
}
}
192 changes: 192 additions & 0 deletions test/sqllogictest/transform/notice/cross_join.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

simple conn=mz_system,user=mz_system
ALTER SYSTEM SET enable_mz_notices TO true
----
COMPLETE 0

# Disable rbac checks in order to select from mz_notices.
simple conn=mz_system,user=mz_system
ALTER SYSTEM SET enable_rbac_checks TO false
----
COMPLETE 0

statement ok
CREATE SCHEMA notices;

statement ok
SET SCHEMA = notices;

statement ok
CREATE TABLE t1(a int, b int);

statement ok
CREATE TABLE t2(x int, y int);

# EXPLAIN a peek with a cross join
query T multiline
EXPLAIN OPTIMIZED PLAN FOR
SELECT *
FROM t1, t2;
----
Explained Query:
CrossJoin type=differential
ArrangeBy keys=[[]]
ReadStorage materialize.notices.t1
ArrangeBy keys=[[]]
ReadStorage materialize.notices.t2

Source materialize.notices.t1
Source materialize.notices.t2

Target cluster: quickstart

Notices:
- Notice: Cross join. The join will be completely skewed to one worker thread, which can lead to performance problems if an input relation to a cross join is large.
Hint: If you encounter slow queries or slow hydrations where only one CPU core is being utilized for extended periods of time, try to eliminate the cross join by changing your query.

EOF

# Materialize view with a cross join
statement ok
CREATE MATERIALIZED VIEW mv1 AS
SELECT *
FROM t1, t2;

# Notice should be present in both EXPLAIN and in the internal table.

query T multiline
EXPLAIN OPTIMIZED PLAN FOR MATERIALIZED VIEW mv1
----
materialize.notices.mv1:
CrossJoin type=differential
ArrangeBy keys=[[]]
ReadStorage materialize.notices.t1
ArrangeBy keys=[[]]
ReadStorage materialize.notices.t2

Source materialize.notices.t1
Source materialize.notices.t2

Target cluster: quickstart

Notices:
- Notice: Cross join. The join will be completely skewed to one worker thread, which can lead to performance problems if an input relation to a cross join is large.
Hint: If you encounter slow queries or slow hydrations where only one CPU core is being utilized for extended periods of time, try to eliminate the cross join by changing your query.

EOF

query TTTTTTTT
SELECT
n.notice_type, n.message, n.redacted_message, n.hint, n.redacted_hint, n.action, n.redacted_action, n.action_type
FROM
mz_internal.mz_notices n JOIN
mz_catalog.mz_materialized_views mv ON(n.object_id = mv.id)
WHERE
mv.name = 'mv1';
----
Cross join
Cross join. The join will be completely skewed to one worker thread, which can lead to performance problems if an input relation to a cross join is large.
NULL
If you encounter slow queries or slow hydrations where only one CPU core is being utilized for extended periods of time, try to eliminate the cross join by changing your query.
NULL
NULL
NULL
NULL

# Index on view with a cross join

statement ok
CREATE VIEW v1 AS
SELECT *
FROM t1, t2;

statement ok
CREATE INDEX v1_ind_a_b ON v1(a, b);

# Notice should be present in both EXPLAIN and in the internal table.

query T multiline
EXPLAIN OPTIMIZED PLAN FOR INDEX v1_ind_a_b
----
materialize.notices.v1_ind_a_b:
ArrangeBy keys=[[#0{a}, #1{b}]]
ReadGlobalFromSameDataflow materialize.notices.v1

materialize.notices.v1:
CrossJoin type=differential
ArrangeBy keys=[[]]
ReadStorage materialize.notices.t1
ArrangeBy keys=[[]]
ReadStorage materialize.notices.t2

Source materialize.notices.t1
Source materialize.notices.t2

Target cluster: quickstart

Notices:
- Notice: Cross join. The join will be completely skewed to one worker thread, which can lead to performance problems if an input relation to a cross join is large.
Hint: If you encounter slow queries or slow hydrations where only one CPU core is being utilized for extended periods of time, try to eliminate the cross join by changing your query.

EOF

query TTTTTTTT
SELECT
n.notice_type, n.message, n.redacted_message, n.hint, n.redacted_hint, n.action, n.redacted_action, n.action_type
FROM
mz_internal.mz_notices n JOIN
mz_catalog.mz_indexes idx ON(n.object_id = idx.id)
WHERE
idx.name = 'v1_ind_a_b';
----
Cross join
Cross join. The join will be completely skewed to one worker thread, which can lead to performance problems if an input relation to a cross join is large.
NULL
If you encounter slow queries or slow hydrations where only one CPU core is being utilized for extended periods of time, try to eliminate the cross join by changing your query.
NULL
NULL
NULL
NULL

# Drop the objects with the cross joins. All the notices should disappear from the internal table.

statement ok
DROP VIEW v1 CASCADE;

statement ok
DROP MATERIALIZED VIEW mv1;

query I
SELECT count(*)
FROM mz_internal.mz_notices;
----
0

# Negative example, no notice should be generated.
statement ok
CREATE MATERIALIZED VIEW mv2 AS
SELECT *
FROM t1, t2
WHERE t1.a = t2.x;

query I
SELECT count(*)
FROM mz_internal.mz_notices;
----
0


# todo: more queries (positive and negative examples)


# This further tests `drop_plans_and_metainfos`
statement ok
DROP SCHEMA notices CASCADE;