From a3bcdc497190c96aec7109038e9e3fa4cbc1ab09 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Fri, 3 Oct 2025 12:49:17 +0100 Subject: [PATCH 01/13] Redesign sessions v3 for better partition pruning and indexing --- .../test/__snapshots__/test_schema.ambr | 52 +- .../test/test_raw_sessions_v3_model.py | 8 +- .../database/schema/session_replay_events.py | 7 +- posthog/hogql/database/schema/sessions_v3.py | 46 +- .../database/schema/test/test_sessions_v2.py | 26 + .../database/schema/test/test_sessions_v3.py | 54 ++ .../test_session_v3_where_clause_extractor.py | 634 ++++++++++++++++++ posthog/hogql/database/schema/util/uuid.py | 75 +++ .../schema/util/where_clause_extractor.py | 90 ++- .../hogql/functions/clickhouse/conversions.py | 1 + .../hogql/functions/clickhouse/datetime.py | 1 + posthog/hogql/helpers/timestamp_visitor.py | 7 + posthog/hogql/printer.py | 5 +- posthog/models/raw_sessions/sql_v3.py | 33 +- posthog/test/base.py | 4 +- 15 files changed, 949 insertions(+), 94 deletions(-) create mode 100644 posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py create mode 100644 posthog/hogql/database/schema/util/uuid.py diff --git a/posthog/clickhouse/test/__snapshots__/test_schema.ambr b/posthog/clickhouse/test/__snapshots__/test_schema.ambr index cb6827dc9a5ca..ffc1b95cb2a16 100644 --- a/posthog/clickhouse/test/__snapshots__/test_schema.ambr +++ b/posthog/clickhouse/test/__snapshots__/test_schema.ambr @@ -2625,7 +2625,12 @@ CREATE TABLE IF NOT EXISTS raw_sessions_v3 ( team_id Int64, - session_id_v7 UUID, + + -- Both UInt128 and UUID are imperfect choices here + -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ + -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + session_id_v7 UInt128, + session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -2711,7 +2716,7 @@ WITH parsed_events AS ( SELECT team_id, - `$session_id`, + `$session_id_uuid` AS session_id_v7, distinct_id AS _distinct_id, person_id, timestamp, @@ -2841,8 +2846,8 @@ ) SELECT - team_id, - toUUID(`$session_id`) as session_id_v7, + team_id, + session_id_v7, initializeAggregation('argMaxState', _distinct_id, timestamp) as distinct_id, initializeAggregation('argMaxState', person_id, timestamp) as person_id, @@ -3675,7 +3680,12 @@ CREATE TABLE IF NOT EXISTS sharded_raw_sessions_v3 ( team_id Int64, - session_id_v7 UUID, + + -- Both UInt128 and UUID are imperfect choices here + -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ + -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + session_id_v7 UInt128, + session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -3749,15 +3759,10 @@ flag_values AggregateFunction(groupUniqArrayMap, Map(String, String)) ) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/77f1df52-4b43-11e9-910f-b8ca3a9b9f3e_{shard}/posthog.raw_sessions_v3', '{replica}') - PARTITION BY toYYYYMM(UUIDv7ToDateTime(session_id_v7)) + PARTITION BY toYYYYMM(session_timestamp) ORDER BY ( team_id, - - -- sadly we need to include this as clickhouse UUIDs have insane ordering - -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ - -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope - UUIDv7ToDateTime(session_id_v7), - + session_timestamp, session_id_v7 ) @@ -4661,7 +4666,12 @@ CREATE TABLE IF NOT EXISTS writable_raw_sessions_v3 ( team_id Int64, - session_id_v7 UUID, + + -- Both UInt128 and UUID are imperfect choices here + -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ + -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + session_id_v7 UInt128, + session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -5926,7 +5936,12 @@ CREATE TABLE IF NOT EXISTS sharded_raw_sessions_v3 ( team_id Int64, - session_id_v7 UUID, + + -- Both UInt128 and UUID are imperfect choices here + -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ + -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + session_id_v7 UInt128, + session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -6000,15 +6015,10 @@ flag_values AggregateFunction(groupUniqArrayMap, Map(String, String)) ) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/77f1df52-4b43-11e9-910f-b8ca3a9b9f3e_{shard}/posthog.raw_sessions_v3', '{replica}') - PARTITION BY toYYYYMM(UUIDv7ToDateTime(session_id_v7)) + PARTITION BY toYYYYMM(session_timestamp) ORDER BY ( team_id, - - -- sadly we need to include this as clickhouse UUIDs have insane ordering - -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ - -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope - UUIDv7ToDateTime(session_id_v7), - + session_timestamp, session_id_v7 ) diff --git a/posthog/clickhouse/test/test_raw_sessions_v3_model.py b/posthog/clickhouse/test/test_raw_sessions_v3_model.py index 33a99e9265f5a..5f695f940141c 100644 --- a/posthog/clickhouse/test/test_raw_sessions_v3_model.py +++ b/posthog/clickhouse/test/test_raw_sessions_v3_model.py @@ -31,7 +31,7 @@ def select_by_session_id(self, session_id): * from raw_sessions_v3_v where - session_id_v7 = toUUID(%(session_id)s) AND + session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s """, { @@ -58,7 +58,7 @@ def test_it_creates_session_when_creating_event(self): team_id from raw_sessions_v3_v where - session_id_v7 = toUUID(%(session_id)s) AND + session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s """, { @@ -252,7 +252,7 @@ def test_select_from_sessions(self): max_timestamp, urls FROM raw_sessions_v3 - WHERE session_id_v7 = toUUID(%(session_id)s) AND team_id = %(team_id)s + WHERE session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s """, { "session_id": session_id, @@ -283,7 +283,7 @@ def test_select_from_sessions_mv(self): max_timestamp, urls FROM raw_sessions_v3_mv - WHERE session_id_v7 = toUUID(%(session_id)s) AND team_id = %(team_id)s + WHERE session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s """, { "session_id": session_id, diff --git a/posthog/hogql/database/schema/session_replay_events.py b/posthog/hogql/database/schema/session_replay_events.py index a4fd599c6fb61..46f7bf6bad61e 100644 --- a/posthog/hogql/database/schema/session_replay_events.py +++ b/posthog/hogql/database/schema/session_replay_events.py @@ -26,10 +26,7 @@ select_from_sessions_table_v2, session_id_to_session_id_v7_as_uint128_expr, ) -from posthog.hogql.database.schema.sessions_v3 import ( - select_from_sessions_table_v3, - session_id_to_session_id_v7_as_uuid_expr, -) +from posthog.hogql.database.schema.sessions_v3 import select_from_sessions_table_v3, session_id_to_uint128_as_uuid_expr from posthog.hogql.errors import ResolutionError @@ -91,7 +88,7 @@ def join_replay_table_to_sessions_table_v3( join_expr.constraint = ast.JoinConstraint( expr=ast.CompareOperation( op=ast.CompareOperationOp.Eq, - left=session_id_to_session_id_v7_as_uuid_expr(ast.Field(chain=[join_to_add.from_table, "session_id"])), + left=session_id_to_uint128_as_uuid_expr(ast.Field(chain=[join_to_add.from_table, "session_id"])), right=ast.Field(chain=[join_to_add.to_table, "session_id_v7"]), ), constraint_type="ON", diff --git a/posthog/hogql/database/schema/sessions_v3.py b/posthog/hogql/database/schema/sessions_v3.py index 018327dc4cb0e..b369d5112a48a 100644 --- a/posthog/hogql/database/schema/sessions_v3.py +++ b/posthog/hogql/database/schema/sessions_v3.py @@ -41,6 +41,7 @@ RAW_SESSIONS_FIELDS: dict[str, FieldOrTable] = { "team_id": IntegerDatabaseField(name="team_id", nullable=False), "session_id_v7": UUIDDatabaseField(name="session_id_v7", nullable=False), + "session_timestamp": DateTimeDatabaseField(name="session_timestamp", nullable=False), "distinct_id": DatabaseField(name="distinct_id", nullable=False), "person_id": DatabaseField(name="person_id", nullable=False), "min_timestamp": DateTimeDatabaseField(name="min_timestamp", nullable=False), @@ -89,6 +90,7 @@ "id": StringDatabaseField(name="id"), # TODO remove this, it's a duplicate of the correct session_id field below to get some trends working on a deadline "session_id": StringDatabaseField(name="session_id"), + "session_timestamp": DateTimeDatabaseField(name="session_timestamp", nullable=False), "distinct_id": StringDatabaseField(name="distinct_id"), "person_id": UUIDDatabaseField(name="person_id"), # timestamp @@ -184,7 +186,26 @@ def arg_max_merge_field(field_name: str) -> ast.Call: aggregate_fields: dict[str, ast.Expr] = { "session_id": ast.Call( name="toString", - args=[ast.Field(chain=[table_name, "session_id_v7"])], + args=[ + ast.Call( + name="reinterpretAsUUID", + args=[ + ast.Call( + name="bitOr", + args=[ + ast.Call( + name="bitShiftLeft", + args=[ast.Field(chain=[table_name, "session_id_v7"]), ast.Constant(value=64)], + ), + ast.Call( + name="bitShiftRight", + args=[ast.Field(chain=[table_name, "session_id_v7"]), ast.Constant(value=64)], + ), + ], + ) + ], + ) + ], ), # try not to use this, prefer to use session_id_v7 "distinct_id": arg_max_merge_field("distinct_id"), "person_id": arg_max_merge_field("person_id"), @@ -394,25 +415,8 @@ def session_id_to_session_id_v7_as_uuid_expr(session_id: ast.Expr) -> ast.Expr: return ast.Call(name="toUUID", args=[session_id]) -def uuid_to_uint128_expr(uuid: ast.Expr) -> ast.Expr: - return ast.Call( - name="reinterpretAsUUID", - args=[ - ast.Call( - name="bitOr", - args=[ - ast.Call( - name="bitShiftLeft", - args=[uuid, ast.Constant(value=64)], - ), - ast.Call( - name="bitShiftRight", - args=[uuid, ast.Constant(value=64)], - ), - ], - ) - ], - ) +def session_id_to_uint128_as_uuid_expr(session_id: ast.Expr) -> ast.Expr: + return ast.Call(name="_toUInt128", args=[(session_id_to_session_id_v7_as_uuid_expr(session_id))]) def join_events_table_to_sessions_table_v3( @@ -429,7 +433,7 @@ def join_events_table_to_sessions_table_v3( join_expr.constraint = ast.JoinConstraint( expr=ast.CompareOperation( op=ast.CompareOperationOp.Eq, - left=uuid_to_uint128_expr(ast.Field(chain=[join_to_add.from_table, "$session_id_uuid"])), + left=ast.Field(chain=[join_to_add.from_table, "$session_id_uuid"]), right=ast.Field(chain=[join_to_add.to_table, "session_id_v7"]), ), constraint_type="ON", diff --git a/posthog/hogql/database/schema/test/test_sessions_v2.py b/posthog/hogql/database/schema/test/test_sessions_v2.py index 3b320cf0d015b..719b3ec19122a 100644 --- a/posthog/hogql/database/schema/test/test_sessions_v2.py +++ b/posthog/hogql/database/schema/test/test_sessions_v2.py @@ -695,6 +695,32 @@ def test_can_use_v1_and_v2_fields(self): (0, 0, "https://example.com/pathname", "https://example.com/pathname", "/pathname", "/pathname") ] + def test_event_sessions_where(self): + session_id = str(uuid7()) + + _create_event( + event="$pageview", + team=self.team, + distinct_id="d1", + properties={ + "$current_url": "https://example.com/pathname", + "$pathname": "/pathname", + "$session_id": session_id, + }, + ) + + response = self.__execute( + parse_select( + """ + select + count() from events + where events.session.$entry_pathname = '/pathname' + """, + ), + ) + + assert response.results == [(1,)] + class TestGetLazySessionProperties(ClickhouseTestMixin, APIBaseTest): def test_all(self): diff --git a/posthog/hogql/database/schema/test/test_sessions_v3.py b/posthog/hogql/database/schema/test/test_sessions_v3.py index 5264d6060f802..d0c93f29aeda6 100644 --- a/posthog/hogql/database/schema/test/test_sessions_v3.py +++ b/posthog/hogql/database/schema/test/test_sessions_v3.py @@ -618,6 +618,60 @@ def test_can_use_v1_and_v2_fields(self): (0, 0, "https://example.com/pathname", "https://example.com/pathname", "/pathname", "/pathname") ] + def test_event_sessions_where_event_timestamp(self): + session_id = str(uuid7()) + + _create_event( + event="$pageview", + team=self.team, + distinct_id="d1", + properties={ + "$current_url": "https://example.com/pathname", + "$pathname": "/pathname", + "$session_id": session_id, + }, + ) + + response = self.__execute( + parse_select( + """ + select + session.id as session_id, + from events + where session_id = {session_id} AND timestamp >= '1970-01-01' + """, + placeholders={"session_id": ast.Constant(value=session_id)}, + ), + ) + + assert response.results == [(session_id,)] + + def test_event_sessions_where(self): + session_id = str(uuid7()) + + _create_event( + event="$pageview", + team=self.team, + distinct_id="d1", + properties={ + "$current_url": "https://example.com/pathname", + "$pathname": "/pathname", + "$session_id": session_id, + }, + ) + + response = self.__execute( + parse_select( + """ + select + count() from events + where events.session.$entry_pathname = '/pathname' + """, + ), + ) + + assert response.results == [(1,)] + class TestGetLazySessionProperties(ClickhouseTestMixin, APIBaseTest): def test_all(self): diff --git a/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py new file mode 100644 index 0000000000000..4843b44878a4b --- /dev/null +++ b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py @@ -0,0 +1,634 @@ +from typing import Optional, Union + +from posthog.test.base import APIBaseTest, ClickhouseTestMixin + +from inline_snapshot import snapshot + +from posthog.schema import SessionTableVersion + +from posthog.hogql import ast +from posthog.hogql.context import HogQLContext +from posthog.hogql.database.schema.util.where_clause_extractor import SessionMinTimestampWhereClauseExtractorV3 +from posthog.hogql.modifiers import create_default_modifiers_for_team +from posthog.hogql.parser import parse_expr, parse_select +from posthog.hogql.printer import prepare_ast_for_printing, print_prepared_ast +from posthog.hogql.visitor import clone_expr + + +def f(s: Union[str, ast.Expr, None], placeholders: Optional[dict[str, ast.Expr]] = None) -> Union[ast.Expr, None]: + if s is None: + return None + if isinstance(s, str): + expr = parse_expr(s, placeholders=placeholders) + else: + expr = s + return clone_expr(expr, clear_types=True, clear_locations=True) + + +def parse( + s: str, + placeholders: Optional[dict[str, ast.Expr]] = None, +) -> ast.SelectQuery | ast.SelectSetQuery: + parsed = parse_select(s, placeholders=placeholders) + return parsed + + +class TestSessionWhereClauseExtractorV3(ClickhouseTestMixin, APIBaseTest): + @property + def inliner(self): + team = self.team + modifiers = create_default_modifiers_for_team(team) + modifiers.sessionTableVersion = SessionTableVersion.V3 + context = HogQLContext( + team_id=team.pk, + team=team, + enable_select_queries=True, + modifiers=modifiers, + ) + return SessionMinTimestampWhereClauseExtractorV3(context) + + def test_handles_select_with_no_where_claus(self): + inner_where = self.inliner.get_inner_where(parse("SELECT * FROM sessions")) + assert inner_where is None + + def test_handles_select_with_eq(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE $start_timestamp = '2021-01-01'"))) + expected = f( + "raw_sessions_v3.session_timestamp >= ('2021-01-01' - toIntervalDay(3)) AND raw_sessions_v3.session_timestamp <= ('2021-01-01' + toIntervalDay(3))" + ) + assert expected == actual + + def test_handles_select_with_eq_flipped(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE '2021-01-01' = $start_timestamp"))) + expected = f( + "raw_sessions_v3.session_timestamp >= ('2021-01-01' - toIntervalDay(3)) AND raw_sessions_v3.session_timestamp <= ('2021-01-01' + toIntervalDay(3))" + ) + assert expected == actual + + def test_handles_select_with_simple_gt(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE $start_timestamp > '2021-01-01'"))) + expected = f("raw_sessions_v3.session_timestamp >= ('2021-01-01' - toIntervalDay(3))") + assert expected == actual + + def test_handles_select_with_simple_gte(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE $start_timestamp >= '2021-01-01'"))) + expected = f("raw_sessions_v3.session_timestamp >= ('2021-01-01' - toIntervalDay(3))") + assert expected == actual + + def test_handles_select_with_simple_lt(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE $start_timestamp < '2021-01-01'"))) + expected = f("raw_sessions_v3.session_timestamp <= ('2021-01-01' + toIntervalDay(3))") + assert expected == actual + + def test_handles_select_with_simple_lte(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE $start_timestamp <= '2021-01-01'"))) + expected = f("raw_sessions_v3.session_timestamp <= ('2021-01-01' + toIntervalDay(3))") + assert expected == actual + + def test_select_with_placeholder(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM sessions WHERE $start_timestamp > {timestamp}", + placeholders={"timestamp": ast.Constant(value="2021-01-01")}, + ) + ) + ) + expected = f("raw_sessions_v3.session_timestamp >= ('2021-01-01' - toIntervalDay(3))") + assert expected == actual + + def test_unrelated_equals(self): + actual = self.inliner.get_inner_where( + parse("SELECT * FROM sessions WHERE $initial_utm_campaign = $initial_utm_source") + ) + assert actual is None + + def test_timestamp_and(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM sessions WHERE and($start_timestamp >= '2021-01-01', $start_timestamp <= '2021-01-03')" + ) + ) + ) + expected = f( + "raw_sessions_v3.session_timestamp >= ('2021-01-01' - toIntervalDay(3)) AND raw_sessions_v3.session_timestamp <= ('2021-01-03' + toIntervalDay(3))" + ) + assert expected == actual + + def test_timestamp_or(self): + actual = f( + self.inliner.get_inner_where( + parse("SELECT * FROM sessions WHERE and(min_timestamp <= '2021-01-01', min_timestamp >= '2021-01-03')") + ) + ) + expected = f( + "raw_sessions_v3.session_timestamp <= ('2021-01-01' + toIntervalDay(3)) AND raw_sessions_v3.session_timestamp >= ('2021-01-03' - toIntervalDay(3))" + ) + assert expected == actual + + def test_unrelated_function(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like('a', 'b')"))) + assert actual is None + + def test_timestamp_unrelated_function(self): + actual = f( + self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like(toString(min_timestamp), 'b')")) + ) + assert actual is None + + def test_timestamp_unrelated_function_timestamp(self): + actual = f( + self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like(toString(min_timestamp), 'b')")) + ) + assert actual is None + + def test_ambiguous_or(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM sessions WHERE or($start_timestamp > '2021-01-03', like(toString($start_timestamp), 'b'))" + ) + ) + ) + assert actual is None + + def test_ambiguous_and(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM sessions WHERE and($start_timestamp > '2021-01-03', like(toString($start_timestamp), 'b'))" + ) + ) + ) + assert actual == f("raw_sessions_v3.session_timestamp >= ('2021-01-03' - toIntervalDay(3))") + + def test_join(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM events JOIN sessions ON events.session_id = sessions.session_id WHERE $start_timestamp > '2021-01-03'" + ) + ) + ) + expected = f("raw_sessions_v3.session_timestamp >= ('2021-01-03' - toIntervalDay(3))") + assert expected == actual + + def test_join_using_events_timestamp_filter(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM events JOIN sessions ON events.session_id = sessions.session_id WHERE timestamp > '2021-01-03'" + ) + ) + ) + expected = f("raw_sessions_v3.session_timestamp >= ('2021-01-03' - toIntervalDay(3))") + assert expected == actual + + def test_minus(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE $start_timestamp >= today() - 2"))) + expected = f("raw_sessions_v3.session_timestamp >= ((today() - 2) - toIntervalDay(3))") + assert expected == actual + + def test_minus_function(self): + actual = f( + self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE $start_timestamp >= minus(today() , 2)")) + ) + expected = f("raw_sessions_v3.session_timestamp >= (minus(today(), 2) - toIntervalDay(3))") + assert expected == actual + + def test_less_function(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE less($start_timestamp, today())"))) + expected = f("raw_sessions_v3.session_timestamp <= (today() + toIntervalDay(3))") + assert expected == actual + + def test_less_function_second_arg(self): + actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE less(today(), $start_timestamp)"))) + expected = f("raw_sessions_v3.session_timestamp >= (today() - toIntervalDay(3))") + assert expected == actual + + def test_subquery_args(self): + actual = f( + self.inliner.get_inner_where( + parse("SELECT * FROM sessions WHERE true = (select false) and less(today(), min_timestamp)") + ) + ) + expected = f("raw_sessions_v3.session_timestamp >= (today() - toIntervalDay(3))") + assert expected == actual + + def test_real_example(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM events JOIN sessions ON events.session_id = sessions.session_id WHERE event = '$pageview' AND toTimeZone(timestamp, 'US/Pacific') >= toDateTime('2024-03-12 00:00:00', 'US/Pacific') AND toTimeZone(timestamp, 'US/Pacific') <= toDateTime('2024-03-19 23:59:59', 'US/Pacific')" + ) + ) + ) + expected = f( + "toTimeZone(raw_sessions_v3.session_timestamp, 'US/Pacific') >= (toDateTime('2024-03-12 00:00:00', 'US/Pacific') - toIntervalDay(3)) AND toTimeZone(raw_sessions_v3.session_timestamp, 'US/Pacific') <= (toDateTime('2024-03-19 23:59:59', 'US/Pacific') + toIntervalDay(3))" + ) + assert expected == actual + + def test_collapse_and(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM sessions WHERE event = '$pageview' AND (TRUE AND (TRUE AND TRUE AND (timestamp >= '2024-03-12' AND TRUE)))" + ) + ) + ) + expected = f("raw_sessions_v3.session_timestamp >= ('2024-03-12' - toIntervalDay(3))") + assert expected == actual + + def test_select_query(self): + actual = f( + self.inliner.get_inner_where( + parse( + "SELECT * FROM sessions WHERE timestamp = (SELECT max(timestamp) FROM events WHERE event = '$pageview')" + ) + ) + ) + assert actual is None + + def test_breakdown_subquery(self): + actual = f( + self.inliner.get_inner_where( + parse( + f""" +SELECT + count(DISTINCT e.$session_id) AS total, + toStartOfDay(timestamp) AS day_start, + multiIf(and(greaterOrEquals(session.$session_duration, 2.0), less(session.$session_duration, 4.5)), '[2.0,4.5]', and(greaterOrEquals(session.$session_duration, 4.5), less(session.$session_duration, 27.0)), '[4.5,27.0]', and(greaterOrEquals(session.$session_duration, 27.0), less(session.$session_duration, 44.0)), '[27.0,44.0]', and(greaterOrEquals(session.$session_duration, 44.0), less(session.$session_duration, 48.0)), '[44.0,48.0]', and(greaterOrEquals(session.$session_duration, 48.0), less(session.$session_duration, 57.5)), '[48.0,57.5]', and(greaterOrEquals(session.$session_duration, 57.5), less(session.$session_duration, 61.0)), '[57.5,61.0]', and(greaterOrEquals(session.$session_duration, 61.0), less(session.$session_duration, 74.0)), '[61.0,74.0]', and(greaterOrEquals(session.$session_duration, 74.0), less(session.$session_duration, 90.0)), '[74.0,90.0]', and(greaterOrEquals(session.$session_duration, 90.0), less(session.$session_duration, 98.5)), '[90.0,98.5]', and(greaterOrEquals(session.$session_duration, 98.5), less(session.$session_duration, 167.01)), '[98.5,167.01]', '["",""]') AS breakdown_value + FROM + events AS e SAMPLE 1 + WHERE + and(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(toDateTime('2024-04-13 00:00:00')))), lessOrEquals(timestamp, assumeNotNull(toDateTime('2024-04-20 23:59:59'))), equals(event, '$pageview'), in(person_id, (SELECT + person_id + FROM + raw_cohort_people + WHERE + and(equals(cohort_id, 2), equals(version, 0))))) + GROUP BY + day_start, + breakdown_value + """ + ) + ) + ) + expected = f( + "raw_sessions_v3.session_timestamp >= (toStartOfDay(assumeNotNull(toDateTime('2024-04-13 00:00:00'))) - toIntervalDay(3)) AND raw_sessions_v3.session_timestamp <= (assumeNotNull(toDateTime('2024-04-20 23:59:59')) + toIntervalDay(3))" + ) + assert expected == actual + + def test_not_like(self): + # based on a bug here: https://posthog.slack.com/archives/C05LJK1N3CP/p1719916566421079 + where = ast.And( + exprs=[ + ast.CompareOperation( + left=ast.Field(chain=["event"]), + op=ast.CompareOperationOp.Eq, + right=ast.Constant(value="$pageview"), + ), + ast.CompareOperation( + left=ast.Field(chain=["timestamp"]), + op=ast.CompareOperationOp.GtEq, + right=ast.Constant(value="2024-03-12"), + ), + ast.And( + exprs=[ + ast.CompareOperation( + left=ast.Field(chain=["host"]), + op=ast.CompareOperationOp.NotILike, + right=ast.Constant(value="localhost:3000"), + ), + ast.CompareOperation( + left=ast.Field(chain=["host"]), + op=ast.CompareOperationOp.NotILike, + right=ast.Constant(value="localhost:3001"), + ), + ] + ), + ] + ) + select = ast.SelectQuery(select=[], where=where) + actual = f(self.inliner.get_inner_where(select)) + expected = f("raw_sessions_v3.session_timestamp >= ('2024-03-12' - toIntervalDay(3))") + assert expected == actual + + def test_point_query(self): + actual = f( + self.inliner.get_inner_where( + parse( + """ + SELECT * FROM sessions WHERE session_id = '0199a58b-fdf2-785c-b6e3-6ba32b2380cf' + """ + ) + ) + ) + expected = f( + "raw_sessions_v3.session_timestamp = fromUnixTimestamp64Milli(_toUInt64(bitShiftRight(_toUInt128(toUUID('0199a58b-fdf2-785c-b6e3-6ba32b2380cf')), 80)))" + ) + assert expected == actual + + +class TestSessionsV3QueriesHogQLToClickhouse(ClickhouseTestMixin, APIBaseTest): + def print_query(self, query: str) -> str: + team = self.team + modifiers = create_default_modifiers_for_team(team) + modifiers.sessionTableVersion = SessionTableVersion.V3 + context = HogQLContext( + team_id=team.pk, + team=team, + enable_select_queries=True, + modifiers=modifiers, + ) + prepared_ast = prepare_ast_for_printing(node=parse(query), context=context, dialect="clickhouse") + if prepared_ast is None: + return "" + pretty = print_prepared_ast(prepared_ast, context=context, dialect="clickhouse", pretty=True) + return pretty + + def test_select_with_timestamp(self): + actual = self.print_query("SELECT session_id FROM sessions WHERE $start_timestamp > '2021-01-01'") + assert self.generalize_sql(actual) == snapshot( + """\ +SELECT + sessions.session_id AS session_id +FROM + (SELECT + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + min(toTimeZone(raw_sessions_v3.min_timestamp, %(hogql_val_0)s)) AS `$start_timestamp`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM + raw_sessions_v3 + WHERE + and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(%(hogql_val_1)s, toIntervalDay(3)))) + GROUP BY + raw_sessions_v3.session_id_v7, + raw_sessions_v3.session_id_v7) AS sessions +WHERE + ifNull(greater(sessions.`$start_timestamp`, %(hogql_val_2)s), 0) +LIMIT 50000\ +""" + ) + + def test_join_with_events(self): + actual = self.print_query( + """ +SELECT + sessions.session_id, + uniq(uuid) as uniq_uuid +FROM events +JOIN sessions +ON events.$session_id = sessions.session_id +WHERE events.timestamp > '2021-01-01' +GROUP BY sessions.session_id +""" + ) + assert self.generalize_sql(actual) == snapshot( + """\ +SELECT + sessions.session_id AS session_id, + uniq(events.uuid) AS uniq_uuid +FROM + events + JOIN (SELECT + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM + raw_sessions_v3 + WHERE + and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(%(hogql_val_0)s, toIntervalDay(3)))) + GROUP BY + raw_sessions_v3.session_id_v7, + raw_sessions_v3.session_id_v7) AS sessions ON equals(events.`$session_id`, sessions.session_id) +WHERE + and(equals(events.team_id, ), greater(toTimeZone(events.timestamp, %(hogql_val_1)s), %(hogql_val_2)s)) +GROUP BY + sessions.session_id +LIMIT 50000\ +""" + ) + + def test_union(self): + actual = self.print_query( + """ +SELECT 0 as duration +UNION ALL +SELECT events.session.$session_duration as duration +FROM events +WHERE events.timestamp < today() + """ + ) + assert self.generalize_sql(actual) == snapshot( + """\ +SELECT + 0 AS duration +LIMIT 50000 +UNION ALL +SELECT + events__session.`$session_duration` AS duration +FROM + events + LEFT JOIN (SELECT + dateDiff(%(hogql_val_0)s, min(toTimeZone(raw_sessions_v3.min_timestamp, %(hogql_val_1)s)), max(toTimeZone(raw_sessions_v3.max_timestamp, %(hogql_val_2)s))) AS `$session_duration`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM + raw_sessions_v3 + WHERE + and(equals(raw_sessions_v3.team_id, ), lessOrEquals(raw_sessions_v3.session_timestamp, plus(today(), toIntervalDay(3)))) + GROUP BY + raw_sessions_v3.session_id_v7, + raw_sessions_v3.session_id_v7) AS events__session ON equals(events.`$session_id_uuid`, events__session.session_id_v7) +WHERE + and(equals(events.team_id, ), less(toTimeZone(events.timestamp, %(hogql_val_3)s), today())) +LIMIT 50000\ +""" + ) + + def test_session_breakdown(self): + actual = self.print_query( + """SELECT count(DISTINCT e."$session_id") AS total, + toStartOfDay(timestamp) AS day_start, + multiIf(and(greaterOrEquals(session."$session_duration", 2.0), + less(session."$session_duration", 4.5)), + '[2.0,4.5]', + and(greaterOrEquals(session."$session_duration", 4.5), + less(session."$session_duration", 27.0)), + '[4.5,27.0]', + and(greaterOrEquals(session."$session_duration", 27.0), + less(session."$session_duration", 44.0)), + '[27.0,44.0]', + and(greaterOrEquals(session."$session_duration", 44.0), + less(session."$session_duration", 48.0)), + '[44.0,48.0]', + and(greaterOrEquals(session."$session_duration", 48.0), + less(session."$session_duration", 57.5)), + '[48.0,57.5]', + and(greaterOrEquals(session."$session_duration", 57.5), + less(session."$session_duration", 61.0)), + '[57.5,61.0]', + and(greaterOrEquals(session."$session_duration", 61.0), + less(session."$session_duration", 74.0)), + '[61.0,74.0]', + and(greaterOrEquals(session."$session_duration", 74.0), + less(session."$session_duration", 90.0)), + '[74.0,90.0]', + and(greaterOrEquals(session."$session_duration", 90.0), + less(session."$session_duration", 98.5)), + '[90.0,98.5]', and(greaterOrEquals(session."$session_duration", 98.5), + less(session."$session_duration", 167.01)), '[98.5,167.01]', + '["",""]') AS breakdown_value +FROM events AS e SAMPLE 1 +WHERE and(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(toDateTime('2024-04-13 00:00:00')))), + lessOrEquals(timestamp, assumeNotNull(toDateTime('2024-04-20 23:59:59'))), + equals(event, '$pageview'), in(person_id, (SELECT person_id + FROM raw_cohort_people + WHERE and(equals(cohort_id, 2), equals(version, 0))))) +GROUP BY day_start, + breakdown_value""" + ) + assert self.generalize_sql(actual) == snapshot( + """\ +SELECT + count(DISTINCT e.`$session_id`) AS total, + toStartOfDay(toTimeZone(e.timestamp, %(hogql_val_7)s)) AS day_start, + multiIf(and(ifNull(greaterOrEquals(e__session.`$session_duration`, 2.0), 0), ifNull(less(e__session.`$session_duration`, 4.5), 0)), %(hogql_val_8)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 4.5), 0), ifNull(less(e__session.`$session_duration`, 27.0), 0)), %(hogql_val_9)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 27.0), 0), ifNull(less(e__session.`$session_duration`, 44.0), 0)), %(hogql_val_10)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 44.0), 0), ifNull(less(e__session.`$session_duration`, 48.0), 0)), %(hogql_val_11)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 48.0), 0), ifNull(less(e__session.`$session_duration`, 57.5), 0)), %(hogql_val_12)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 57.5), 0), ifNull(less(e__session.`$session_duration`, 61.0), 0)), %(hogql_val_13)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 61.0), 0), ifNull(less(e__session.`$session_duration`, 74.0), 0)), %(hogql_val_14)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 74.0), 0), ifNull(less(e__session.`$session_duration`, 90.0), 0)), %(hogql_val_15)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 90.0), 0), ifNull(less(e__session.`$session_duration`, 98.5), 0)), %(hogql_val_16)s, and(ifNull(greaterOrEquals(e__session.`$session_duration`, 98.5), 0), ifNull(less(e__session.`$session_duration`, 167.01), 0)), %(hogql_val_17)s, %(hogql_val_18)s) AS breakdown_value +FROM + events AS e SAMPLE 1 + LEFT OUTER JOIN (SELECT + argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM + person_distinct_id_overrides + WHERE + equals(person_distinct_id_overrides.team_id, ) + GROUP BY + person_distinct_id_overrides.distinct_id + HAVING + ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN (SELECT + dateDiff(%(hogql_val_0)s, min(toTimeZone(raw_sessions_v3.min_timestamp, %(hogql_val_1)s)), max(toTimeZone(raw_sessions_v3.max_timestamp, %(hogql_val_2)s))) AS `$session_duration`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM + raw_sessions_v3 + WHERE + and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(toStartOfDay(assumeNotNull(toDateTime(%(hogql_val_3)s, %(hogql_val_4)s))), toIntervalDay(3))), lessOrEquals(raw_sessions_v3.session_timestamp, plus(assumeNotNull(toDateTime(%(hogql_val_5)s, %(hogql_val_6)s)), toIntervalDay(3)))) + GROUP BY + raw_sessions_v3.session_id_v7, + raw_sessions_v3.session_id_v7) AS e__session ON equals(e.`$session_id_uuid`, e__session.session_id_v7) +WHERE + and(equals(e.team_id, ), and(greaterOrEquals(toTimeZone(e.timestamp, %(hogql_val_19)s), toStartOfDay(assumeNotNull(toDateTime(%(hogql_val_20)s, %(hogql_val_21)s)))), lessOrEquals(toTimeZone(e.timestamp, %(hogql_val_22)s), assumeNotNull(toDateTime(%(hogql_val_23)s, %(hogql_val_24)s))), equals(e.event, %(hogql_val_25)s), in(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), (SELECT + cohortpeople.person_id AS person_id + FROM + cohortpeople + WHERE + and(equals(cohortpeople.team_id, ), and(equals(cohortpeople.cohort_id, 2), equals(cohortpeople.version, 0))))))) +GROUP BY + day_start, + breakdown_value +LIMIT 50000\ +""" + ) + + def test_session_replay_query(self): + actual = self.print_query( + """ +SELECT + s.session_id, + min(s.min_first_timestamp) as start_time +FROM raw_session_replay_events s +WHERE s.session.$entry_pathname = '/home' AND min_first_timestamp >= '2021-01-01:12:34' AND min_first_timestamp < now() +GROUP BY session_id + """ + ) + assert self.generalize_sql(actual) == snapshot( + """\ +SELECT + s.session_id AS session_id, + min(toTimeZone(s.min_first_timestamp, %(hogql_val_3)s)) AS start_time +FROM + session_replay_events AS s + LEFT JOIN (SELECT + path(argMinMerge(raw_sessions_v3.entry_url)) AS `$entry_pathname`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM + raw_sessions_v3 + WHERE + and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(%(hogql_val_0)s, toIntervalDay(3))), lessOrEquals(raw_sessions_v3.session_timestamp, plus(now64(6, %(hogql_val_1)s), toIntervalDay(3)))) + GROUP BY + raw_sessions_v3.session_id_v7, + raw_sessions_v3.session_id_v7) AS s__session ON equals(toUInt128(accurateCastOrNull(s.session_id, %(hogql_val_2)s)), s__session.session_id_v7) +WHERE + and(equals(s.team_id, ), ifNull(equals(s__session.`$entry_pathname`, %(hogql_val_4)s), 0), greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s), %(hogql_val_6)s), less(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), now64(6, %(hogql_val_8)s))) +GROUP BY + s.session_id +LIMIT 50000\ +""" + ) + + def test_urls_in_sessions_in_timestamp_query(self): + actual = self.print_query( + """ + select + session_id, + `$urls`, + $start_timestamp +from sessions +where `$start_timestamp` >= now() - toIntervalDay(7) +""" + ) + assert self.generalize_sql(actual) == snapshot( + """\ +SELECT + sessions.session_id AS session_id, + sessions.`$urls` AS `$urls`, + sessions.`$start_timestamp` AS `$start_timestamp` +FROM + (SELECT + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + arrayDistinct(arrayFlatten(groupArray(raw_sessions_v3.urls))) AS `$urls`, + min(toTimeZone(raw_sessions_v3.min_timestamp, %(hogql_val_0)s)) AS `$start_timestamp`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM + raw_sessions_v3 + WHERE + and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(minus(now64(6, %(hogql_val_1)s), toIntervalDay(7)), toIntervalDay(3)))) + GROUP BY + raw_sessions_v3.session_id_v7, + raw_sessions_v3.session_id_v7) AS sessions +WHERE + ifNull(greaterOrEquals(sessions.`$start_timestamp`, minus(now64(6, %(hogql_val_2)s), toIntervalDay(7))), 0) +LIMIT 50000\ +""" + ) + + def test_point_query(self): + actual = self.print_query( + """ + select + session_id, + from sessions + where session_id == '01995624-6a63-7cc4-800c-f5a45d99fa9b' + """ + ) + assert self.generalize_sql(actual) == snapshot( + """\ +SELECT + sessions.session_id AS session_id +FROM + (SELECT + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM + raw_sessions_v3 + WHERE + and(equals(raw_sessions_v3.team_id, ), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull(%(hogql_val_0)s, %(hogql_val_1)s)), 80))))) + GROUP BY + raw_sessions_v3.session_id_v7, + raw_sessions_v3.session_id_v7) AS sessions +WHERE + ifNull(equals(sessions.session_id, %(hogql_val_2)s), 0) +LIMIT 50000""" + ) diff --git a/posthog/hogql/database/schema/util/uuid.py b/posthog/hogql/database/schema/util/uuid.py new file mode 100644 index 0000000000000..4c79b2f47d4ce --- /dev/null +++ b/posthog/hogql/database/schema/util/uuid.py @@ -0,0 +1,75 @@ +from posthog.hogql import ast + + +def uuid_string_expr_to_uuid_expr(uuid_expr: ast.Expr) -> ast.Expr: + return ast.Call(name="toUUID", args=[uuid_expr]) + + +def uuid_string_expr_to_uint128_expr(uuid_expr: ast.Expr) -> ast.Expr: + return ast.Call(name="_toUInt128", args=[(uuid_string_expr_to_uuid_expr(uuid_expr))]) + + +def uuid_expr_to_timestamp_expr(uuid_expr: ast.Expr) -> ast.Expr: + return ast.Call(name="UUIDv7ToDateTime", args=[uuid_expr]) + + +def uuid_uint128_expr_to_timestamp_expr_v2(uuid_expr: ast.Expr) -> ast.Expr: + # use this for compat with sessions v2's ORDER BY + return ast.Call( + name="fromUnixTimestamp", + args=[ + ast.Call( + name="intDiv", + args=[ + ast.Call( + name="_toUInt64", + args=[ + ast.Call( + name="bitShiftRight", + args=[uuid_expr, ast.Constant(value=80)], + ) + ], + ), + ast.Constant(value=1000), + ], + ) + ], + ) + + +def uuid_uint128_expr_to_timestamp_expr_v3(uuid_expr: ast.Expr) -> ast.Expr: + return ast.Call( + name="fromUnixTimestamp64Milli", + args=[ + ast.Call( + name="_toUInt64", + args=[ + ast.Call( + name="bitShiftRight", + args=[uuid_expr, ast.Constant(value=80)], + ) + ], + ), + ], + ) + + +def uuid_uint128_to_uuid_expr(uuid: ast.Expr) -> ast.Expr: + return ast.Call( + name="reinterpretAsUUID", + args=[ + ast.Call( + name="bitOr", + args=[ + ast.Call( + name="bitShiftLeft", + args=[uuid, ast.Constant(value=64)], + ), + ast.Call( + name="bitShiftRight", + args=[uuid, ast.Constant(value=64)], + ), + ], + ) + ], + ) diff --git a/posthog/hogql/database/schema/util/where_clause_extractor.py b/posthog/hogql/database/schema/util/where_clause_extractor.py index 4b717dcbc8943..29ed97d1efe63 100644 --- a/posthog/hogql/database/schema/util/where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/where_clause_extractor.py @@ -6,6 +6,10 @@ from posthog.hogql.ast import CompareOperationOp from posthog.hogql.context import HogQLContext from posthog.hogql.database.models import DatabaseField, LazyJoinToAdd, LazyTableToAdd +from posthog.hogql.database.schema.util.uuid import ( + uuid_uint128_expr_to_timestamp_expr_v2, + uuid_uint128_expr_to_timestamp_expr_v3, +) from posthog.hogql.errors import NotImplementedError, QueryError from posthog.hogql.functions.mapping import HOGQL_COMPARISON_MAPPING from posthog.hogql.helpers.timestamp_visitor import is_simple_timestamp_field_expression, is_time_or_interval_constant @@ -329,6 +333,26 @@ def handle_timestamp_comparison( right=node.left, ) ) + + if node.op == CompareOperationOp.Eq: + if is_left_constant and is_session_id_string_expr(node.right, self.context): + left_timestamp_expr = self.session_id_str_to_timestamp_expr(node.left) + if left_timestamp_expr is None: + return None + return ast.CompareOperation( + op=CompareOperationOp.Eq, left=left_timestamp_expr, right=self.timestamp_field + ) + elif is_right_constant and is_session_id_string_expr(node.left, self.context): + right_timestamp_expr = self.session_id_str_to_timestamp_expr(node.right) + if right_timestamp_expr is None: + return None + return ast.CompareOperation( + op=CompareOperationOp.Eq, left=self.timestamp_field, right=right_timestamp_expr + ) + + return None + + def session_id_str_to_timestamp_expr(self, session_id_str_expr: ast.Expr) -> Optional[ast.Expr]: return None @@ -338,36 +362,21 @@ class SessionMinTimestampWhereClauseExtractorV1(SessionMinTimestampWhereClauseEx class SessionMinTimestampWhereClauseExtractorV2(SessionMinTimestampWhereClauseExtractor): - timestamp_field = ast.Call( - name="fromUnixTimestamp", - args=[ - ast.Call( - name="intDiv", - args=[ - ast.Call( - name="_toUInt64", - args=[ - ast.Call( - name="bitShiftRight", - args=[ast.Field(chain=["raw_sessions", "session_id_v7"]), ast.Constant(value=80)], - ) - ], - ), - ast.Constant(value=1000), - ], - ) - ], - ) + timestamp_field = uuid_uint128_expr_to_timestamp_expr_v2(ast.Field(chain=["raw_sessions", "session_id_v7"])) time_buffer = ast.Call(name="toIntervalDay", args=[ast.Constant(value=SESSION_BUFFER_DAYS)]) class SessionMinTimestampWhereClauseExtractorV3(SessionMinTimestampWhereClauseExtractor): - timestamp_field = ast.Call( - name="UUIDv7ToDateTime", - args=[ast.Field(chain=["raw_sessions_v3", "session_id_v7"])], - ) + timestamp_field = ast.Field(chain=["raw_sessions_v3", "session_timestamp"]) time_buffer = ast.Call(name="toIntervalDay", args=[ast.Constant(value=SESSION_BUFFER_DAYS)]) + def session_id_str_to_timestamp_expr(self, session_id_str_expr: ast.Expr) -> Optional[ast.Expr]: + # this is a roundabout way of doing it, but we want to match the logic in the clickhouse table definition + timestamp_expr = uuid_uint128_expr_to_timestamp_expr_v3( + ast.Call(name="_toUInt128", args=[ast.Call(name="toUUID", args=[session_id_str_expr])]) + ) + return timestamp_expr + def has_tombstone(expr: ast.Expr, tombstone_string: str) -> bool: visitor = HasTombstoneVisitor(tombstone_string) @@ -446,6 +455,39 @@ def visit_alias(self, node: ast.Alias) -> ast.Expr: return self.visit(node.expr) +def is_session_id_string_expr(node: ast.Expr, context: HogQLContext) -> bool: + if isinstance(node, ast.Field): + from posthog.hogql.database.schema.events import EventsTable + from posthog.hogql.database.schema.session_replay_events import RawSessionReplayEventsTable + from posthog.hogql.database.schema.sessions_v3 import SessionsTableV3 + + if node.type and isinstance(node.type, ast.FieldType): + resolved_field = node.type.resolve_database_field(context) + table_type = node.type.resolve_table_type(context) + if isinstance(table_type, ast.TableAliasType): + table_type = table_type.table_type + if isinstance(table_type, ast.LazyJoinType): + table = table_type.lazy_join.join_table + else: + table = table_type.table + if resolved_field and isinstance(resolved_field, DatabaseField): + if ( + (isinstance(table, EventsTable) and resolved_field.name == "$session_id") + or (isinstance(table, SessionsTableV3) and resolved_field.name in ("session_id")) + or (isinstance(table, RawSessionReplayEventsTable) and resolved_field.name == "min_first_timestamp") + ): + return True + # no type information, so just use the name of the field + if node.chain[-1] in [ + "session_id", + "$session_id", + ]: + return True + if isinstance(node, ast.Alias): + return is_session_id_string_expr(node.expr, context) + return False + + def flatten_ands(exprs): flattened = [] for expr in exprs: diff --git a/posthog/hogql/functions/clickhouse/conversions.py b/posthog/hogql/functions/clickhouse/conversions.py index e800fe99c0e37..d87ecd00fbff2 100644 --- a/posthog/hogql/functions/clickhouse/conversions.py +++ b/posthog/hogql/functions/clickhouse/conversions.py @@ -85,6 +85,7 @@ "parseDateTimeBestEffort": HogQLFunctionMeta("parseDateTime64BestEffortOrNull", 1, 2, tz_aware=True), "toTypeName": HogQLFunctionMeta("toTypeName", 1, 1), "cityHash64": HogQLFunctionMeta("cityHash64", 1, 1), + "UUIDv7ToDateTime": HogQLFunctionMeta("UUIDv7ToDateTime", 1, 1, tz_aware=True), } # Date conversion functions (that overlap with type conversions) diff --git a/posthog/hogql/functions/clickhouse/datetime.py b/posthog/hogql/functions/clickhouse/datetime.py index 6a2613c7ef4da..09a5f458cac88 100644 --- a/posthog/hogql/functions/clickhouse/datetime.py +++ b/posthog/hogql/functions/clickhouse/datetime.py @@ -18,6 +18,7 @@ "toSecond": HogQLFunctionMeta("toSecond", 1, 1), "toUnixTimestamp": HogQLFunctionMeta("toUnixTimestamp", 1, 2), "toUnixTimestamp64Milli": HogQLFunctionMeta("toUnixTimestamp64Milli", 1, 1), + "fromUnixTimestamp64Milli": HogQLFunctionMeta("fromUnixTimestamp64Milli", 1, 1), "toStartOfInterval": HogQLFunctionMeta( "toStartOfInterval", 2, diff --git a/posthog/hogql/helpers/timestamp_visitor.py b/posthog/hogql/helpers/timestamp_visitor.py index 7fecface842d5..8173c935c4324 100644 --- a/posthog/hogql/helpers/timestamp_visitor.py +++ b/posthog/hogql/helpers/timestamp_visitor.py @@ -105,6 +105,7 @@ def visit_alias(self, node: ast.Alias) -> bool: from posthog.hogql.database.schema.session_replay_events import RawSessionReplayEventsTable from posthog.hogql.database.schema.sessions_v1 import SessionsTableV1 from posthog.hogql.database.schema.sessions_v2 import SessionsTableV2 + from posthog.hogql.database.schema.sessions_v3 import SessionsTableV3 if node.type and isinstance(node.type, ast.FieldAliasType): try: @@ -134,6 +135,12 @@ def visit_alias(self, node: ast.Alias) -> bool: # we guarantee that a session is < 24 hours, so with bufferDays being 3 above, we can use $end_timestamp too and resolved_field.name in ("$start_timestamp", "$end_timestamp") ) + or ( + isinstance(table_type, ast.LazyTableType) + and isinstance(table_type.table, SessionsTableV3) + # we guarantee that a session is < 24 hours, so with bufferDays being 3 above, we can use $end_timestamp too + and resolved_field.name in ("$start_timestamp", "$end_timestamp") + ) or ( isinstance(table_type, ast.TableType) and isinstance(table_type.table, RawSessionReplayEventsTable) diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 05bbf3dd3985c..848a286211b18 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -825,9 +825,10 @@ def visit_compare_operation(self, node: ast.CompareOperation): ): not_nullable = True hack_sessions_timestamp = ( - "fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000))" + "fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000))", + "raw_sessions_v3.session_timestamp", ) - if hack_sessions_timestamp == left or hack_sessions_timestamp == right: + if any(s == left for s in hack_sessions_timestamp) or any(s == right for s in hack_sessions_timestamp): not_nullable = True # :HACK: Prevent ifNull() wrapping for $ai_trace_id to allow bloom filter index usage diff --git a/posthog/models/raw_sessions/sql_v3.py b/posthog/models/raw_sessions/sql_v3.py index 0fb9a5dd35c96..3028a823d9cbb 100644 --- a/posthog/models/raw_sessions/sql_v3.py +++ b/posthog/models/raw_sessions/sql_v3.py @@ -14,7 +14,6 @@ don't need to consider this). Upgrades over v2: -* Uses the UUIDv7ToDateTime function in the ORDER BY clause, which was not available when we built v2 * Has a property map for storing lower-tier ad ids, making it easier to add new ad ids in the future * Stores presence of ad ids separately from the value, so e.g. channel type calculations only need to read 1 bit instead of a gclid string up to 100 chars * Parses JSON only once per event rather than once per column per event, saving CPU usage @@ -41,7 +40,7 @@ def TRUNCATE_RAW_SESSIONS_TABLE_SQL_V3(): return f"TRUNCATE TABLE IF EXISTS {SHARDED_RAW_SESSIONS_TABLE_V3()}" -def DROP_RAW_SESSION_SHARDED_TABLE_SQL_V3(): +def DROP_RAW_SESSION_TABLE_SQL_V3(): return f"DROP TABLE IF EXISTS {SHARDED_RAW_SESSIONS_TABLE_V3()}" @@ -67,7 +66,16 @@ def DROP_RAW_SESSION_VIEW_SQL_V3(): CREATE TABLE IF NOT EXISTS {table_name} ( team_id Int64, - session_id_v7 UUID, + + -- Both UInt128 and UUID are imperfect choices here + -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ + -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + -- right now choose UInt128 as that's the type of events.$session_id_uuid, but in the future we will probably want to switch everything to the new CH UUID type (when it's released) + session_id_v7 UInt128, + -- Ideally we would not need to store this separately, as the ID *is* the timestamp + -- Unfortunately for now, chaining clickhouse functions to extract the timestamp will break indexes / partition pruning, so do this workaround + -- again, when the new CH UUID type is released, we should try to switch to that and remove the separate timestamp column + session_timestamp DateTime64 MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -151,15 +159,10 @@ def RAW_SESSIONS_TABLE_SQL_V3(): return ( RAW_SESSIONS_TABLE_BASE_SQL_V3 + """ -PARTITION BY toYYYYMM(UUIDv7ToDateTime(session_id_v7)) +PARTITION BY toYYYYMM(session_timestamp) ORDER BY ( team_id, - - -- sadly we need to include this as clickhouse UUIDs have insane ordering - -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ - -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope - UUIDv7ToDateTime(session_id_v7), - + session_timestamp, session_id_v7 ) """ @@ -255,7 +258,7 @@ def RAW_SESSION_TABLE_MV_SELECT_SQL_V3(where="TRUE"): WITH parsed_events AS ( SELECT team_id, - `$session_id`, + `$session_id_uuid` AS session_id_v7, distinct_id AS _distinct_id, person_id, timestamp, @@ -273,8 +276,8 @@ def RAW_SESSION_TABLE_MV_SELECT_SQL_V3(where="TRUE"): ) SELECT - team_id, - toUUID(`$session_id`) as session_id_v7, + team_id, + session_id_v7, initializeAggregation('argMaxState', _distinct_id, timestamp) as distinct_id, initializeAggregation('argMaxState', person_id, timestamp) as person_id, @@ -421,7 +424,7 @@ def DISTRIBUTED_RAW_SESSIONS_TABLE_SQL_V3(): CREATE OR REPLACE VIEW {TABLE_BASE_NAME_V3}_v AS SELECT session_id_v7, - UUIDv7ToDateTime(session_id_v7) as session_timestamp, + session_timestamp, team_id, argMaxMerge(distinct_id) as distinct_id, @@ -484,7 +487,7 @@ def DISTRIBUTED_RAW_SESSIONS_TABLE_SQL_V3(): -- flags groupUniqArrayMapMerge(flag_values) as flag_values FROM {settings.CLICKHOUSE_DATABASE}.{DISTRIBUTED_RAW_SESSIONS_TABLE_V3()} -GROUP BY session_id_v7, team_id +GROUP BY session_id_v7, session_timestamp, team_id """ ) diff --git a/posthog/test/base.py b/posthog/test/base.py index 2fc4e7cc6eba9..fbb08d9c03f02 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -116,7 +116,7 @@ DISTRIBUTED_RAW_SESSIONS_TABLE_SQL_V3, DROP_RAW_SESSION_DISTRIBUTED_TABLE_SQL_V3, DROP_RAW_SESSION_MATERIALIZED_VIEW_SQL_V3, - DROP_RAW_SESSION_SHARDED_TABLE_SQL_V3, + DROP_RAW_SESSION_TABLE_SQL_V3, DROP_RAW_SESSION_VIEW_SQL_V3, DROP_RAW_SESSION_WRITABLE_TABLE_SQL_V3, RAW_SESSIONS_CREATE_OR_REPLACE_VIEW_SQL_V3, @@ -1207,7 +1207,7 @@ def reset_clickhouse_database() -> None: DROP_PERSON_TABLE_SQL, DROP_PROPERTY_DEFINITIONS_TABLE_SQL(), DROP_RAW_SESSION_SHARDED_TABLE_SQL(), - DROP_RAW_SESSION_SHARDED_TABLE_SQL_V3(), + DROP_RAW_SESSION_TABLE_SQL_V3(), DROP_RAW_SESSION_DISTRIBUTED_TABLE_SQL(), DROP_RAW_SESSION_DISTRIBUTED_TABLE_SQL_V3(), DROP_RAW_SESSION_WRITABLE_TABLE_SQL(), From e193ec9abb01bedf09638978c6045136bf28f06d Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Fri, 3 Oct 2025 13:48:43 +0100 Subject: [PATCH 02/13] Don't use = to check whether element in set --- posthog/hogql/printer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 848a286211b18..8ebdf6ba78f56 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -828,7 +828,7 @@ def visit_compare_operation(self, node: ast.CompareOperation): "fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000))", "raw_sessions_v3.session_timestamp", ) - if any(s == left for s in hack_sessions_timestamp) or any(s == right for s in hack_sessions_timestamp): + if left in hack_sessions_timestamp or right in hack_sessions_timestamp: not_nullable = True # :HACK: Prevent ifNull() wrapping for $ai_trace_id to allow bloom filter index usage From 2f78f497217541266c6c602e21c3cf7c191aab7f Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Fri, 3 Oct 2025 13:49:03 +0100 Subject: [PATCH 03/13] Fix wrapping in toTimeZone --- posthog/hogql/database/schema/sessions_v3.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/posthog/hogql/database/schema/sessions_v3.py b/posthog/hogql/database/schema/sessions_v3.py index b369d5112a48a..ffe87dcf4f767 100644 --- a/posthog/hogql/database/schema/sessions_v3.py +++ b/posthog/hogql/database/schema/sessions_v3.py @@ -41,7 +41,9 @@ RAW_SESSIONS_FIELDS: dict[str, FieldOrTable] = { "team_id": IntegerDatabaseField(name="team_id", nullable=False), "session_id_v7": UUIDDatabaseField(name="session_id_v7", nullable=False), - "session_timestamp": DateTimeDatabaseField(name="session_timestamp", nullable=False), + "session_timestamp": DatabaseField( + name="session_timestamp", nullable=False + ), # not a DateTimeDatabaseField to avoid wrapping with toTimeZone "distinct_id": DatabaseField(name="distinct_id", nullable=False), "person_id": DatabaseField(name="person_id", nullable=False), "min_timestamp": DateTimeDatabaseField(name="min_timestamp", nullable=False), From d4247fd01963b4b2eb41dc1754c24a3af63b327d Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 3 Oct 2025 12:59:19 +0000 Subject: [PATCH 04/13] Update query snapshots --- .../test/__snapshots__/test_schema.ambr | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/posthog/clickhouse/test/__snapshots__/test_schema.ambr b/posthog/clickhouse/test/__snapshots__/test_schema.ambr index ffc1b95cb2a16..874aa4601235a 100644 --- a/posthog/clickhouse/test/__snapshots__/test_schema.ambr +++ b/posthog/clickhouse/test/__snapshots__/test_schema.ambr @@ -2629,8 +2629,12 @@ -- Both UInt128 and UUID are imperfect choices here -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + -- right now choose UInt128 as that's the type of events.$session_id_uuid, but in the future we will probably want to switch everything to the new CH UUID type (when it's released) session_id_v7 UInt128, - session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), + -- Ideally we would not need to store this separately, as the ID *is* the timestamp + -- Unfortunately for now, chaining clickhouse functions to extract the timestamp will break indexes / partition pruning, so do this workaround + -- again, when the new CH UUID type is released, we should try to switch to that and remove the separate timestamp column + session_timestamp DateTime64 MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -3684,8 +3688,12 @@ -- Both UInt128 and UUID are imperfect choices here -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + -- right now choose UInt128 as that's the type of events.$session_id_uuid, but in the future we will probably want to switch everything to the new CH UUID type (when it's released) session_id_v7 UInt128, - session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), + -- Ideally we would not need to store this separately, as the ID *is* the timestamp + -- Unfortunately for now, chaining clickhouse functions to extract the timestamp will break indexes / partition pruning, so do this workaround + -- again, when the new CH UUID type is released, we should try to switch to that and remove the separate timestamp column + session_timestamp DateTime64 MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -4670,8 +4678,12 @@ -- Both UInt128 and UUID are imperfect choices here -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + -- right now choose UInt128 as that's the type of events.$session_id_uuid, but in the future we will probably want to switch everything to the new CH UUID type (when it's released) session_id_v7 UInt128, - session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), + -- Ideally we would not need to store this separately, as the ID *is* the timestamp + -- Unfortunately for now, chaining clickhouse functions to extract the timestamp will break indexes / partition pruning, so do this workaround + -- again, when the new CH UUID type is released, we should try to switch to that and remove the separate timestamp column + session_timestamp DateTime64 MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session @@ -5940,8 +5952,12 @@ -- Both UInt128 and UUID are imperfect choices here -- see https://michcioperz.com/wiki/clickhouse-uuid-ordering/ -- but also see https://github.com/ClickHouse/ClickHouse/issues/77226 and hope + -- right now choose UInt128 as that's the type of events.$session_id_uuid, but in the future we will probably want to switch everything to the new CH UUID type (when it's released) session_id_v7 UInt128, - session_timestamp DateTime MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), + -- Ideally we would not need to store this separately, as the ID *is* the timestamp + -- Unfortunately for now, chaining clickhouse functions to extract the timestamp will break indexes / partition pruning, so do this workaround + -- again, when the new CH UUID type is released, we should try to switch to that and remove the separate timestamp column + session_timestamp DateTime64 MATERIALIZED fromUnixTimestamp64Milli(toUInt64(bitShiftRight(session_id_v7, 80))), -- ClickHouse will pick the latest value of distinct_id for the session -- this is fine since even if the distinct_id changes during a session From 2a6d305f2821ccece6343a950155f67f8021152c Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 3 Oct 2025 13:11:47 +0000 Subject: [PATCH 05/13] Update query snapshots --- .../backend/test/__snapshots__/test_hogql_fixer_ai.ambr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/products/data_warehouse/backend/test/__snapshots__/test_hogql_fixer_ai.ambr b/products/data_warehouse/backend/test/__snapshots__/test_hogql_fixer_ai.ambr index 9aa39605e418d..5e50ccb5a650d 100644 --- a/products/data_warehouse/backend/test/__snapshots__/test_hogql_fixer_ai.ambr +++ b/products/data_warehouse/backend/test/__snapshots__/test_hogql_fixer_ai.ambr @@ -51,7 +51,7 @@ HogQL defines what functions are available with most (but not all) having a 1:1 mapping to ClickHouse functions. These are the non-aggregated HogQL functions: ``` - ['plus', 'minus', 'multiply', 'divide', 'intDiv', 'intDivOrZero', 'modulo', 'moduloOrZero', 'positiveModulo', 'negate', 'abs', 'gcd', 'lcm', 'max2', 'min2', 'multiplyDecimal', 'divideDecimal', 'empty', 'notEmpty', 'length', 'reverse', 'array', 'range', 'arrayConcat', 'arrayElement', 'has', 'hasAll', 'hasAny', 'hasSubstr', 'indexOf', 'arrayCount', 'countEqual', 'arrayEnumerate', 'arrayEnumerateUniq', 'arrayPopBack', 'arrayPopFront', 'arrayPushBack', 'arrayPushFront', 'arrayResize', 'arraySlice', 'arraySort', 'arrayReverseSort', 'arrayUniq', 'arrayJoin', 'arrayDifference', 'arrayDistinct', 'arrayEnumerateDense', 'arrayIntersect', 'arrayReduce', 'arrayReverse', 'arrayFilter', 'arrayFlatten', 'arrayCompact', 'arrayZip', 'arrayAUC', 'arrayMap', 'arrayFill', 'arrayFold', 'arrayWithConstant', 'arraySplit', 'arrayReverseFill', 'arrayReverseSplit', 'arrayRotateLeft', 'arrayRotateRight', 'arrayExists', 'arrayAll', 'arrayFirst', 'arrayLast', 'arrayFirstIndex', 'arrayLastIndex', 'arrayMin', 'arrayMax', 'arraySum', 'arrayAvg', 'arrayCumSum', 'arrayCumSumNonNegative', 'arrayProduct', 'arrayStringConcat', 'generateSeries', 'hex', 'unhex', 'reinterpretAsUInt8', 'reinterpretAsUInt16', 'reinterpretAsUInt32', 'reinterpretAsUInt64', 'reinterpretAsUInt128', 'reinterpretAsUInt256', 'reinterpretAsInt8', 'reinterpretAsInt16', 'reinterpretAsInt32', 'reinterpretAsInt64', 'reinterpretAsInt128', 'reinterpretAsInt256', 'reinterpretAsFloat32', 'reinterpretAsFloat64', 'reinterpretAsUUID', 'toInt', '_toInt8', '_toInt16', '_toInt32', '_toInt64', '_toUInt64', '_toUInt128', 'toFloat', 'toFloatOrZero', 'toFloatOrDefault', 'toDecimal', '_toDate', 'toUUID', 'toString', 'toBool', 'toJSONString', 'parseDateTime', 'parseDateTimeBestEffort', 'toTypeName', 'cityHash64', 'toDate', 'to_date', 'toDateTime', 'toDateTime64', 'toDateTimeUS', 'isnull', 'isNotNull', 'coalesce', 'ifnull', 'nullif', 'assumeNotNull', 'toNullable', 'timeZoneOf', 'timeZoneOffset', 'toYear', 'toQuarter', 'toMonth', 'toDayOfYear', 'toDayOfMonth', 'toDayOfWeek', 'toHour', 'toMinute', 'toSecond', 'toUnixTimestamp', 'toUnixTimestamp64Milli', 'toStartOfInterval', 'toStartOfYear', 'toStartOfISOYear', 'toStartOfQuarter', 'toStartOfMonth', 'toLastDayOfMonth', 'toMonday', 'toStartOfWeek', 'toStartOfDay', 'toLastDayOfWeek', 'toStartOfHour', 'toStartOfMinute', 'toStartOfSecond', 'toStartOfFiveMinutes', 'toStartOfTenMinutes', 'toStartOfFifteenMinutes', 'toTime', 'toISOYear', 'toISOWeek', 'toWeek', 'toYearWeek', 'age', 'dateAdd', 'dateSub', 'date_bin', 'date_add', 'date_subtract', 'date_diff', 'dateDiff', 'timeStampAdd', 'timeStampSub', 'nowInBlock', 'rowNumberInBlock', 'rowNumberInAllBlocks', 'timeSlot', 'toYYYYMM', 'toYYYYMMDD', 'toYYYYMMDDhhmmss', 'addYears', 'addMonths', 'addWeeks', 'addDays', 'addHours', 'addMinutes', 'addSeconds', 'addQuarters', 'subtractYears', 'subtractMonths', 'subtractWeeks', 'subtractDays', 'subtractHours', 'subtractMinutes', 'subtractSeconds', 'subtractQuarters', 'timeSlots', 'formatDateTime', 'dateName', 'monthName', 'fromUnixTimestamp', 'toModifiedJulianDay', 'fromModifiedJulianDay', 'toIntervalSecond', 'toIntervalMinute', 'toIntervalHour', 'toIntervalDay', 'toIntervalWeek', 'toIntervalMonth', 'toIntervalQuarter', 'toIntervalYear', 'now', 'yesterday', 'current_timestamp', 'today', 'current_date', 'date_part', 'date_trunc', 'dateTrunc', 'to_timestamp', 'to_char', 'make_timestamp', 'make_date', 'make_interval', 'make_timestamptz', 'timezone', 'toTimeZone', 'greatCircleDistance', 'geoDistance', 'greatCircleAngle', 'pointInEllipses', 'pointInPolygon', 'geohashEncode', 'geohashDecode', 'geohashesInBox', 'h3IsValid', 'h3GetResolution', 'h3GetBaseCell', 'h3EdgeAngle', 'h3EdgeLengthM', 'h3EdgeLengthKm', 'geoToH3', 'h3ToGeo', 'h3ToGeoBoundary', 'h3kRing', 'h3HexAreaM2', 'h3HexAreaKm2', 'h3IndexesAreNeighbors', 'h3ToChildren', 'h3ToParent', 'h3ToString', 'stringToH3', 'h3IsResClassIII', 'h3IsPentagon', 'h3GetFaces', 'h3CellAreaM2', 'h3CellAreaRads2', 'h3ToCenterChild', 'h3ExactEdgeLengthM', 'h3ExactEdgeLengthKm', 'h3ExactEdgeLengthRads', 'h3NumHexagons', 'h3PointDistM', 'h3PointDistKm', 'h3PointDistRads', 'h3GetRes0Indexes', 'h3GetPentagonIndexes', 'h3Line', 'h3Distance', 'h3HexRing', 'h3GetUnidirectionalEdge', 'h3UnidirectionalEdgeIsValid', 'h3GetOriginIndexFromUnidirectionalEdge', 'h3GetDestinationIndexFromUnidirectionalEdge', 'h3GetIndexesFromUnidirectionalEdge', 'h3GetUnidirectionalEdgesFromHexagon', 'h3GetUnidirectionalEdgeBoundary', 'isValidJSON', 'JSONHas', 'JSONLength', 'JSONArrayLength', 'JSONType', 'JSONExtract', 'JSONExtractUInt', 'JSONExtractInt', 'JSONExtractFloat', 'JSONExtractBool', 'JSONExtractString', 'JSONExtractKeys', 'JSONExtractRaw', 'JSONExtractArrayRaw', 'JSONExtractKeysAndValues', 'JSONExtractKeysAndValuesRaw', 'JSON_VALUE', 'e', 'pi', 'exp', 'log', 'ln', 'exp2', 'log2', 'exp10', 'log10', 'sqrt', 'cbrt', 'erf', 'erfc', 'lgamma', 'tgamma', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'pow', 'power', 'intExp2', 'intExp10', 'cosh', 'acosh', 'sinh', 'asinh', 'atanh', 'atan2', 'hypot', 'log1p', 'sign', 'degrees', 'radians', 'factorial', 'width_bucket', 'floor', 'ceil', 'trunc', 'round', 'roundBankers', 'roundToExp2', 'roundDuration', 'roundAge', 'roundDown', 'left', 'right', 'lengthUTF8', 'leftPad', 'rightPad', 'leftPadUTF8', 'rightPadUTF8', 'lower', 'upper', 'lowerUTF8', 'upperUTF8', 'isValidUTF8', 'toValidUTF8', 'format', 'reverseUTF8', 'concat', 'substring', 'substringUTF8', 'appendTrailingCharIfAbsent', 'convertCharset', 'base58Encode', 'base58Decode', 'tryBase58Decode', 'base64Encode', 'base64Decode', 'tryBase64Decode', 'endsWith', 'startsWith', 'encodeXMLComponent', 'decodeXMLComponent', 'extractTextFromHTML', 'ascii', 'concatWithSeparator', 'position', 'positionCaseInsensitive', 'positionUTF8', 'positionCaseInsensitiveUTF8', 'multiSearchAllPositions', 'multiSearchAllPositionsUTF8', 'multiSearchFirstPosition', 'multiSearchFirstIndex', 'multiSearchAny', 'match', 'multiMatchAny', 'multiMatchAnyIndex', 'multiMatchAllIndices', 'multiFuzzyMatchAny', 'multiFuzzyMatchAnyIndex', 'multiFuzzyMatchAllIndices', 'extract', 'extractAll', 'extractAllGroupsHorizontal', 'extractAllGroupsVertical', 'like', 'ilike', 'notLike', 'notILike', 'ngramDistance', 'ngramSearch', 'countSubstrings', 'countSubstringsCaseInsensitive', 'countSubstringsCaseInsensitiveUTF8', 'countMatches', 'regexpExtract', 'replace', 'replaceAll', 'replaceOne', 'replaceRegexpAll', 'replaceRegexpOne', 'regexpQuoteMeta', 'translate', 'translateUTF8', 'splitByChar', 'splitByString', 'splitByRegexp', 'splitByWhitespace', 'splitByNonAlpha', 'alphaTokens', 'extractAllGroups', 'ngrams', 'tokens', 'repeat', 'initcap', 'lpad', 'rpad', 'split_part', 'ltrim', 'trimLeft', 'rtrim', 'trimRight', 'btrim', 'trim', 'equals', 'notEquals', 'less', 'greater', 'lessOrEquals', 'greaterOrEquals', 'in', 'notIn', 'and', 'or', 'xor', 'not', 'if', 'multiIf', 'map', 'mapFromArrays', 'mapAdd', 'mapSubtract', 'mapPopulateSeries', 'mapContains', 'mapKeys', 'mapValues', 'mapContainsKeyLike', 'mapExtractKeyLike', 'mapApply', 'mapFilter', 'mapUpdate', 'bitAnd', 'bitOr', 'bitXor', 'bitNot', 'bitShiftLeft', 'bitShiftRight', 'bitRotateLeft', 'bitRotateRight', 'bitSlice', 'bitTest', 'bitTestAll', 'bitTestAny', 'bitCount', 'bitHammingDistance', 'bitmapBuild', 'bitmapToArray', 'bitmapSubsetInRange', 'bitmapSubsetLimit', 'subBitmap', 'bitmapContains', 'bitmapHasAny', 'bitmapHasAll', 'bitmapCardinality', 'bitmapMin', 'bitmapMax', 'bitmapTransform', 'bitmapAnd', 'bitmapOr', 'bitmapXor', 'bitmapAndnot', 'bitmapAndCardinality', 'bitmapOrCardinality', 'bitmapXorCardinality', 'bitmapAndnotCardinality', 'protocol', 'domain', 'domainWithoutWWW', 'topLevelDomain', 'firstSignificantSubdomain', 'cutToFirstSignificantSubdomain', 'cutToFirstSignificantSubdomainWithWWW', 'port', 'path', 'pathFull', 'queryString', 'fragment', 'queryStringAndFragment', 'extractURLParameter', 'extractURLParameters', 'extractURLParameterNames', 'URLHierarchy', 'URLPathHierarchy', 'encodeURLComponent', 'decodeURLComponent', 'encodeURLFormComponent', 'decodeURLFormComponent', 'netloc', 'cutWWW', 'cutQueryString', 'cutFragment', 'cutQueryStringAndFragment', 'cutURLParameter', 'tuple', 'tupleElement', 'untuple', 'tupleHammingDistance', 'tupleToNameValuePairs', 'tuplePlus', 'tupleMinus', 'tupleMultiply', 'tupleDivide', 'tupleNegate', 'tupleMultiplyByNumber', 'tupleDivideByNumber', 'dotProduct', 'isFinite', 'isInfinite', 'ifNotFinite', 'isNaN', 'bar', 'transform', 'formatReadableDecimalSize', 'formatReadableSize', 'formatReadableQuantity', 'formatReadableTimeDelta', 'least', 'greatest', 'tumble', 'hop', 'tumbleStart', 'tumbleEnd', 'hopStart', 'hopEnd', 'L1Norm', 'L2Norm', 'LinfNorm', 'LpNorm', 'L1Distance', 'L2Distance', 'LinfDistance', 'LpDistance', 'L1Normalize', 'L2Normalize', 'LinfNormalize', 'LpNormalize', 'cosineDistance', 'rank', 'dense_rank', 'row_number', 'first_value', 'last_value', 'nth_value', 'lagInFrame', 'leadInFrame', 'lag', 'lead', 'getSurveyResponse', 'uniqueSurveySubmissionsFilter', 'languageCodeToName', 'aggregate_funnel', 'aggregate_funnel_array', 'aggregate_funnel_cohort', 'aggregate_funnel_test', 'aggregate_funnel_trends', 'aggregate_funnel_array_trends', 'aggregate_funnel_cohort_trends', 'aggregate_funnel_array_trends_test'] + ['plus', 'minus', 'multiply', 'divide', 'intDiv', 'intDivOrZero', 'modulo', 'moduloOrZero', 'positiveModulo', 'negate', 'abs', 'gcd', 'lcm', 'max2', 'min2', 'multiplyDecimal', 'divideDecimal', 'empty', 'notEmpty', 'length', 'reverse', 'array', 'range', 'arrayConcat', 'arrayElement', 'has', 'hasAll', 'hasAny', 'hasSubstr', 'indexOf', 'arrayCount', 'countEqual', 'arrayEnumerate', 'arrayEnumerateUniq', 'arrayPopBack', 'arrayPopFront', 'arrayPushBack', 'arrayPushFront', 'arrayResize', 'arraySlice', 'arraySort', 'arrayReverseSort', 'arrayUniq', 'arrayJoin', 'arrayDifference', 'arrayDistinct', 'arrayEnumerateDense', 'arrayIntersect', 'arrayReduce', 'arrayReverse', 'arrayFilter', 'arrayFlatten', 'arrayCompact', 'arrayZip', 'arrayAUC', 'arrayMap', 'arrayFill', 'arrayFold', 'arrayWithConstant', 'arraySplit', 'arrayReverseFill', 'arrayReverseSplit', 'arrayRotateLeft', 'arrayRotateRight', 'arrayExists', 'arrayAll', 'arrayFirst', 'arrayLast', 'arrayFirstIndex', 'arrayLastIndex', 'arrayMin', 'arrayMax', 'arraySum', 'arrayAvg', 'arrayCumSum', 'arrayCumSumNonNegative', 'arrayProduct', 'arrayStringConcat', 'generateSeries', 'hex', 'unhex', 'reinterpretAsUInt8', 'reinterpretAsUInt16', 'reinterpretAsUInt32', 'reinterpretAsUInt64', 'reinterpretAsUInt128', 'reinterpretAsUInt256', 'reinterpretAsInt8', 'reinterpretAsInt16', 'reinterpretAsInt32', 'reinterpretAsInt64', 'reinterpretAsInt128', 'reinterpretAsInt256', 'reinterpretAsFloat32', 'reinterpretAsFloat64', 'reinterpretAsUUID', 'toInt', '_toInt8', '_toInt16', '_toInt32', '_toInt64', '_toUInt64', '_toUInt128', 'toFloat', 'toFloatOrZero', 'toFloatOrDefault', 'toDecimal', '_toDate', 'toUUID', 'toString', 'toBool', 'toJSONString', 'parseDateTime', 'parseDateTimeBestEffort', 'toTypeName', 'cityHash64', 'UUIDv7ToDateTime', 'toDate', 'to_date', 'toDateTime', 'toDateTime64', 'toDateTimeUS', 'isnull', 'isNotNull', 'coalesce', 'ifnull', 'nullif', 'assumeNotNull', 'toNullable', 'timeZoneOf', 'timeZoneOffset', 'toYear', 'toQuarter', 'toMonth', 'toDayOfYear', 'toDayOfMonth', 'toDayOfWeek', 'toHour', 'toMinute', 'toSecond', 'toUnixTimestamp', 'toUnixTimestamp64Milli', 'fromUnixTimestamp64Milli', 'toStartOfInterval', 'toStartOfYear', 'toStartOfISOYear', 'toStartOfQuarter', 'toStartOfMonth', 'toLastDayOfMonth', 'toMonday', 'toStartOfWeek', 'toStartOfDay', 'toLastDayOfWeek', 'toStartOfHour', 'toStartOfMinute', 'toStartOfSecond', 'toStartOfFiveMinutes', 'toStartOfTenMinutes', 'toStartOfFifteenMinutes', 'toTime', 'toISOYear', 'toISOWeek', 'toWeek', 'toYearWeek', 'age', 'dateAdd', 'dateSub', 'date_bin', 'date_add', 'date_subtract', 'date_diff', 'dateDiff', 'timeStampAdd', 'timeStampSub', 'nowInBlock', 'rowNumberInBlock', 'rowNumberInAllBlocks', 'timeSlot', 'toYYYYMM', 'toYYYYMMDD', 'toYYYYMMDDhhmmss', 'addYears', 'addMonths', 'addWeeks', 'addDays', 'addHours', 'addMinutes', 'addSeconds', 'addQuarters', 'subtractYears', 'subtractMonths', 'subtractWeeks', 'subtractDays', 'subtractHours', 'subtractMinutes', 'subtractSeconds', 'subtractQuarters', 'timeSlots', 'formatDateTime', 'dateName', 'monthName', 'fromUnixTimestamp', 'toModifiedJulianDay', 'fromModifiedJulianDay', 'toIntervalSecond', 'toIntervalMinute', 'toIntervalHour', 'toIntervalDay', 'toIntervalWeek', 'toIntervalMonth', 'toIntervalQuarter', 'toIntervalYear', 'now', 'yesterday', 'current_timestamp', 'today', 'current_date', 'date_part', 'date_trunc', 'dateTrunc', 'to_timestamp', 'to_char', 'make_timestamp', 'make_date', 'make_interval', 'make_timestamptz', 'timezone', 'toTimeZone', 'greatCircleDistance', 'geoDistance', 'greatCircleAngle', 'pointInEllipses', 'pointInPolygon', 'geohashEncode', 'geohashDecode', 'geohashesInBox', 'h3IsValid', 'h3GetResolution', 'h3GetBaseCell', 'h3EdgeAngle', 'h3EdgeLengthM', 'h3EdgeLengthKm', 'geoToH3', 'h3ToGeo', 'h3ToGeoBoundary', 'h3kRing', 'h3HexAreaM2', 'h3HexAreaKm2', 'h3IndexesAreNeighbors', 'h3ToChildren', 'h3ToParent', 'h3ToString', 'stringToH3', 'h3IsResClassIII', 'h3IsPentagon', 'h3GetFaces', 'h3CellAreaM2', 'h3CellAreaRads2', 'h3ToCenterChild', 'h3ExactEdgeLengthM', 'h3ExactEdgeLengthKm', 'h3ExactEdgeLengthRads', 'h3NumHexagons', 'h3PointDistM', 'h3PointDistKm', 'h3PointDistRads', 'h3GetRes0Indexes', 'h3GetPentagonIndexes', 'h3Line', 'h3Distance', 'h3HexRing', 'h3GetUnidirectionalEdge', 'h3UnidirectionalEdgeIsValid', 'h3GetOriginIndexFromUnidirectionalEdge', 'h3GetDestinationIndexFromUnidirectionalEdge', 'h3GetIndexesFromUnidirectionalEdge', 'h3GetUnidirectionalEdgesFromHexagon', 'h3GetUnidirectionalEdgeBoundary', 'isValidJSON', 'JSONHas', 'JSONLength', 'JSONArrayLength', 'JSONType', 'JSONExtract', 'JSONExtractUInt', 'JSONExtractInt', 'JSONExtractFloat', 'JSONExtractBool', 'JSONExtractString', 'JSONExtractKeys', 'JSONExtractRaw', 'JSONExtractArrayRaw', 'JSONExtractKeysAndValues', 'JSONExtractKeysAndValuesRaw', 'JSON_VALUE', 'e', 'pi', 'exp', 'log', 'ln', 'exp2', 'log2', 'exp10', 'log10', 'sqrt', 'cbrt', 'erf', 'erfc', 'lgamma', 'tgamma', 'sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'pow', 'power', 'intExp2', 'intExp10', 'cosh', 'acosh', 'sinh', 'asinh', 'atanh', 'atan2', 'hypot', 'log1p', 'sign', 'degrees', 'radians', 'factorial', 'width_bucket', 'floor', 'ceil', 'trunc', 'round', 'roundBankers', 'roundToExp2', 'roundDuration', 'roundAge', 'roundDown', 'left', 'right', 'lengthUTF8', 'leftPad', 'rightPad', 'leftPadUTF8', 'rightPadUTF8', 'lower', 'upper', 'lowerUTF8', 'upperUTF8', 'isValidUTF8', 'toValidUTF8', 'format', 'reverseUTF8', 'concat', 'substring', 'substringUTF8', 'appendTrailingCharIfAbsent', 'convertCharset', 'base58Encode', 'base58Decode', 'tryBase58Decode', 'base64Encode', 'base64Decode', 'tryBase64Decode', 'endsWith', 'startsWith', 'encodeXMLComponent', 'decodeXMLComponent', 'extractTextFromHTML', 'ascii', 'concatWithSeparator', 'position', 'positionCaseInsensitive', 'positionUTF8', 'positionCaseInsensitiveUTF8', 'multiSearchAllPositions', 'multiSearchAllPositionsUTF8', 'multiSearchFirstPosition', 'multiSearchFirstIndex', 'multiSearchAny', 'match', 'multiMatchAny', 'multiMatchAnyIndex', 'multiMatchAllIndices', 'multiFuzzyMatchAny', 'multiFuzzyMatchAnyIndex', 'multiFuzzyMatchAllIndices', 'extract', 'extractAll', 'extractAllGroupsHorizontal', 'extractAllGroupsVertical', 'like', 'ilike', 'notLike', 'notILike', 'ngramDistance', 'ngramSearch', 'countSubstrings', 'countSubstringsCaseInsensitive', 'countSubstringsCaseInsensitiveUTF8', 'countMatches', 'regexpExtract', 'replace', 'replaceAll', 'replaceOne', 'replaceRegexpAll', 'replaceRegexpOne', 'regexpQuoteMeta', 'translate', 'translateUTF8', 'splitByChar', 'splitByString', 'splitByRegexp', 'splitByWhitespace', 'splitByNonAlpha', 'alphaTokens', 'extractAllGroups', 'ngrams', 'tokens', 'repeat', 'initcap', 'lpad', 'rpad', 'split_part', 'ltrim', 'trimLeft', 'rtrim', 'trimRight', 'btrim', 'trim', 'equals', 'notEquals', 'less', 'greater', 'lessOrEquals', 'greaterOrEquals', 'in', 'notIn', 'and', 'or', 'xor', 'not', 'if', 'multiIf', 'map', 'mapFromArrays', 'mapAdd', 'mapSubtract', 'mapPopulateSeries', 'mapContains', 'mapKeys', 'mapValues', 'mapContainsKeyLike', 'mapExtractKeyLike', 'mapApply', 'mapFilter', 'mapUpdate', 'bitAnd', 'bitOr', 'bitXor', 'bitNot', 'bitShiftLeft', 'bitShiftRight', 'bitRotateLeft', 'bitRotateRight', 'bitSlice', 'bitTest', 'bitTestAll', 'bitTestAny', 'bitCount', 'bitHammingDistance', 'bitmapBuild', 'bitmapToArray', 'bitmapSubsetInRange', 'bitmapSubsetLimit', 'subBitmap', 'bitmapContains', 'bitmapHasAny', 'bitmapHasAll', 'bitmapCardinality', 'bitmapMin', 'bitmapMax', 'bitmapTransform', 'bitmapAnd', 'bitmapOr', 'bitmapXor', 'bitmapAndnot', 'bitmapAndCardinality', 'bitmapOrCardinality', 'bitmapXorCardinality', 'bitmapAndnotCardinality', 'protocol', 'domain', 'domainWithoutWWW', 'topLevelDomain', 'firstSignificantSubdomain', 'cutToFirstSignificantSubdomain', 'cutToFirstSignificantSubdomainWithWWW', 'port', 'path', 'pathFull', 'queryString', 'fragment', 'queryStringAndFragment', 'extractURLParameter', 'extractURLParameters', 'extractURLParameterNames', 'URLHierarchy', 'URLPathHierarchy', 'encodeURLComponent', 'decodeURLComponent', 'encodeURLFormComponent', 'decodeURLFormComponent', 'netloc', 'cutWWW', 'cutQueryString', 'cutFragment', 'cutQueryStringAndFragment', 'cutURLParameter', 'tuple', 'tupleElement', 'untuple', 'tupleHammingDistance', 'tupleToNameValuePairs', 'tuplePlus', 'tupleMinus', 'tupleMultiply', 'tupleDivide', 'tupleNegate', 'tupleMultiplyByNumber', 'tupleDivideByNumber', 'dotProduct', 'isFinite', 'isInfinite', 'ifNotFinite', 'isNaN', 'bar', 'transform', 'formatReadableDecimalSize', 'formatReadableSize', 'formatReadableQuantity', 'formatReadableTimeDelta', 'least', 'greatest', 'tumble', 'hop', 'tumbleStart', 'tumbleEnd', 'hopStart', 'hopEnd', 'L1Norm', 'L2Norm', 'LinfNorm', 'LpNorm', 'L1Distance', 'L2Distance', 'LinfDistance', 'LpDistance', 'L1Normalize', 'L2Normalize', 'LinfNormalize', 'LpNormalize', 'cosineDistance', 'rank', 'dense_rank', 'row_number', 'first_value', 'last_value', 'nth_value', 'lagInFrame', 'leadInFrame', 'lag', 'lead', 'getSurveyResponse', 'uniqueSurveySubmissionsFilter', 'languageCodeToName', 'aggregate_funnel', 'aggregate_funnel_array', 'aggregate_funnel_cohort', 'aggregate_funnel_test', 'aggregate_funnel_trends', 'aggregate_funnel_array_trends', 'aggregate_funnel_cohort_trends', 'aggregate_funnel_array_trends_test'] ``` These are the aggregated HogQL functions: From 818d21b29db06a17407eae0e4263897f6d888fde Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Fri, 3 Oct 2025 16:00:40 +0100 Subject: [PATCH 06/13] Exclude session_timestamp from properties --- posthog/hogql/database/schema/sessions_v3.py | 1 + 1 file changed, 1 insertion(+) diff --git a/posthog/hogql/database/schema/sessions_v3.py b/posthog/hogql/database/schema/sessions_v3.py index ffe87dcf4f767..c3691403ee788 100644 --- a/posthog/hogql/database/schema/sessions_v3.py +++ b/posthog/hogql/database/schema/sessions_v3.py @@ -459,6 +459,7 @@ def get_lazy_session_table_properties_v3(search: Optional[str]): "$num_uniq_urls", "$page_screen_autocapture_count_up_to", "$entry_channel_type_properties", + "session_timestamp", # really people should be using $start_timestamp for most queries # aliases for people upgrading from v1 to v2/v3 "$exit_current_url", "$exit_pathname", From 2c71c4c7d6bd3d19f01e7fc5083d9f90b19d79af Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Fri, 3 Oct 2025 16:02:20 +0100 Subject: [PATCH 07/13] Remove redundant test --- .../util/test/test_session_v3_where_clause_extractor.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py index 4843b44878a4b..7c6b74f121b65 100644 --- a/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py @@ -131,12 +131,6 @@ def test_unrelated_function(self): actual = f(self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like('a', 'b')"))) assert actual is None - def test_timestamp_unrelated_function(self): - actual = f( - self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like(toString(min_timestamp), 'b')")) - ) - assert actual is None - def test_timestamp_unrelated_function_timestamp(self): actual = f( self.inliner.get_inner_where(parse("SELECT * FROM sessions WHERE like(toString(min_timestamp), 'b')")) From f555f8dd0c5f97401fa759acf1920dd93cbfce33 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Fri, 3 Oct 2025 16:04:20 +0100 Subject: [PATCH 08/13] Remove extra comma --- .../schema/util/test/test_session_v3_where_clause_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py index 7c6b74f121b65..a06bdc18eeb98 100644 --- a/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py @@ -602,7 +602,7 @@ def test_point_query(self): actual = self.print_query( """ select - session_id, + session_id from sessions where session_id == '01995624-6a63-7cc4-800c-f5a45d99fa9b' """ From 7085f112b026d1fff5ea7f14ec0e9cee2056352d Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Mon, 6 Oct 2025 16:11:06 +0100 Subject: [PATCH 09/13] Add snapshots to v3 session tests --- posthog/clickhouse/test/test_raw_sessions_v3_model.py | 9 ++++++++- posthog/hogql/database/schema/test/test_sessions_v3.py | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/posthog/clickhouse/test/test_raw_sessions_v3_model.py b/posthog/clickhouse/test/test_raw_sessions_v3_model.py index 5f695f940141c..55bdb77f5ae1e 100644 --- a/posthog/clickhouse/test/test_raw_sessions_v3_model.py +++ b/posthog/clickhouse/test/test_raw_sessions_v3_model.py @@ -1,6 +1,12 @@ import datetime -from posthog.test.base import BaseTest, ClickhouseTestMixin, _create_event, flush_persons_and_events +from posthog.test.base import ( + BaseTest, + ClickhouseTestMixin, + _create_event, + flush_persons_and_events, + snapshot_clickhouse_queries, +) from posthog.clickhouse.client import query_with_columns, sync_execute from posthog.models.raw_sessions.sql_v3 import RAW_SESSION_TABLE_BACKFILL_SQL_V3 @@ -22,6 +28,7 @@ def create_session_id(): return str(uuid7(random=session_id_counter)) +@snapshot_clickhouse_queries class TestRawSessionsModel(ClickhouseTestMixin, BaseTest): def select_by_session_id(self, session_id): flush_persons_and_events() diff --git a/posthog/hogql/database/schema/test/test_sessions_v3.py b/posthog/hogql/database/schema/test/test_sessions_v3.py index d0c93f29aeda6..501ccc960cf43 100644 --- a/posthog/hogql/database/schema/test/test_sessions_v3.py +++ b/posthog/hogql/database/schema/test/test_sessions_v3.py @@ -2,7 +2,13 @@ from time import time_ns import pytest -from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_event, + _create_person, + snapshot_clickhouse_queries, +) from posthog.schema import FilterLogicalOperator, HogQLQueryModifiers, SessionTableVersion @@ -18,6 +24,7 @@ from posthog.models.utils import uuid7 +@snapshot_clickhouse_queries class TestSessionsV3(ClickhouseTestMixin, APIBaseTest): def __execute( self, From aaa5b2b1e5a8e6bc29ea393816e3db8ff4108a88 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Mon, 6 Oct 2025 16:12:23 +0100 Subject: [PATCH 10/13] Fix duplicate session id in group by --- posthog/hogql/database/schema/sessions_v3.py | 2 +- .../util/test/test_session_v3_where_clause_extractor.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/posthog/hogql/database/schema/sessions_v3.py b/posthog/hogql/database/schema/sessions_v3.py index c3691403ee788..9b4613fc866f9 100644 --- a/posthog/hogql/database/schema/sessions_v3.py +++ b/posthog/hogql/database/schema/sessions_v3.py @@ -361,7 +361,7 @@ def get_entry_channel_type_property(n: int): aggregate_fields["$exit_pathname"] = aggregate_fields["$end_pathname"] select_fields: list[ast.Expr] = [] - group_by_fields: list[ast.Expr] = [ast.Field(chain=[table_name, "session_id_v7"])] + group_by_fields: list[ast.Expr] = [] for name, chain in requested_fields.items(): if name in aggregate_fields: diff --git a/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py index a06bdc18eeb98..77a421d2d5a85 100644 --- a/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py +++ b/posthog/hogql/database/schema/util/test/test_session_v3_where_clause_extractor.py @@ -358,7 +358,6 @@ def test_select_with_timestamp(self): WHERE and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(%(hogql_val_1)s, toIntervalDay(3)))) GROUP BY - raw_sessions_v3.session_id_v7, raw_sessions_v3.session_id_v7) AS sessions WHERE ifNull(greater(sessions.`$start_timestamp`, %(hogql_val_2)s), 0) @@ -394,7 +393,6 @@ def test_join_with_events(self): WHERE and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(%(hogql_val_0)s, toIntervalDay(3)))) GROUP BY - raw_sessions_v3.session_id_v7, raw_sessions_v3.session_id_v7) AS sessions ON equals(events.`$session_id`, sessions.session_id) WHERE and(equals(events.team_id, ), greater(toTimeZone(events.timestamp, %(hogql_val_1)s), %(hogql_val_2)s)) @@ -432,7 +430,6 @@ def test_union(self): WHERE and(equals(raw_sessions_v3.team_id, ), lessOrEquals(raw_sessions_v3.session_timestamp, plus(today(), toIntervalDay(3)))) GROUP BY - raw_sessions_v3.session_id_v7, raw_sessions_v3.session_id_v7) AS events__session ON equals(events.`$session_id_uuid`, events__session.session_id_v7) WHERE and(equals(events.team_id, ), less(toTimeZone(events.timestamp, %(hogql_val_3)s), today())) @@ -510,7 +507,6 @@ def test_session_breakdown(self): WHERE and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(toStartOfDay(assumeNotNull(toDateTime(%(hogql_val_3)s, %(hogql_val_4)s))), toIntervalDay(3))), lessOrEquals(raw_sessions_v3.session_timestamp, plus(assumeNotNull(toDateTime(%(hogql_val_5)s, %(hogql_val_6)s)), toIntervalDay(3)))) GROUP BY - raw_sessions_v3.session_id_v7, raw_sessions_v3.session_id_v7) AS e__session ON equals(e.`$session_id_uuid`, e__session.session_id_v7) WHERE and(equals(e.team_id, ), and(greaterOrEquals(toTimeZone(e.timestamp, %(hogql_val_19)s), toStartOfDay(assumeNotNull(toDateTime(%(hogql_val_20)s, %(hogql_val_21)s)))), lessOrEquals(toTimeZone(e.timestamp, %(hogql_val_22)s), assumeNotNull(toDateTime(%(hogql_val_23)s, %(hogql_val_24)s))), equals(e.event, %(hogql_val_25)s), in(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), (SELECT @@ -552,7 +548,6 @@ def test_session_replay_query(self): WHERE and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(%(hogql_val_0)s, toIntervalDay(3))), lessOrEquals(raw_sessions_v3.session_timestamp, plus(now64(6, %(hogql_val_1)s), toIntervalDay(3)))) GROUP BY - raw_sessions_v3.session_id_v7, raw_sessions_v3.session_id_v7) AS s__session ON equals(toUInt128(accurateCastOrNull(s.session_id, %(hogql_val_2)s)), s__session.session_id_v7) WHERE and(equals(s.team_id, ), ifNull(equals(s__session.`$entry_pathname`, %(hogql_val_4)s), 0), greaterOrEquals(toTimeZone(s.min_first_timestamp, %(hogql_val_5)s), %(hogql_val_6)s), less(toTimeZone(s.min_first_timestamp, %(hogql_val_7)s), now64(6, %(hogql_val_8)s))) @@ -590,7 +585,6 @@ def test_urls_in_sessions_in_timestamp_query(self): WHERE and(equals(raw_sessions_v3.team_id, ), greaterOrEquals(raw_sessions_v3.session_timestamp, minus(minus(now64(6, %(hogql_val_1)s), toIntervalDay(7)), toIntervalDay(3)))) GROUP BY - raw_sessions_v3.session_id_v7, raw_sessions_v3.session_id_v7) AS sessions WHERE ifNull(greaterOrEquals(sessions.`$start_timestamp`, minus(now64(6, %(hogql_val_2)s), toIntervalDay(7))), 0) @@ -620,7 +614,6 @@ def test_point_query(self): WHERE and(equals(raw_sessions_v3.team_id, ), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull(%(hogql_val_0)s, %(hogql_val_1)s)), 80))))) GROUP BY - raw_sessions_v3.session_id_v7, raw_sessions_v3.session_id_v7) AS sessions WHERE ifNull(equals(sessions.session_id, %(hogql_val_2)s), 0) From 918a13acb9e9b7f00efa62413e1eec8c0481b161 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Mon, 6 Oct 2025 17:12:35 +0100 Subject: [PATCH 11/13] Replace numbers in session tests --- posthog/clickhouse/test/test_raw_sessions_v3_model.py | 2 ++ posthog/hogql/database/schema/test/test_sessions_v3.py | 2 ++ posthog/test/base.py | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/posthog/clickhouse/test/test_raw_sessions_v3_model.py b/posthog/clickhouse/test/test_raw_sessions_v3_model.py index 55bdb77f5ae1e..f6ac05e12a130 100644 --- a/posthog/clickhouse/test/test_raw_sessions_v3_model.py +++ b/posthog/clickhouse/test/test_raw_sessions_v3_model.py @@ -30,6 +30,8 @@ def create_session_id(): @snapshot_clickhouse_queries class TestRawSessionsModel(ClickhouseTestMixin, BaseTest): + snapshot_replace_all_numbers = True + def select_by_session_id(self, session_id): flush_persons_and_events() return query_with_columns( diff --git a/posthog/hogql/database/schema/test/test_sessions_v3.py b/posthog/hogql/database/schema/test/test_sessions_v3.py index 501ccc960cf43..3632518a30f18 100644 --- a/posthog/hogql/database/schema/test/test_sessions_v3.py +++ b/posthog/hogql/database/schema/test/test_sessions_v3.py @@ -26,6 +26,8 @@ @snapshot_clickhouse_queries class TestSessionsV3(ClickhouseTestMixin, APIBaseTest): + snapshot_replace_all_numbers = True + def __execute( self, query, diff --git a/posthog/test/base.py b/posthog/test/base.py index fbb08d9c03f02..0c2b6d53eda34 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -1343,7 +1343,8 @@ def wrapped(self, *args, **kwargs): for query in queries: if "FROM system.columns" not in query: - self.assertQueryMatchesSnapshot(query) + replace_all_numbers = getattr(self, "snapshot_replace_all_numbers", False) + self.assertQueryMatchesSnapshot(query, replace_all_numbers=replace_all_numbers) return wrapped From cc6df142651f8d1c860f766fbccd720c3424dccd Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:24:33 +0000 Subject: [PATCH 12/13] Update query snapshots --- .../test/__snapshots__/test_sessions_v3.ambr | 567 ++++++++++++++++++ 1 file changed, 567 insertions(+) create mode 100644 posthog/hogql/database/schema/test/__snapshots__/test_sessions_v3.ambr diff --git a/posthog/hogql/database/schema/test/__snapshots__/test_sessions_v3.ambr b/posthog/hogql/database/schema/test/__snapshots__/test_sessions_v3.ambr new file mode 100644 index 0000000000000..06ab4481dfd2b --- /dev/null +++ b/posthog/hogql/database/schema/test/__snapshots__/test_sessions_v3.ambr @@ -0,0 +1,567 @@ +# serializer version: 1 +# name: TestSessionsV3.test_bounce_rate + ''' + SELECT sessions.`$is_bounce` AS `$is_bounce`, + sessions.session_id AS session_id + FROM + (SELECT if(ifNull(equals(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 0), 0), NULL, not(or(ifNull(greater(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 1), 0), greaterOrEquals(dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))), 10)))) AS `$is_bounce`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + ORDER BY sessions.session_id ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_can_use_v1_and_v2_fields + ''' + SELECT sessions.`$session_duration` AS `$session_duration`, + sessions.duration AS duration, + sessions.`$end_current_url` AS `$end_current_url`, + sessions.`$exit_current_url` AS `$exit_current_url`, + sessions.`$end_pathname` AS `$end_pathname`, + sessions.`$exit_pathname` AS `$exit_pathname` + FROM + (SELECT dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))) AS `$session_duration`, + dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))) AS duration, + argMaxMerge(raw_sessions_v3.end_url) AS `$end_current_url`, + argMaxMerge(raw_sessions_v3.end_url) AS `$exit_current_url`, + path(argMaxMerge(raw_sessions_v3.end_url)) AS `$end_pathname`, + path(argMaxMerge(raw_sessions_v3.end_url)) AS `$exit_pathname`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + WHERE ifNull(equals(sessions.session_id, '00000000-0000-0000-0000-000000000000'), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_channel_type + ''' + SELECT sessions.`$channel_type` AS `$channel_type` + FROM + (SELECT multiIf(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), 'cross-network'), 'Cross Network', or(in(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), tuple('cpc', 'cpm', 'cpv', 'cpa', 'ppc', 'retargeting')), startsWith(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), 'paid'), tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 5), isNotNull(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 7), ''), 'null'))), coalesce(coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (cutToFirstSignificantSubdomain(coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), '')), 'source'))), if(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*(([^a-df-z]|^)shop|shopping).*)$'), 'Paid Shopping', NULL), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), ''), 'medium')), coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (cutToFirstSignificantSubdomain(coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '')), 'source'))), multiIf(ifNull(equals(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 7), ''), 'null'), '1'), 0), 'Paid Search', match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*video.*)$'), 'Paid Video', tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6), 'Paid Social', 'Paid Unknown')), and(ifNull(equals(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '$direct'), 0), isNull(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null'))), or(isNull(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null'))), in(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), tuple('(direct)', 'direct', '$direct'))), not(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6))), 'Direct', coalesce(coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (cutToFirstSignificantSubdomain(coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), '')), 'source'))), if(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*(([^a-df-z]|^)shop|shopping).*)$'), 'Organic Shopping', NULL), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), ''), 'medium')), coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (cutToFirstSignificantSubdomain(coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '')), 'source'))), multiIf(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*video.*)$'), 'Organic Video', match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), 'push$'), 'Push', tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6), 'Organic Social', ifNull(equals(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '$direct'), 0), 'Direct', isNotNull(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4)), 'Referral', 'Unknown'))) AS `$channel_type`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + WHERE ifNull(equals(sessions.session_id, '00000000-0000-0000-0000-000000000000'), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_counts + ''' + SELECT sessions.`$pageview_count` AS `$pageview_count`, + sessions.`$autocapture_count` AS `$autocapture_count`, + sessions.`$screen_count` AS `$screen_count` + FROM + (SELECT uniqExactMerge(raw_sessions_v3.pageview_uniq) AS `$pageview_count`, + uniqExactMerge(raw_sessions_v3.autocapture_uniq) AS `$autocapture_count`, + uniqExactMerge(raw_sessions_v3.screen_uniq) AS `$screen_count`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + WHERE ifNull(equals(sessions.id, '00000000-0000-0000-0000-000000000000'), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_custom_bounce_rate_duration + ''' + SELECT sessions.`$is_bounce` AS `$is_bounce`, + sessions.session_id AS session_id + FROM + (SELECT if(ifNull(equals(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 0), 0), NULL, not(or(ifNull(greater(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 1), 0), greaterOrEquals(dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))), 10)))) AS `$is_bounce`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + ORDER BY sessions.session_id ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_custom_bounce_rate_duration.1 + ''' + SELECT sessions.`$is_bounce` AS `$is_bounce`, + sessions.session_id AS session_id + FROM + (SELECT if(ifNull(equals(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 0), 0), NULL, not(or(ifNull(greater(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 1), 0), greaterOrEquals(dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))), 10.0)))) AS `$is_bounce`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + ORDER BY sessions.session_id ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_custom_bounce_rate_duration.2 + ''' + SELECT sessions.`$is_bounce` AS `$is_bounce`, + sessions.session_id AS session_id + FROM + (SELECT if(ifNull(equals(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 0), 0), NULL, not(or(ifNull(greater(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 1), 0), greaterOrEquals(dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))), 30.0)))) AS `$is_bounce`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + ORDER BY sessions.session_id ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_empty_counts + ''' + SELECT sessions.`$pageview_count` AS `$pageview_count`, + sessions.`$autocapture_count` AS `$autocapture_count`, + sessions.`$screen_count` AS `$screen_count` + FROM + (SELECT uniqExactMerge(raw_sessions_v3.pageview_uniq) AS `$pageview_count`, + uniqExactMerge(raw_sessions_v3.autocapture_uniq) AS `$autocapture_count`, + uniqExactMerge(raw_sessions_v3.screen_uniq) AS `$screen_count`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + WHERE ifNull(equals(sessions.id, '00000000-0000-0000-0000-000000000000'), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_event_sessions_where + ''' + SELECT count() AS `count()` + FROM events + LEFT JOIN + (SELECT path(argMinMerge(raw_sessions_v3.entry_url)) AS `$entry_pathname`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + GROUP BY raw_sessions_v3.session_id_v7) AS events__session ON equals(events.`$session_id_uuid`, events__session.session_id_v7) + WHERE and(equals(events.team_id, 99999), ifNull(equals(events__session.`$entry_pathname`, '/pathname'), 0)) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_event_sessions_where_event_timestamp + ''' + SELECT events__session.id AS session_id + FROM events + LEFT JOIN + (SELECT toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80)))), greaterOrEquals(raw_sessions_v3.session_timestamp, minus('1970-01-01', toIntervalDay(3)))) + GROUP BY raw_sessions_v3.session_id_v7) AS events__session ON equals(events.`$session_id_uuid`, events__session.session_id_v7) + WHERE and(equals(events.team_id, 99999), ifNull(equals(session_id, '00000000-0000-0000-0000-000000000000'), 0), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), '1970-01-01')) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_idempotent_event_counts + ''' + SELECT sessions.`$pageview_count` AS `$pageview_count`, + sessions.`$autocapture_count` AS `$autocapture_count`, + sessions.`$screen_count` AS `$screen_count` + FROM + (SELECT uniqExactMerge(raw_sessions_v3.pageview_uniq) AS `$pageview_count`, + uniqExactMerge(raw_sessions_v3.autocapture_uniq) AS `$autocapture_count`, + uniqExactMerge(raw_sessions_v3.screen_uniq) AS `$screen_count`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + WHERE ifNull(equals(sessions.id, '00000000-0000-0000-0000-000000000000'), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_last_external_click_url + ''' + SELECT sessions.`$last_external_click_url` AS `$last_external_click_url` + FROM + (SELECT argMaxMerge(raw_sessions_v3.last_external_click_url) AS `$last_external_click_url`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + WHERE ifNull(equals(sessions.session_id, '00000000-0000-0000-0000-000000000000'), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_page_screen_autocapture_count_up_to + ''' + SELECT sessions.`$page_screen_autocapture_count_up_to` AS `$page_screen_autocapture_count_up_to` + FROM + (SELECT uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to) AS `$page_screen_autocapture_count_up_to`, + toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + GROUP BY raw_sessions_v3.session_id_v7) AS sessions + ORDER BY sessions.session_id ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_persons_and_sessions_on_events + ''' + SELECT if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) AS person_id, + events__session.`$entry_utm_source` AS `$entry_utm_source` + FROM events + LEFT JOIN + (SELECT argMinMerge(raw_sessions_v3.entry_utm_source) AS `$entry_utm_source`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), or(equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80)))), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80)))))) + GROUP BY raw_sessions_v3.session_id_v7) AS events__session ON equals(events.`$session_id_uuid`, events__session.session_id_v7) + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__override ON equals(events.distinct_id, events__override.distinct_id) + WHERE and(equals(events.team_id, 99999), or(equals(events.`$session_id`, '00000000-0000-0000-0000-000000000000'), equals(events.`$session_id`, '00000000-0000-0000-0000-000000000000'))) + ORDER BY 2 ASC + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_select_session_replay_session_duration + ''' + SELECT raw_session_replay_events__session.duration AS duration + FROM session_replay_events + LEFT JOIN + (SELECT dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))) AS duration, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + GROUP BY raw_sessions_v3.session_id_v7) AS raw_session_replay_events__session ON equals(toUInt128(accurateCastOrNull(session_replay_events.session_id, 'UUID')), raw_session_replay_events__session.session_id_v7) + WHERE equals(session_replay_events.team_id, 99999) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_select_star_from_raw_sessions + ''' + SELECT toTimeZone(raw_sessions_v3.min_timestamp, 'UTC') AS min_timestamp, + toTimeZone(raw_sessions_v3.max_timestamp, 'UTC') AS max_timestamp, + toTimeZone(raw_sessions_v3.max_inserted_at, 'UTC') AS max_inserted_at, + raw_sessions_v3.urls AS urls + FROM raw_sessions_v3 + WHERE equals(raw_sessions_v3.team_id, 99999) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_select_star_from_sessions + ''' + SELECT sessions.session_id AS session_id, + sessions.session_timestamp AS session_timestamp, + sessions.distinct_id AS distinct_id, + sessions.person_id AS person_id, + sessions.`$start_timestamp` AS `$start_timestamp`, + sessions.`$end_timestamp` AS `$end_timestamp`, + sessions.max_inserted_at AS max_inserted_at, + sessions.`$urls` AS `$urls`, + sessions.`$num_uniq_urls` AS `$num_uniq_urls`, + sessions.`$entry_current_url` AS `$entry_current_url`, + sessions.`$entry_pathname` AS `$entry_pathname`, + sessions.`$entry_hostname` AS `$entry_hostname`, + sessions.`$end_current_url` AS `$end_current_url`, + sessions.`$end_pathname` AS `$end_pathname`, + sessions.`$end_hostname` AS `$end_hostname`, + sessions.`$entry_referring_domain` AS `$entry_referring_domain`, + sessions.`$last_external_click_url` AS `$last_external_click_url`, + sessions.`$entry_utm_source` AS `$entry_utm_source`, + sessions.`$entry_utm_campaign` AS `$entry_utm_campaign`, + sessions.`$entry_utm_medium` AS `$entry_utm_medium`, + sessions.`$entry_utm_term` AS `$entry_utm_term`, + sessions.`$entry_utm_content` AS `$entry_utm_content`, + sessions.`$entry_fbclid` AS `$entry_fbclid`, + sessions.`$entry_has_fbclid` AS `$entry_has_fbclid`, + sessions.`$entry_has_gclid` AS `$entry_has_gclid`, + sessions.`$pageview_count` AS `$pageview_count`, + sessions.`$autocapture_count` AS `$autocapture_count`, + sessions.`$screen_count` AS `$screen_count`, + sessions.`$channel_type` AS `$channel_type`, + sessions.`$session_duration` AS `$session_duration`, + sessions.`$is_bounce` AS `$is_bounce`, + sessions.`$entry_gclsrc` AS `$entry_gclsrc`, + sessions.`$entry_has_gclsrc` AS `$entry_has_gclsrc`, + sessions.`$entry_dclid` AS `$entry_dclid`, + sessions.`$entry_has_dclid` AS `$entry_has_dclid`, + sessions.`$entry_gbraid` AS `$entry_gbraid`, + sessions.`$entry_has_gbraid` AS `$entry_has_gbraid`, + sessions.`$entry_wbraid` AS `$entry_wbraid`, + sessions.`$entry_has_wbraid` AS `$entry_has_wbraid`, + sessions.`$entry_msclkid` AS `$entry_msclkid`, + sessions.`$entry_has_msclkid` AS `$entry_has_msclkid`, + sessions.`$entry_twclid` AS `$entry_twclid`, + sessions.`$entry_has_twclid` AS `$entry_has_twclid`, + sessions.`$entry_li_fat_id` AS `$entry_li_fat_id`, + sessions.`$entry_has_li_fat_id` AS `$entry_has_li_fat_id`, + sessions.`$entry_mc_cid` AS `$entry_mc_cid`, + sessions.`$entry_has_mc_cid` AS `$entry_has_mc_cid`, + sessions.`$entry_igshid` AS `$entry_igshid`, + sessions.`$entry_has_igshid` AS `$entry_has_igshid`, + sessions.`$entry_ttclid` AS `$entry_ttclid`, + sessions.`$entry_has_ttclid` AS `$entry_has_ttclid`, + sessions.`$entry_epik` AS `$entry_epik`, + sessions.`$entry_has_epik` AS `$entry_has_epik`, + sessions.`$entry_qclid` AS `$entry_qclid`, + sessions.`$entry_has_qclid` AS `$entry_has_qclid`, + sessions.`$entry_sccid` AS `$entry_sccid`, + sessions.`$entry_has_sccid` AS `$entry_has_sccid`, + sessions.`$entry__kx` AS `$entry__kx`, + sessions.`$entry_has__kx` AS `$entry_has__kx`, + sessions.`$entry_irclid` AS `$entry_irclid`, + sessions.`$entry_has_irclid` AS `$entry_has_irclid` + FROM + (SELECT toString(reinterpretAsUUID(bitOr(bitShiftLeft(raw_sessions_v3.session_id_v7, 64), bitShiftRight(raw_sessions_v3.session_id_v7, 64)))) AS session_id, + raw_sessions_v3.session_timestamp AS session_timestamp, + argMaxMerge(raw_sessions_v3.distinct_id) AS distinct_id, + argMaxMerge(raw_sessions_v3.person_id) AS person_id, + min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')) AS `$start_timestamp`, + max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC')) AS `$end_timestamp`, + max(toTimeZone(raw_sessions_v3.max_inserted_at, 'UTC')) AS max_inserted_at, + arrayDistinct(arrayFlatten(groupArray(raw_sessions_v3.urls))) AS `$urls`, + length(arrayDistinct(arrayFlatten(groupArray(raw_sessions_v3.urls)))) AS `$num_uniq_urls`, + argMinMerge(raw_sessions_v3.entry_url) AS `$entry_current_url`, + path(argMinMerge(raw_sessions_v3.entry_url)) AS `$entry_pathname`, + domain(argMinMerge(raw_sessions_v3.entry_url)) AS `$entry_hostname`, + argMaxMerge(raw_sessions_v3.end_url) AS `$end_current_url`, + path(argMaxMerge(raw_sessions_v3.end_url)) AS `$end_pathname`, + domain(argMaxMerge(raw_sessions_v3.end_url)) AS `$end_hostname`, + argMinMerge(raw_sessions_v3.entry_referring_domain) AS `$entry_referring_domain`, + argMaxMerge(raw_sessions_v3.last_external_click_url) AS `$last_external_click_url`, + argMinMerge(raw_sessions_v3.entry_utm_source) AS `$entry_utm_source`, + argMinMerge(raw_sessions_v3.entry_utm_campaign) AS `$entry_utm_campaign`, + argMinMerge(raw_sessions_v3.entry_utm_medium) AS `$entry_utm_medium`, + argMinMerge(raw_sessions_v3.entry_utm_term) AS `$entry_utm_term`, + argMinMerge(raw_sessions_v3.entry_utm_content) AS `$entry_utm_content`, + argMinMerge(raw_sessions_v3.entry_fbclid) AS `$entry_fbclid`, + argMinMerge(raw_sessions_v3.entry_has_fbclid) AS `$entry_has_fbclid`, + argMinMerge(raw_sessions_v3.entry_has_gclid) AS `$entry_has_gclid`, + uniqExactMerge(raw_sessions_v3.pageview_uniq) AS `$pageview_count`, + uniqExactMerge(raw_sessions_v3.autocapture_uniq) AS `$autocapture_count`, + uniqExactMerge(raw_sessions_v3.screen_uniq) AS `$screen_count`, + multiIf(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), 'cross-network'), 'Cross Network', or(in(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), tuple('cpc', 'cpm', 'cpv', 'cpa', 'ppc', 'retargeting')), startsWith(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), 'paid'), tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 5), isNotNull(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 7), ''), 'null'))), coalesce(coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (cutToFirstSignificantSubdomain(coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), '')), 'source'))), if(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*(([^a-df-z]|^)shop|shopping).*)$'), 'Paid Shopping', NULL), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), ''), 'medium')), coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (cutToFirstSignificantSubdomain(coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '')), 'source'))), multiIf(ifNull(equals(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 7), ''), 'null'), '1'), 0), 'Paid Search', match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*video.*)$'), 'Paid Video', tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6), 'Paid Social', 'Paid Unknown')), and(ifNull(equals(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '$direct'), 0), isNull(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null'))), or(isNull(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null'))), in(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), tuple('(direct)', 'direct', '$direct'))), not(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6))), 'Direct', coalesce(coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (cutToFirstSignificantSubdomain(coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), '')), 'source'))), if(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*(([^a-df-z]|^)shop|shopping).*)$'), 'Organic Shopping', NULL), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), ''), 'medium')), coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (cutToFirstSignificantSubdomain(coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '')), 'source'))), multiIf(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*video.*)$'), 'Organic Video', match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), 'push$'), 'Push', tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6), 'Organic Social', ifNull(equals(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '$direct'), 0), 'Direct', isNotNull(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4)), 'Referral', 'Unknown'))) AS `$channel_type`, + dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))) AS `$session_duration`, + if(ifNull(equals(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 0), 0), NULL, not(or(ifNull(greater(uniqUpToMerge(1)(raw_sessions_v3.page_screen_autocapture_uniq_up_to), 1), 0), greaterOrEquals(dateDiff('second', min(toTimeZone(raw_sessions_v3.min_timestamp, 'UTC')), max(toTimeZone(raw_sessions_v3.max_timestamp, 'UTC'))), 10)))) AS `$is_bounce`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'gclsrc') AS `$entry_gclsrc`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'gclsrc') AS `$entry_has_gclsrc`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'dclid') AS `$entry_dclid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'dclid') AS `$entry_has_dclid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'gbraid') AS `$entry_gbraid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'gbraid') AS `$entry_has_gbraid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'wbraid') AS `$entry_wbraid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'wbraid') AS `$entry_has_wbraid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'msclkid') AS `$entry_msclkid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'msclkid') AS `$entry_has_msclkid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'twclid') AS `$entry_twclid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'twclid') AS `$entry_has_twclid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'li_fat_id') AS `$entry_li_fat_id`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'li_fat_id') AS `$entry_has_li_fat_id`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'mc_cid') AS `$entry_mc_cid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'mc_cid') AS `$entry_has_mc_cid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'igshid') AS `$entry_igshid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'igshid') AS `$entry_has_igshid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'ttclid') AS `$entry_ttclid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'ttclid') AS `$entry_has_ttclid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'epik') AS `$entry_epik`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'epik') AS `$entry_has_epik`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'qclid') AS `$entry_qclid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'qclid') AS `$entry_has_qclid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'sccid') AS `$entry_sccid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'sccid') AS `$entry_has_sccid`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), '_kx') AS `$entry__kx`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), '_kx') AS `$entry_has__kx`, + arrayElement(argMinMerge(raw_sessions_v3.entry_ad_ids_map), 'irclid') AS `$entry_irclid`, + has(argMinMerge(raw_sessions_v3.entry_ad_ids_set), 'irclid') AS `$entry_has_irclid`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_timestamp, + raw_sessions_v3.session_id_v7) AS sessions + WHERE ifNull(equals(sessions.session_id, '00000000-0000-0000-0000-000000000000'), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- +# name: TestSessionsV3.test_session_dot_channel_type + ''' + SELECT events__session.`$channel_type` AS `$channel_type` + FROM events + LEFT JOIN + (SELECT multiIf(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), 'cross-network'), 'Cross Network', or(in(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), tuple('cpc', 'cpm', 'cpv', 'cpa', 'ppc', 'retargeting')), startsWith(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), 'paid'), tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 5), isNotNull(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 7), ''), 'null'))), coalesce(coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (cutToFirstSignificantSubdomain(coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), '')), 'source'))), if(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*(([^a-df-z]|^)shop|shopping).*)$'), 'Paid Shopping', NULL), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), ''), 'medium')), coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_paid', (cutToFirstSignificantSubdomain(coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '')), 'source'))), multiIf(ifNull(equals(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 7), ''), 'null'), '1'), 0), 'Paid Search', match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*video.*)$'), 'Paid Video', tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6), 'Paid Social', 'Paid Unknown')), and(ifNull(equals(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '$direct'), 0), isNull(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null'))), or(isNull(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null'))), in(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), tuple('(direct)', 'direct', '$direct'))), not(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6))), 'Direct', coalesce(coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (cutToFirstSignificantSubdomain(coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 1), ''), 'null')), '')), 'source'))), if(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*(([^a-df-z]|^)shop|shopping).*)$'), 'Organic Shopping', NULL), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), ''), 'medium')), coalesce(dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), ''), 'source')), dictGetOrNull('posthog_test.channel_definition_dict', 'type_if_organic', (cutToFirstSignificantSubdomain(coalesce(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '')), 'source'))), multiIf(match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 3), ''), 'null')), '^(.*video.*)$'), 'Organic Video', match(lower(nullIf(nullIf(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 2), ''), 'null')), 'push$'), 'Push', tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 6), 'Organic Social', ifNull(equals(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4), '$direct'), 0), 'Direct', isNotNull(tupleElement(argMinMerge(raw_sessions_v3.entry_channel_type_properties), 4)), 'Referral', 'Unknown'))) AS `$channel_type`, + raw_sessions_v3.session_id_v7 AS session_id_v7 + FROM raw_sessions_v3 + WHERE and(equals(raw_sessions_v3.team_id, 99999), equals(raw_sessions_v3.session_timestamp, fromUnixTimestamp64Milli(toUInt64(bitShiftRight(toUInt128(accurateCastOrNull('00000000-0000-0000-0000-000000000000', 'UUID')), 80))))) + GROUP BY raw_sessions_v3.session_id_v7) AS events__session ON equals(events.`$session_id_uuid`, events__session.session_id_v7) + WHERE and(equals(events.team_id, 99999), equals(events.`$session_id`, '00000000-0000-0000-0000-000000000000')) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + transform_null_in=1, + optimize_min_equality_disjunction_chain_length=4294967295, + allow_experimental_join_condition=1 + ''' +# --- From 5c7c11b55b555643f63b6995b2306dd0bc7e64d2 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 16:36:33 +0000 Subject: [PATCH 13/13] Update query snapshots --- .../test_raw_sessions_v3_model.ambr | 175 ++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 posthog/clickhouse/test/__snapshots__/test_raw_sessions_v3_model.ambr diff --git a/posthog/clickhouse/test/__snapshots__/test_raw_sessions_v3_model.ambr b/posthog/clickhouse/test/__snapshots__/test_raw_sessions_v3_model.ambr new file mode 100644 index 0000000000000..1871f1e9c27bb --- /dev/null +++ b/posthog/clickhouse/test/__snapshots__/test_raw_sessions_v3_model.ambr @@ -0,0 +1,175 @@ +# serializer version: 1 +# name: TestRawSessionsModel.test_ad_ids_map_and_set + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_autocapture_does_not_set_attribution_when_pageview_present + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_autocapture_does_set_attribution_when_only_event + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_channel_type_properties + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_counts_pageviews_autocaptures_and_events + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_handles_different_distinct_id_across_same_session + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_handles_entry_and_exit_urls + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_handles_initial_utm_properties + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_it_creates_session_when_creating_event + ''' + + select session_id_v7, + team_id + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_lookup_feature_flag + ''' + + select session_id_v7, + has(flag_values['$feature/flag_string'], 'f1_a') as has_f1_a, + has(flag_values['$feature/flag_string'], 'f1_b') as has_f1_b, + has(flag_values['$feature/flag_string'], 'f1_c') as has_f1_c + from raw_sessions_v3_v + where team_id = 99999 + ORDER BY session_id_v7 + ''' +# --- +# name: TestRawSessionsModel.test_max_inserted_at + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_select_from_sessions + ''' + + SELECT session_id_v7, + team_id, + min_timestamp, + max_timestamp, + urls + FROM raw_sessions_v3 + WHERE session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_select_from_sessions_mv + ''' + + SELECT session_id_v7, + team_id, + min_timestamp, + max_timestamp, + urls + FROM raw_sessions_v3_mv + WHERE session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_separates_sessions_across_same_user + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_separates_sessions_across_same_user.1 + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_separates_sessions_across_same_user.2 + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_store_all_feature_flag_values + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# --- +# name: TestRawSessionsModel.test_tracks_all_distinct_ids + ''' + + select * + from raw_sessions_v3_v + where session_id_v7 = toUInt128(toUUID('00000000-0000-0000-0000-000000000000')) + AND team_id = 99999 + ''' +# ---