From a6b2cd3613bcb6d1eb888153db14088af43a3444 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 04:43:25 +0300 Subject: [PATCH 01/42] Move fragments module under Plausible.Stats.SQL --- extra/lib/plausible/stats/funnel.ex | 2 +- lib/plausible/exports.ex | 2 +- lib/plausible/stats/base.ex | 2 +- lib/plausible/stats/breakdown.ex | 2 +- lib/plausible/stats/clickhouse.ex | 2 +- lib/plausible/stats/current_visitors.ex | 2 +- lib/plausible/stats/filter_suggestions.ex | 2 +- lib/plausible/stats/filters/where_builder.ex | 2 +- lib/plausible/stats/imported/imported.ex | 2 +- lib/plausible/stats/sql/expression.ex | 2 +- lib/plausible/stats/{ => sql}/fragments.ex | 6 +++--- lib/plausible/stats/timeseries.ex | 2 +- 12 files changed, 14 insertions(+), 14 deletions(-) rename lib/plausible/stats/{ => sql}/fragments.ex (95%) diff --git a/extra/lib/plausible/stats/funnel.ex b/extra/lib/plausible/stats/funnel.ex index d9c497cee609..121a699fae80 100644 --- a/extra/lib/plausible/stats/funnel.ex +++ b/extra/lib/plausible/stats/funnel.ex @@ -10,7 +10,7 @@ defmodule Plausible.Stats.Funnel do alias Plausible.Funnels import Ecto.Query - import Plausible.Stats.Fragments + import Plausible.Stats.SQL.Fragments alias Plausible.ClickhouseRepo alias Plausible.Stats.Base diff --git a/lib/plausible/exports.ex b/lib/plausible/exports.ex index d5cb20ada20d..04e6a420218f 100644 --- a/lib/plausible/exports.ex +++ b/lib/plausible/exports.ex @@ -4,7 +4,7 @@ defmodule Plausible.Exports do """ use Plausible - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments import Ecto.Query @doc "Schedules CSV export job to S3 storage" diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 5354833a1f63..f80d6054cb1d 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -1,7 +1,7 @@ defmodule Plausible.Stats.Base do use Plausible.ClickhouseRepo use Plausible - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments alias 
Plausible.Stats.{Query, Filters, TableDecider} alias Plausible.Timezones diff --git a/lib/plausible/stats/breakdown.ex b/lib/plausible/stats/breakdown.ex index a0bdaa150917..abf78beac382 100644 --- a/lib/plausible/stats/breakdown.ex +++ b/lib/plausible/stats/breakdown.ex @@ -1,7 +1,7 @@ defmodule Plausible.Stats.Breakdown do use Plausible.ClickhouseRepo use Plausible - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments import Plausible.Stats.Base import Ecto.Query diff --git a/lib/plausible/stats/clickhouse.ex b/lib/plausible/stats/clickhouse.ex index ed112b9a93c2..d22a4bb0c569 100644 --- a/lib/plausible/stats/clickhouse.ex +++ b/lib/plausible/stats/clickhouse.ex @@ -2,7 +2,7 @@ defmodule Plausible.Stats.Clickhouse do use Plausible use Plausible.Repo use Plausible.ClickhouseRepo - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments import Ecto.Query, only: [from: 2] diff --git a/lib/plausible/stats/current_visitors.ex b/lib/plausible/stats/current_visitors.ex index 4af146691238..3249e510d7e5 100644 --- a/lib/plausible/stats/current_visitors.ex +++ b/lib/plausible/stats/current_visitors.ex @@ -1,6 +1,6 @@ defmodule Plausible.Stats.CurrentVisitors do use Plausible.ClickhouseRepo - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments def current_visitors(site) do first_datetime = diff --git a/lib/plausible/stats/filter_suggestions.ex b/lib/plausible/stats/filter_suggestions.ex index 8713919e3e4a..c4c59a373602 100644 --- a/lib/plausible/stats/filter_suggestions.ex +++ b/lib/plausible/stats/filter_suggestions.ex @@ -1,7 +1,7 @@ defmodule Plausible.Stats.FilterSuggestions do use Plausible.Repo use Plausible.ClickhouseRepo - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments import Plausible.Stats.Base import Ecto.Query diff --git a/lib/plausible/stats/filters/where_builder.ex b/lib/plausible/stats/filters/where_builder.ex index 257d4257a027..302cd92a36e8 100644 --- a/lib/plausible/stats/filters/where_builder.ex +++ 
b/lib/plausible/stats/filters/where_builder.ex @@ -8,7 +8,7 @@ defmodule Plausible.Stats.Filters.WhereBuilder do alias Plausible.Stats.Query - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments require Logger diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index 57cd6737bb19..ddef1014af6e 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -3,7 +3,7 @@ defmodule Plausible.Stats.Imported do use Plausible.ClickhouseRepo import Ecto.Query - import Plausible.Stats.Fragments + import Plausible.Stats.SQL.Fragments import Plausible.Stats.Util, only: [shortname: 2] alias Plausible.Stats.Imported diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index d3f38a142815..252c9c8cf862 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -6,7 +6,7 @@ defmodule Plausible.Stats.SQL.Expression do import Ecto.Query - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments @no_ref "Direct / None" @not_set "(not set)" diff --git a/lib/plausible/stats/fragments.ex b/lib/plausible/stats/sql/fragments.ex similarity index 95% rename from lib/plausible/stats/fragments.ex rename to lib/plausible/stats/sql/fragments.ex index 27f49839f51c..0ca45003fee3 100644 --- a/lib/plausible/stats/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -1,4 +1,4 @@ -defmodule Plausible.Stats.Fragments do +defmodule Plausible.Stats.SQL.Fragments do defmacro uniq(user_id) do quote do fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", unquote(user_id)) @@ -85,7 +85,7 @@ defmodule Plausible.Stats.Fragments do end @doc """ - Same as Plausible.Stats.Fragments.weekstart_not_before/2 but converts dates to + Same as Plausible.Stats.SQL.Fragments.weekstart_not_before/2 but converts dates to the specified timezone. 
""" defmacro weekstart_not_before(date, not_before, timezone) do @@ -145,7 +145,7 @@ defmodule Plausible.Stats.Fragments do defmacro __using__(_) do quote do - import Plausible.Stats.Fragments + import Plausible.Stats.SQL.Fragments end end end diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 69e3dc528f4c..4914023bfc4a 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -4,7 +4,7 @@ defmodule Plausible.Stats.Timeseries do alias Plausible.Stats.{Query, Util, Imported} import Plausible.Stats.{Base} import Ecto.Query - use Plausible.Stats.Fragments + use Plausible.Stats.SQL.Fragments @typep metric :: :pageviews From bf795d6a23818bf97a9d24ff6875c68c151a79f5 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 06:26:07 +0300 Subject: [PATCH 02/42] Introduce select_merge_as macro This simplifies some select_merge calls --- lib/plausible/stats/imported/imported.ex | 133 +++++++------------- lib/plausible/stats/sql/fragments.ex | 49 ++++++-- lib/plausible/stats/sql/query_builder.ex | 41 +++--- test/plausible/stats/sql/fragments_test.exs | 10 ++ 4 files changed, 113 insertions(+), 120 deletions(-) create mode 100644 test/plausible/stats/sql/fragments_test.exs diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index ddef1014af6e..be96a36f51fc 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -1,11 +1,11 @@ defmodule Plausible.Stats.Imported do - alias Plausible.Stats.Filters use Plausible.ClickhouseRepo + use Plausible.Stats.SQL.Fragments import Ecto.Query - import Plausible.Stats.SQL.Fragments import Plausible.Stats.Util, only: [shortname: 2] + alias Plausible.Stats.Filters alias Plausible.Stats.Imported alias Plausible.Stats.Query alias Plausible.Stats.SQL.QueryBuilder @@ -290,12 +290,8 @@ defmodule Plausible.Stats.Imported do "imported_custom_events" -> 
Imported.Base.query_imported("imported_custom_events", site, query) |> where([i], i.visitors > 0) - |> select_merge([i], %{ - dim0: - selected_as( - fragment("-indexOf(?, ?)", type(^events, {:array, :string}), i.name), - :dim0 - ) + |> select_merge_as([i], %{ + dim0: fragment("-indexOf(?, ?)", type(^events, {:array, :string}), i.name) }) |> select_imported_metrics(metrics) |> group_by([], selected_as(:dim0)) @@ -314,8 +310,8 @@ defmodule Plausible.Stats.Imported do ) |> join(:array, index in fragment("indices")) |> group_by([_i, index], index) - |> select_merge([_i, index], %{ - dim0: selected_as(type(fragment("?", index), :integer), :dim0) + |> select_merge_as([_i, index], %{ + dim0: type(fragment("?", index), :integer) }) |> select_imported_metrics(metrics) end) @@ -563,17 +559,8 @@ defmodule Plausible.Stats.Imported do defp group_imported_by(q, dim, key) when dim in [:source, :referrer] do q |> group_by([i], field(i, ^dim)) - |> select_merge([i], %{ - ^key => - selected_as( - fragment( - "if(empty(?), ?, ?)", - field(i, ^dim), - @no_ref, - field(i, ^dim) - ), - ^key - ) + |> select_merge_as([i], %{ + ^key => fragment("if(empty(?), ?, ?)", field(i, ^dim), @no_ref, field(i, ^dim)) }) end @@ -582,90 +569,70 @@ defmodule Plausible.Stats.Imported do q |> group_by([i], field(i, ^dim)) |> where([i], fragment("not empty(?)", field(i, ^dim))) - |> select_merge([i], %{^key => selected_as(field(i, ^dim), ^key)}) + |> select_merge_as([i], %{^key => field(i, ^dim)}) end defp group_imported_by(q, :page, key) do q |> group_by([i], i.page) - |> select_merge([i], %{^key => selected_as(i.page, ^key), time_on_page: sum(i.time_on_page)}) + |> select_merge_as([i], %{^key => i.page, time_on_page: sum(i.time_on_page)}) end defp group_imported_by(q, :country, key) do q |> group_by([i], i.country) |> where([i], i.country != "ZZ") - |> select_merge([i], %{^key => selected_as(i.country, ^key)}) + |> select_merge_as([i], %{^key => i.country}) end defp group_imported_by(q, :region, key) do 
q |> group_by([i], i.region) |> where([i], i.region != "") - |> select_merge([i], %{^key => selected_as(i.region, ^key)}) + |> select_merge_as([i], %{^key => i.region}) end defp group_imported_by(q, :city, key) do q |> group_by([i], i.city) |> where([i], i.city != 0 and not is_nil(i.city)) - |> select_merge([i], %{^key => selected_as(i.city, ^key)}) + |> select_merge_as([i], %{^key => i.city}) end defp group_imported_by(q, dim, key) when dim in [:device, :browser] do q |> group_by([i], field(i, ^dim)) - |> select_merge([i], %{ - ^key => - selected_as( - fragment("if(empty(?), ?, ?)", field(i, ^dim), @not_set, field(i, ^dim)), - ^key - ) + |> select_merge_as([i], %{ + ^key => fragment("if(empty(?), ?, ?)", field(i, ^dim), @not_set, field(i, ^dim)) }) end defp group_imported_by(q, :browser_version, key) do q |> group_by([i], [i.browser_version]) - |> select_merge([i], %{ - ^key => - selected_as( - fragment( - "if(empty(?), ?, ?)", - i.browser_version, - @not_set, - i.browser_version - ), - ^key - ) + |> select_merge_as([i], %{ + ^key => fragment("if(empty(?), ?, ?)", i.browser_version, @not_set, i.browser_version) }) end defp group_imported_by(q, :os, key) do q |> group_by([i], i.operating_system) - |> select_merge([i], %{ - ^key => - selected_as( - fragment("if(empty(?), ?, ?)", i.operating_system, @not_set, i.operating_system), - ^key - ) + |> select_merge_as([i], %{ + ^key => fragment("if(empty(?), ?, ?)", i.operating_system, @not_set, i.operating_system) }) end defp group_imported_by(q, :os_version, key) do q |> group_by([i], [i.operating_system_version]) - |> select_merge([i], %{ + |> select_merge_as([i], %{ ^key => - selected_as( - fragment( - "if(empty(?), ?, ?)", - i.operating_system_version, - @not_set, - i.operating_system_version - ), - ^key + fragment( + "if(empty(?), ?, ?)", + i.operating_system_version, + @not_set, + i.operating_system_version ) }) end @@ -679,22 +646,22 @@ defmodule Plausible.Stats.Imported do defp group_imported_by(q, :name, key) do q 
|> group_by([i], i.name) - |> select_merge([i], %{^key => selected_as(i.name, ^key)}) + |> select_merge_as([i], %{^key => i.name}) end defp group_imported_by(q, :url, key) do q |> group_by([i], i.link_url) - |> select_merge([i], %{ - ^key => selected_as(fragment("if(not empty(?), ?, ?)", i.link_url, i.link_url, @none), ^key) + |> select_merge_as([i], %{ + ^key => fragment("if(not empty(?), ?, ?)", i.link_url, i.link_url, @none) }) end defp group_imported_by(q, :path, key) do q |> group_by([i], i.path) - |> select_merge([i], %{ - ^key => selected_as(fragment("if(not empty(?), ?, ?)", i.path, i.path, @none), ^key) + |> select_merge_as([i], %{ + ^key => fragment("if(not empty(?), ?, ?)", i.path, i.path, @none) }) end @@ -705,23 +672,14 @@ defmodule Plausible.Stats.Imported do end defp select_joined_dimension(q, "visit:city", key) do - select_merge(q, [s, i], %{ - ^key => selected_as(fragment("greatest(?,?)", field(i, ^key), field(s, ^key)), ^key) + select_merge_as(q, [s, i], %{ + ^key => fragment("greatest(?,?)", field(i, ^key), field(s, ^key)) }) end defp select_joined_dimension(q, _dimension, key) do - select_merge(q, [s, i], %{ - ^key => - selected_as( - fragment( - "if(empty(?), ?, ?)", - field(s, ^key), - field(i, ^key), - field(s, ^key) - ), - ^key - ) + select_merge_as(q, [s, i], %{ + ^key => fragment("if(empty(?), ?, ?)", field(s, ^key), field(i, ^key), field(s, ^key)) }) end @@ -734,31 +692,31 @@ defmodule Plausible.Stats.Imported do defp select_joined_metrics(q, [:visits | rest]) do q - |> select_merge([s, i], %{visits: selected_as(s.visits + i.visits, :visits)}) + |> select_merge_as([s, i], %{visits: s.visits + i.visits}) |> select_joined_metrics(rest) end defp select_joined_metrics(q, [:visitors | rest]) do q - |> select_merge([s, i], %{visitors: selected_as(s.visitors + i.visitors, :visitors)}) + |> select_merge_as([s, i], %{visitors: s.visitors + i.visitors}) |> select_joined_metrics(rest) end defp select_joined_metrics(q, [:events | rest]) do q - |> 
select_merge([s, i], %{events: selected_as(s.events + i.events, :events)}) + |> select_merge_as([s, i], %{events: s.events + i.events}) |> select_joined_metrics(rest) end defp select_joined_metrics(q, [:pageviews | rest]) do q - |> select_merge([s, i], %{pageviews: selected_as(s.pageviews + i.pageviews, :pageviews)}) + |> select_merge_as([s, i], %{pageviews: s.pageviews + i.pageviews}) |> select_joined_metrics(rest) end defp select_joined_metrics(q, [:views_per_visit | rest]) do q - |> select_merge([s, i], %{ + |> select_merge_as([s, i], %{ views_per_visit: fragment( "if(? + ? > 0, round((? + ? * ?) / (? + ?), 2), 0)", @@ -776,7 +734,7 @@ defmodule Plausible.Stats.Imported do defp select_joined_metrics(q, [:bounce_rate | rest]) do q - |> select_merge([s, i], %{ + |> select_merge_as([s, i], %{ bounce_rate: fragment( "if(? + ? > 0, round(100 * (? + (? * ? / 100)) / (? + ?)), 0)", @@ -794,7 +752,7 @@ defmodule Plausible.Stats.Imported do defp select_joined_metrics(q, [:visit_duration | rest]) do q - |> select_merge([s, i], %{ + |> select_merge_as([s, i], %{ visit_duration: fragment( """ @@ -818,7 +776,7 @@ defmodule Plausible.Stats.Imported do defp select_joined_metrics(q, [:sample_percent | rest]) do q - |> select_merge([s, i], %{sample_percent: s.sample_percent}) + |> select_merge_as([s, i], %{sample_percent: s.sample_percent}) |> select_joined_metrics(rest) end @@ -831,10 +789,11 @@ defmodule Plausible.Stats.Imported do from(a in subquery(q1), full_join: b in subquery(q2), on: a.dim0 == b.dim0, - select: %{ - dim0: selected_as(fragment("if(? != 0, ?, ?)", a.dim0, a.dim0, b.dim0), :dim0) - } + select: %{} ) + |> select_merge_as([a, b], %{ + dim0: fragment("if(? 
!= 0, ?, ?)", a.dim0, a.dim0, b.dim0) + }) |> select_joined_metrics(metrics) end end diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index 0ca45003fee3..12bbf1280e4e 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -1,4 +1,11 @@ defmodule Plausible.Stats.SQL.Fragments do + defmacro __using__(_) do + quote do + import Plausible.Stats.SQL.Fragments + require Plausible.Stats.SQL.Fragments + end + end + defmacro uniq(user_id) do quote do fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", unquote(user_id)) @@ -56,21 +63,23 @@ defmodule Plausible.Stats.SQL.Fragments do `not_before` boundary is set to the past Saturday, which is before the weekstart, therefore the cap does not apply. - iex> this_wednesday = ~D[2022-11-09] - ...> past_saturday = ~D[2022-11-05] - ...> weekstart_not_before(this_wednesday, past_saturday) + ``` + > this_wednesday = ~D[2022-11-09] + > past_saturday = ~D[2022-11-05] + > weekstart_not_before(this_wednesday, past_saturday) ~D[2022-11-07] - + ``` In this other example, the fragment returns Tuesday and not the weekstart. The `not_before` boundary is set to Tuesday, which is past the weekstart, therefore the cap applies. - iex> this_wednesday = ~D[2022-11-09] - ...> this_tuesday = ~D[2022-11-08] - ...> weekstart_not_before(this_wednesday, this_tuesday) + ``` + > this_wednesday = ~D[2022-11-09] + > this_tuesday = ~D[2022-11-08] + > weekstart_not_before(this_wednesday, this_tuesday) ~D[2022-11-08] - + ``` """ defmacro weekstart_not_before(date, not_before) do quote do @@ -143,9 +152,29 @@ defmodule Plausible.Stats.SQL.Fragments do def meta_value_column(:meta), do: :"meta.value" def meta_value_column(:entry_meta), do: :"entry_meta.value" - defmacro __using__(_) do + @doc """ + Convenience Ecto macro for wrapping select_merge where each value gets in turn passed to selected_as. 
+ + ### Examples + + iex> select_merge_as(q, [t], %{ foo: t.column }) |> expand_macro_once + "select_merge(q, [t], %{foo: selected_as(t.column, :foo)})" + """ + defmacro select_merge_as(q, binding, values_map) do + selected_as_map = select_as_each(values_map) + quote do - import Plausible.Stats.SQL.Fragments + select_merge(unquote(q), unquote(binding), unquote(selected_as_map)) end end + + defp select_as_each({:%{}, ctx, keyword_list}) do + { + :%{}, + ctx, + Enum.map(keyword_list, fn {key, value} -> + {key, quote(do: selected_as(unquote(value), unquote(key)))} + end) + } + end end diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 7e5e49a887f1..cba2435ffaa8 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -2,6 +2,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do @moduledoc false use Plausible + use Plausible.Stats.SQL.Fragments import Ecto.Query import Plausible.Stats.Imported @@ -190,16 +191,13 @@ defmodule Plausible.Stats.SQL.QueryBuilder do total_visitors: Base.total_visitors_subquery(site, total_query, query.include_imported) } ) - |> select_merge([e], %{ + |> select_merge_as([e], %{ conversion_rate: - selected_as( - fragment( - "if(? > 0, round(? / ? * 100, 1), 0)", - selected_as(:__total_visitors), - selected_as(:visitors), - selected_as(:__total_visitors) - ), - :conversion_rate + fragment( + "if(? > 0, round(? / ? * 100, 1), 0)", + selected_as(:__total_visitors), + selected_as(:visitors), + selected_as(:__total_visitors) ) }) else @@ -228,21 +226,18 @@ defmodule Plausible.Stats.SQL.QueryBuilder do from(e in subquery(q), left_join: c in subquery(build(group_totals_query, site)), - on: ^build_group_by_join(query), - select_merge: %{ - total_visitors: c.visitors, - group_conversion_rate: - selected_as( - fragment( - "if(? > 0, round(? / ? 
* 100, 1), 0)", - c.visitors, - e.visitors, - c.visitors - ), - :group_conversion_rate - ) - } + on: ^build_group_by_join(query) ) + |> select_merge_as([e, c], %{ + total_visitors: c.visitors, + group_conversion_rate: + fragment( + "if(? > 0, round(? / ? * 100, 1), 0)", + c.visitors, + e.visitors, + c.visitors + ) + }) |> select_join_fields(query, query.dimensions, e) |> select_join_fields(query, List.delete(query.metrics, :group_conversion_rate), e) else diff --git a/test/plausible/stats/sql/fragments_test.exs b/test/plausible/stats/sql/fragments_test.exs new file mode 100644 index 000000000000..0932d7e59894 --- /dev/null +++ b/test/plausible/stats/sql/fragments_test.exs @@ -0,0 +1,10 @@ +defmodule Plausible.Stats.SQL.FragmentsTest do + use ExUnit.Case, async: true + use Plausible.Stats.SQL.Fragments + + defmacro expand_macro_once(ast) do + ast |> Macro.expand_once(__ENV__) |> Macro.to_string() + end + + doctest Plausible.Stats.SQL.Fragments +end From 57fee068a320c13dfbfe8149cb938884072dee69 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 06:58:17 +0300 Subject: [PATCH 03/42] Simplify select_join_fields --- lib/plausible/stats/sql/query_builder.ex | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index cba2435ffaa8..a3dbb37b4c33 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -158,17 +158,13 @@ defmodule Plausible.Stats.SQL.QueryBuilder do defmacrop select_join_fields(q, query, list, table_name) do quote do Enum.reduce(unquote(list), unquote(q), fn metric_or_dimension, q -> - select_merge( + key = shortname(unquote(query), metric_or_dimension) + + select_merge_as( q, - ^%{ - shortname(unquote(query), metric_or_dimension) => - dynamic( - [e, s], - selected_as( - field(unquote(table_name), ^shortname(unquote(query), metric_or_dimension)), - ^shortname(unquote(query), 
metric_or_dimension) - ) - ) + [e, s], + %{ + ^key => field(unquote(table_name), ^key) } ) end) From 34a6025d620a6f342827084fec9482e14e226244 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 07:00:35 +0300 Subject: [PATCH 04/42] Remove a needless dynamic --- lib/plausible/stats/sql/query_builder.ex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index a3dbb37b4c33..2b94f797d5ed 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -148,9 +148,9 @@ defmodule Plausible.Stats.SQL.QueryBuilder do order_by( q, [t], - ^{ - order_direction, - dynamic([], selected_as(^shortname(query, metric_or_dimension))) + { + ^order_direction, + selected_as(^shortname(query, metric_or_dimension)) } ) end From 588a5cc0bbcb7d23fb4425a6a433efede23fa5dc Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 08:45:15 +0300 Subject: [PATCH 05/42] wrap_select_columns macro --- extra/lib/plausible/stats/goal/revenue.ex | 20 --- lib/plausible/stats/base.ex | 182 ++++++++-------------- lib/plausible/stats/sql/expression.ex | 142 ++++++++--------- lib/plausible/stats/sql/fragments.ex | 35 ++++- lib/plausible/stats/sql/query_builder.ex | 2 +- 5 files changed, 156 insertions(+), 225 deletions(-) diff --git a/extra/lib/plausible/stats/goal/revenue.ex b/extra/lib/plausible/stats/goal/revenue.ex index 7443a1758d37..1ba564e3ff34 100644 --- a/extra/lib/plausible/stats/goal/revenue.ex +++ b/extra/lib/plausible/stats/goal/revenue.ex @@ -12,26 +12,6 @@ defmodule Plausible.Stats.Goal.Revenue do @revenue_metrics end - def total_revenue_query() do - dynamic( - [e], - selected_as( - fragment("toDecimal64(sum(?) * any(_sample_factor), 3)", e.revenue_reporting_amount), - :total_revenue - ) - ) - end - - def average_revenue_query() do - dynamic( - [e], - selected_as( - fragment("toDecimal64(avg(?) 
* any(_sample_factor), 3)", e.revenue_reporting_amount), - :average_revenue - ) - ) - end - @spec get_revenue_tracking_currency(Plausible.Site.t(), Plausible.Stats.Query.t(), [atom()]) :: {atom() | nil, [atom()]} @doc """ diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index f80d6054cb1d..1a8930462372 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -58,65 +58,57 @@ defmodule Plausible.Stats.Base do end defp select_event_metric(:pageviews) do - %{ + wrap_select_columns([e], %{ pageviews: - dynamic( - [e], - selected_as( - fragment("toUInt64(round(countIf(? = 'pageview') * any(_sample_factor)))", e.name), - :pageviews - ) - ) - } + fragment("toUInt64(round(countIf(? = 'pageview') * any(_sample_factor)))", e.name) + }) end defp select_event_metric(:events) do - %{ - events: - dynamic( - [], - selected_as(fragment("toUInt64(round(count(*) * any(_sample_factor)))"), :events) - ) - } + wrap_select_columns([], %{ + events: fragment("toUInt64(round(count(*) * any(_sample_factor)))") + }) end defp select_event_metric(:visitors) do - %{ - visitors: dynamic([e], selected_as(fragment(@uniq_users_expression, e.user_id), :visitors)) - } + wrap_select_columns([e], %{ + visitors: fragment(@uniq_users_expression, e.user_id) + }) end defp select_event_metric(:visits) do - %{ - visits: - dynamic( - [e], - selected_as( - fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.session_id), - :visits - ) - ) - } + wrap_select_columns([e], %{ + visits: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.session_id) + }) end on_ee do defp select_event_metric(:total_revenue) do - %{total_revenue: Plausible.Stats.Goal.Revenue.total_revenue_query()} + wrap_select_columns( + [e], + %{ + total_revenue: + fragment("toDecimal64(sum(?) 
* any(_sample_factor), 3)", e.revenue_reporting_amount) + } + ) end defp select_event_metric(:average_revenue) do - %{average_revenue: Plausible.Stats.Goal.Revenue.average_revenue_query()} + wrap_select_columns( + [e], + %{ + average_revenue: + fragment("toDecimal64(avg(?) * any(_sample_factor), 3)", e.revenue_reporting_amount) + } + ) end end defp select_event_metric(:sample_percent) do - %{ + wrap_select_columns([], %{ sample_percent: - dynamic( - [], - fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") - ) - } + fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") + }) end defp select_event_metric(:percentage), do: %{} @@ -137,116 +129,68 @@ defmodule Plausible.Stats.Base do event_page_filter = Query.get_filter(query, "event:page") condition = Filters.WhereBuilder.build_condition(:entry_page, event_page_filter) - %{ + wrap_select_columns([], %{ bounce_rate: - dynamic( - [], - selected_as( - fragment( - "toUInt32(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0))", - ^condition, - ^condition - ), - :bounce_rate - ) + fragment( + "toUInt32(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0))", + ^condition, + ^condition ), - __internal_visits: dynamic([], fragment("toUInt32(sum(sign))")) - } + __internal_visits: fragment("toUInt32(sum(sign))") + }) end defp select_session_metric(:visits, _query) do - %{ - visits: - dynamic( - [s], - selected_as( - fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign), - :visits - ) - ) - } + wrap_select_columns([s], %{ + visits: fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign) + }) end defp select_session_metric(:pageviews, _query) do - %{ + wrap_select_columns([s], %{ pageviews: - dynamic( - [s], - selected_as( - fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.pageviews), - :pageviews - ) - ) - } + fragment("toUInt64(round(sum(? * ?) 
* any(_sample_factor)))", s.sign, s.pageviews) + }) end defp select_session_metric(:events, _query) do - %{ - events: - dynamic( - [s], - selected_as( - fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.events), - :events - ) - ) - } + wrap_select_columns([s], %{ + events: fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.events) + }) end defp select_session_metric(:visitors, _query) do - %{ - visitors: - dynamic( - [s], - selected_as( - fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.user_id), - :visitors - ) - ) - } + wrap_select_columns([s], %{ + visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", s.user_id) + }) end defp select_session_metric(:visit_duration, _query) do - %{ + wrap_select_columns([], %{ visit_duration: - dynamic( - [], - selected_as( - fragment("toUInt32(ifNotFinite(round(sum(duration * sign) / sum(sign)), 0))"), - :visit_duration - ) - ), - __internal_visits: dynamic([], fragment("toUInt32(sum(sign))")) - } + fragment("toUInt32(ifNotFinite(round(sum(duration * sign) / sum(sign)), 0))"), + __internal_visits: fragment("toUInt32(sum(sign))") + }) end defp select_session_metric(:views_per_visit, _query) do - %{ + wrap_select_columns([s], %{ views_per_visit: - dynamic( - [s], - selected_as( - fragment( - "ifNotFinite(round(sum(? * ?) / sum(?), 2), 0)", - s.sign, - s.pageviews, - s.sign - ), - :views_per_visit - ) + fragment( + "ifNotFinite(round(sum(? * ?) 
/ sum(?), 2), 0)", + s.sign, + s.pageviews, + s.sign ), - __internal_visits: dynamic([], fragment("toUInt32(sum(sign))")) - } + __internal_visits: fragment("toUInt32(sum(sign))") + }) end defp select_session_metric(:sample_percent, _query) do - %{ + wrap_select_columns([], %{ sample_percent: - dynamic( - [], - fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") - ) - } + fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") + }) end defp select_session_metric(:percentage, _query), do: %{} diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index 252c9c8cf862..867d6d6b58d0 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -11,121 +11,103 @@ defmodule Plausible.Stats.SQL.Expression do @no_ref "Direct / None" @not_set "(not set)" - defmacrop field_or_blank_value(expr, empty_value, select_alias) do + defmacrop field_or_blank_value(key, expr, empty_value) do quote do - dynamic( - [t], - selected_as( - fragment("if(empty(?), ?, ?)", unquote(expr), unquote(empty_value), unquote(expr)), - ^unquote(select_alias) - ) - ) + wrap_select_columns([t], %{ + unquote(key) => + fragment("if(empty(?), ?, ?)", unquote(expr), unquote(empty_value), unquote(expr)) + }) end end - def dimension("time:hour", query, select_alias) do - dynamic( - [t], - selected_as( - fragment("toStartOfHour(toTimeZone(?, ?))", t.timestamp, ^query.timezone), - ^select_alias - ) - ) + def dimension(key, "time:hour", query) do + wrap_select_columns([t], %{ + key => fragment("toStartOfHour(toTimeZone(?, ?))", t.timestamp, ^query.timezone) + }) end - def dimension("time:day", query, select_alias) do - dynamic( - [t], - selected_as( - fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone), - ^select_alias - ) - ) + def dimension(key, "time:day", query) do + wrap_select_columns([t], %{ + key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone) + }) 
end - def dimension("time:month", query, select_alias) do - dynamic( - [t], - selected_as( - fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone), - ^select_alias - ) - ) + def dimension(key, "time:month", query) do + wrap_select_columns([t], %{ + key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone) + }) end - def dimension("event:name", _query, select_alias), - do: dynamic([t], selected_as(t.name, ^select_alias)) + def dimension(key, "event:name", _query), + do: wrap_select_columns([t], %{key => t.name}) - def dimension("event:page", _query, select_alias), - do: dynamic([t], selected_as(t.pathname, ^select_alias)) + def dimension(key, "event:page", _query), + do: wrap_select_columns([t], %{key => t.pathname}) - def dimension("event:hostname", _query, select_alias), - do: dynamic([t], selected_as(t.hostname, ^select_alias)) + def dimension(key, "event:hostname", _query), + do: wrap_select_columns([t], %{key => t.hostname}) - def dimension("event:props:" <> property_name, _query, select_alias) do - dynamic( - [t], - selected_as( + def dimension(key, "event:props:" <> property_name, _query) do + wrap_select_columns([t], %{ + key => fragment( "if(not empty(?), ?, '(none)')", get_by_key(t, :meta, ^property_name), get_by_key(t, :meta, ^property_name) - ), - ^select_alias - ) - ) + ) + }) end - def dimension("visit:entry_page", _query, select_alias), - do: dynamic([t], selected_as(t.entry_page, ^select_alias)) + def dimension(key, "visit:entry_page", _query), + do: wrap_select_columns([t], %{key => t.entry_page}) - def dimension("visit:exit_page", _query, select_alias), - do: dynamic([t], selected_as(t.exit_page, ^select_alias)) + def dimension(key, "visit:exit_page", _query), + do: wrap_select_columns([t], %{key => t.exit_page}) - def dimension("visit:utm_medium", _query, select_alias), - do: field_or_blank_value(t.utm_medium, @not_set, select_alias) + def dimension(key, "visit:utm_medium", _query), + do: 
field_or_blank_value(key, t.utm_medium, @not_set) - def dimension("visit:utm_source", _query, select_alias), - do: field_or_blank_value(t.utm_source, @not_set, select_alias) + def dimension(key, "visit:utm_source", _query), + do: field_or_blank_value(key, t.utm_source, @not_set) - def dimension("visit:utm_campaign", _query, select_alias), - do: field_or_blank_value(t.utm_campaign, @not_set, select_alias) + def dimension(key, "visit:utm_campaign", _query), + do: field_or_blank_value(key, t.utm_campaign, @not_set) - def dimension("visit:utm_content", _query, select_alias), - do: field_or_blank_value(t.utm_content, @not_set, select_alias) + def dimension(key, "visit:utm_content", _query), + do: field_or_blank_value(key, t.utm_content, @not_set) - def dimension("visit:utm_term", _query, select_alias), - do: field_or_blank_value(t.utm_term, @not_set, select_alias) + def dimension(key, "visit:utm_term", _query), + do: field_or_blank_value(key, t.utm_term, @not_set) - def dimension("visit:source", _query, select_alias), - do: field_or_blank_value(t.source, @no_ref, select_alias) + def dimension(key, "visit:source", _query), + do: field_or_blank_value(key, t.source, @no_ref) - def dimension("visit:referrer", _query, select_alias), - do: field_or_blank_value(t.referrer, @no_ref, select_alias) + def dimension(key, "visit:referrer", _query), + do: field_or_blank_value(key, t.referrer, @no_ref) - def dimension("visit:device", _query, select_alias), - do: field_or_blank_value(t.device, @not_set, select_alias) + def dimension(key, "visit:device", _query), + do: field_or_blank_value(key, t.device, @not_set) - def dimension("visit:os", _query, select_alias), - do: field_or_blank_value(t.os, @not_set, select_alias) + def dimension(key, "visit:os", _query), + do: field_or_blank_value(key, t.os, @not_set) - def dimension("visit:os_version", _query, select_alias), - do: field_or_blank_value(t.os_version, @not_set, select_alias) + def dimension(key, "visit:os_version", _query), + do: 
field_or_blank_value(key, t.os_version, @not_set) - def dimension("visit:browser", _query, select_alias), - do: field_or_blank_value(t.browser, @not_set, select_alias) + def dimension(key, "visit:browser", _query), + do: field_or_blank_value(key, t.browser, @not_set) - def dimension("visit:browser_version", _query, select_alias), - do: field_or_blank_value(t.browser_version, @not_set, select_alias) + def dimension(key, "visit:browser_version", _query), + do: field_or_blank_value(key, t.browser_version, @not_set) - def dimension("visit:country", _query, select_alias), - do: dynamic([t], selected_as(t.country, ^select_alias)) + def dimension(key, "visit:country", _query), + do: wrap_select_columns([t], %{key => t.country}) - def dimension("visit:region", _query, select_alias), - do: dynamic([t], selected_as(t.region, ^select_alias)) + def dimension(key, "visit:region", _query), + do: wrap_select_columns([t], %{key => t.region}) - def dimension("visit:city", _query, select_alias), - do: dynamic([t], selected_as(t.city, ^select_alias)) + def dimension(key, "visit:city", _query), + do: wrap_select_columns([t], %{key => t.city}) defmacro event_goal_join(events, page_regexes) do quote do diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index 12bbf1280e4e..c06a18523e31 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -160,20 +160,45 @@ defmodule Plausible.Stats.SQL.Fragments do iex> select_merge_as(q, [t], %{ foo: t.column }) |> expand_macro_once "select_merge(q, [t], %{foo: selected_as(t.column, :foo)})" """ - defmacro select_merge_as(q, binding, values_map) do - selected_as_map = select_as_each(values_map) + defmacro select_merge_as(q, binding, map_literal) do + selected_as_map = + update_literal_map_values(map_literal, fn {key, expr} -> + quote(do: selected_as(unquote(expr), unquote(key))) + end) quote do select_merge(unquote(q), unquote(binding), unquote(selected_as_map)) end end - 
defp select_as_each({:%{}, ctx, keyword_list}) do + @doc """ + Convenience Ecto macro for wrapping a map passed to select_merge such that each + expression gets wrapped in dynamic and set as selected_as. + + ### Examples + + iex> wrap_select_columns([t], %{ foo: t.column }) |> expand_macro_once + "%{foo: dynamic([t], selected_as(t.column, :foo))}" + """ + defmacro wrap_select_columns(binding, map_literal) do + update_literal_map_values(map_literal, fn {key, expr} -> + key_expr = + if Macro.quoted_literal?(key) do + key + else + quote(do: ^unquote(key)) + end + + quote(do: dynamic(unquote(binding), selected_as(unquote(expr), unquote(key_expr)))) + end) + end + + defp update_literal_map_values({:%{}, ctx, keyword_list}, mapper_fn) do { :%{}, ctx, - Enum.map(keyword_list, fn {key, value} -> - {key, quote(do: selected_as(unquote(value), unquote(key)))} + Enum.map(keyword_list, fn {key, expr} -> + {key, mapper_fn.({key, expr})} end) } end diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 2b94f797d5ed..f1cdb4394396 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -136,7 +136,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do key = shortname(query, dimension) q - |> select_merge(^%{key => Expression.dimension(dimension, query, key)}) + |> select_merge(^Expression.dimension(key, dimension, query)) |> group_by([], selected_as(^key)) end From a91501a177cf3a89ec1c7cf2e0876543ebb8d07f Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 08:53:18 +0300 Subject: [PATCH 06/42] Move metrics from base.ex to expression.ex --- lib/plausible/stats/base.ex | 146 ++------------------------ lib/plausible/stats/sql/expression.ex | 144 ++++++++++++++++++++++++- 2 files changed, 148 insertions(+), 142 deletions(-) diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 1a8930462372..5a3d34547a66 100644 --- a/lib/plausible/stats/base.ex +++ 
b/lib/plausible/stats/base.ex @@ -3,12 +3,10 @@ defmodule Plausible.Stats.Base do use Plausible use Plausible.Stats.SQL.Fragments - alias Plausible.Stats.{Query, Filters, TableDecider} + alias Plausible.Stats.{Query, Filters, TableDecider, SQL} alias Plausible.Timezones import Ecto.Query - @uniq_users_expression "toUInt64(round(uniq(?) * any(_sample_factor)))" - def base_event_query(site, query) do events_q = query_events(site, query) @@ -53,150 +51,16 @@ defmodule Plausible.Stats.Base do def select_event_metrics(metrics) do metrics - |> Enum.map(&select_event_metric/1) + |> Enum.map(&SQL.Expression.event_metric/1) |> Enum.reduce(%{}, &Map.merge/2) end - defp select_event_metric(:pageviews) do - wrap_select_columns([e], %{ - pageviews: - fragment("toUInt64(round(countIf(? = 'pageview') * any(_sample_factor)))", e.name) - }) - end - - defp select_event_metric(:events) do - wrap_select_columns([], %{ - events: fragment("toUInt64(round(count(*) * any(_sample_factor)))") - }) - end - - defp select_event_metric(:visitors) do - wrap_select_columns([e], %{ - visitors: fragment(@uniq_users_expression, e.user_id) - }) - end - - defp select_event_metric(:visits) do - wrap_select_columns([e], %{ - visits: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.session_id) - }) - end - - on_ee do - defp select_event_metric(:total_revenue) do - wrap_select_columns( - [e], - %{ - total_revenue: - fragment("toDecimal64(sum(?) * any(_sample_factor), 3)", e.revenue_reporting_amount) - } - ) - end - - defp select_event_metric(:average_revenue) do - wrap_select_columns( - [e], - %{ - average_revenue: - fragment("toDecimal64(avg(?) 
* any(_sample_factor), 3)", e.revenue_reporting_amount) - } - ) - end - end - - defp select_event_metric(:sample_percent) do - wrap_select_columns([], %{ - sample_percent: - fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") - }) - end - - defp select_event_metric(:percentage), do: %{} - defp select_event_metric(:conversion_rate), do: %{} - defp select_event_metric(:group_conversion_rate), do: %{} - defp select_event_metric(:total_visitors), do: %{} - - defp select_event_metric(unknown), do: raise("Unknown metric: #{unknown}") - def select_session_metrics(metrics, query) do metrics - |> Enum.map(&select_session_metric(&1, query)) + |> Enum.map(&SQL.Expression.session_metric(&1, query)) |> Enum.reduce(%{}, &Map.merge/2) end - defp select_session_metric(:bounce_rate, query) do - # :TRICKY: If page is passed to query, we only count bounce rate where users _entered_ at page. - event_page_filter = Query.get_filter(query, "event:page") - condition = Filters.WhereBuilder.build_condition(:entry_page, event_page_filter) - - wrap_select_columns([], %{ - bounce_rate: - fragment( - "toUInt32(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0))", - ^condition, - ^condition - ), - __internal_visits: fragment("toUInt32(sum(sign))") - }) - end - - defp select_session_metric(:visits, _query) do - wrap_select_columns([s], %{ - visits: fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign) - }) - end - - defp select_session_metric(:pageviews, _query) do - wrap_select_columns([s], %{ - pageviews: - fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.pageviews) - }) - end - - defp select_session_metric(:events, _query) do - wrap_select_columns([s], %{ - events: fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.events) - }) - end - - defp select_session_metric(:visitors, _query) do - wrap_select_columns([s], %{ - visitors: fragment("toUInt64(round(uniq(?) 
* any(_sample_factor)))", s.user_id) - }) - end - - defp select_session_metric(:visit_duration, _query) do - wrap_select_columns([], %{ - visit_duration: - fragment("toUInt32(ifNotFinite(round(sum(duration * sign) / sum(sign)), 0))"), - __internal_visits: fragment("toUInt32(sum(sign))") - }) - end - - defp select_session_metric(:views_per_visit, _query) do - wrap_select_columns([s], %{ - views_per_visit: - fragment( - "ifNotFinite(round(sum(? * ?) / sum(?), 2), 0)", - s.sign, - s.pageviews, - s.sign - ), - __internal_visits: fragment("toUInt32(sum(sign))") - }) - end - - defp select_session_metric(:sample_percent, _query) do - wrap_select_columns([], %{ - sample_percent: - fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") - }) - end - - defp select_session_metric(:percentage, _query), do: %{} - defp select_session_metric(:conversion_rate, _query), do: %{} - defp select_session_metric(:group_conversion_rate, _query), do: %{} - def filter_converted_sessions(db_query, site, query) do if Query.has_event_filters?(query) do converted_sessions = @@ -278,7 +142,9 @@ defmodule Plausible.Stats.Base do defp total_visitors(site, query) do base_event_query(site, query) - |> select([e], total_visitors: fragment(@uniq_users_expression, e.user_id)) + |> select([e], + total_visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id) + ) end # `total_visitors_subquery` returns a subquery which selects `total_visitors` - diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index 867d6d6b58d0..34b8f6e63710 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -1,12 +1,18 @@ defmodule Plausible.Stats.SQL.Expression do @moduledoc """ This module is responsible for generating SQL/Ecto expressions - for dimensions used in query select, group_by and order_by. + for dimensions and metrics used in query SELECT statement. 
+ + Each dimension and metric is tagged with with selected_as for easier + usage down the line. """ + use Plausible + use Plausible.Stats.SQL.Fragments + import Ecto.Query - use Plausible.Stats.SQL.Fragments + alias Plausible.Stats.{Query, Filters} @no_ref "Direct / None" @not_set "(not set)" @@ -109,6 +115,140 @@ defmodule Plausible.Stats.SQL.Expression do def dimension(key, "visit:city", _query), do: wrap_select_columns([t], %{key => t.city}) + def event_metric(:pageviews) do + wrap_select_columns([e], %{ + pageviews: + fragment("toUInt64(round(countIf(? = 'pageview') * any(_sample_factor)))", e.name) + }) + end + + def event_metric(:events) do + wrap_select_columns([], %{ + events: fragment("toUInt64(round(count(*) * any(_sample_factor)))") + }) + end + + def event_metric(:visitors) do + wrap_select_columns([e], %{ + visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id) + }) + end + + def event_metric(:visits) do + wrap_select_columns([e], %{ + visits: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.session_id) + }) + end + + on_ee do + def event_metric(:total_revenue) do + wrap_select_columns( + [e], + %{ + total_revenue: + fragment("toDecimal64(sum(?) * any(_sample_factor), 3)", e.revenue_reporting_amount) + } + ) + end + + def event_metric(:average_revenue) do + wrap_select_columns( + [e], + %{ + average_revenue: + fragment("toDecimal64(avg(?) 
* any(_sample_factor), 3)", e.revenue_reporting_amount) + } + ) + end + end + + def event_metric(:sample_percent) do + wrap_select_columns([], %{ + sample_percent: + fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") + }) + end + + def event_metric(:percentage), do: %{} + def event_metric(:conversion_rate), do: %{} + def event_metric(:group_conversion_rate), do: %{} + def event_metric(:total_visitors), do: %{} + + def event_metric(unknown), do: raise("Unknown metric: #{unknown}") + + def session_metric(:bounce_rate, query) do + # :TRICKY: If page is passed to query, we only count bounce rate where users _entered_ at page. + event_page_filter = Query.get_filter(query, "event:page") + condition = Filters.WhereBuilder.build_condition(:entry_page, event_page_filter) + + wrap_select_columns([], %{ + bounce_rate: + fragment( + "toUInt32(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0))", + ^condition, + ^condition + ), + __internal_visits: fragment("toUInt32(sum(sign))") + }) + end + + def session_metric(:visits, _query) do + wrap_select_columns([s], %{ + visits: fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign) + }) + end + + def session_metric(:pageviews, _query) do + wrap_select_columns([s], %{ + pageviews: + fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.pageviews) + }) + end + + def session_metric(:events, _query) do + wrap_select_columns([s], %{ + events: fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.events) + }) + end + + def session_metric(:visitors, _query) do + wrap_select_columns([s], %{ + visitors: fragment("toUInt64(round(uniq(?) 
* any(_sample_factor)))", s.user_id) + }) + end + + def session_metric(:visit_duration, _query) do + wrap_select_columns([], %{ + visit_duration: + fragment("toUInt32(ifNotFinite(round(sum(duration * sign) / sum(sign)), 0))"), + __internal_visits: fragment("toUInt32(sum(sign))") + }) + end + + def session_metric(:views_per_visit, _query) do + wrap_select_columns([s], %{ + views_per_visit: + fragment( + "ifNotFinite(round(sum(? * ?) / sum(?), 2), 0)", + s.sign, + s.pageviews, + s.sign + ), + __internal_visits: fragment("toUInt32(sum(sign))") + }) + end + + def session_metric(:sample_percent, _query) do + wrap_select_columns([], %{ + sample_percent: + fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") + }) + end + + def session_metric(:percentage, _query), do: %{} + def session_metric(:conversion_rate, _query), do: %{} + def session_metric(:group_conversion_rate, _query), do: %{} + defmacro event_goal_join(events, page_regexes) do quote do fragment( From 3dbdad1da92e43fd24ca777ac2d9872d4a7edc87 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 08:57:46 +0300 Subject: [PATCH 07/42] Move WhereBuilder under Plausible.Stats.SQL --- lib/plausible/stats/aggregate.ex | 5 ++--- lib/plausible/stats/base.ex | 6 +++--- lib/plausible/stats/imported/base.ex | 7 +++---- lib/plausible/stats/sql/expression.ex | 4 ++-- lib/plausible/stats/sql/query_builder.ex | 8 ++++---- lib/plausible/stats/{filters => sql}/where_builder.ex | 2 +- 6 files changed, 15 insertions(+), 17 deletions(-) rename lib/plausible/stats/{filters => sql}/where_builder.ex (99%) diff --git a/lib/plausible/stats/aggregate.ex b/lib/plausible/stats/aggregate.ex index 9a2e65bab607..8c5cdc4f7bbb 100644 --- a/lib/plausible/stats/aggregate.ex +++ b/lib/plausible/stats/aggregate.ex @@ -3,7 +3,7 @@ defmodule Plausible.Stats.Aggregate do use Plausible import Plausible.Stats.Base import Ecto.Query - alias Plausible.Stats.{Query, Util} + alias Plausible.Stats.{Query, Util, 
SQL} def aggregate(site, query, metrics) do {currency, metrics} = @@ -64,8 +64,7 @@ defmodule Plausible.Stats.Aggregate do timed_page_transitions_q = from e in Ecto.Query.subquery(windowed_pages_q), group_by: [e.pathname, e.next_pathname, e.session_id], - where: - ^Plausible.Stats.Filters.WhereBuilder.build_condition(:pathname, event_page_filter), + where: ^SQL.WhereBuilder.build_condition(:pathname, event_page_filter), where: e.next_timestamp != 0, select: %{ pathname: e.pathname, diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 5a3d34547a66..b35ef413dec8 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -3,7 +3,7 @@ defmodule Plausible.Stats.Base do use Plausible use Plausible.Stats.SQL.Fragments - alias Plausible.Stats.{Query, Filters, TableDecider, SQL} + alias Plausible.Stats.{Query, TableDecider, SQL} alias Plausible.Timezones import Ecto.Query @@ -30,7 +30,7 @@ defmodule Plausible.Stats.Base do end def query_events(site, query) do - q = from(e in "events_v2", where: ^Filters.WhereBuilder.build(:events, site, query)) + q = from(e in "events_v2", where: ^SQL.WhereBuilder.build(:events, site, query)) on_ee do q = Plausible.Stats.Sampling.add_query_hint(q, query) @@ -40,7 +40,7 @@ defmodule Plausible.Stats.Base do end def query_sessions(site, query) do - q = from(s in "sessions_v2", where: ^Filters.WhereBuilder.build(:sessions, site, query)) + q = from(s in "sessions_v2", where: ^SQL.WhereBuilder.build(:sessions, site, query)) on_ee do q = Plausible.Stats.Sampling.add_query_hint(q, query) diff --git a/lib/plausible/stats/imported/base.ex b/lib/plausible/stats/imported/base.ex index 5b774d9ea960..72a888d186d1 100644 --- a/lib/plausible/stats/imported/base.ex +++ b/lib/plausible/stats/imported/base.ex @@ -6,8 +6,7 @@ defmodule Plausible.Stats.Imported.Base do import Ecto.Query alias Plausible.Imported - alias Plausible.Stats.Filters - alias Plausible.Stats.Query + alias Plausible.Stats.{Filters, Query, SQL} 
@property_to_table_mappings %{ "visit:source" => "imported_sources", @@ -213,9 +212,9 @@ defmodule Plausible.Stats.Imported.Base do defp apply_filter(q, %Query{filters: filters}) do Enum.reduce(filters, q, fn [_, filter_key | _] = filter, q -> - db_field = Plausible.Stats.Filters.without_prefix(filter_key) + db_field = Filters.without_prefix(filter_key) mapped_db_field = Map.get(@db_field_mappings, db_field, db_field) - condition = Filters.WhereBuilder.build_condition(mapped_db_field, filter) + condition = SQL.WhereBuilder.build_condition(mapped_db_field, filter) where(q, ^condition) end) diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index 34b8f6e63710..a7d9a44c339a 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -12,7 +12,7 @@ defmodule Plausible.Stats.SQL.Expression do import Ecto.Query - alias Plausible.Stats.{Query, Filters} + alias Plausible.Stats.{Query, SQL} @no_ref "Direct / None" @not_set "(not set)" @@ -179,7 +179,7 @@ defmodule Plausible.Stats.SQL.Expression do def session_metric(:bounce_rate, query) do # :TRICKY: If page is passed to query, we only count bounce rate where users _entered_ at page. 
event_page_filter = Query.get_filter(query, "event:page") - condition = Filters.WhereBuilder.build_condition(:entry_page, event_page_filter) + condition = SQL.WhereBuilder.build_condition(:entry_page, event_page_filter) wrap_select_columns([], %{ bounce_rate: diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index f1cdb4394396..92910223f97a 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -8,7 +8,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do import Plausible.Stats.Imported import Plausible.Stats.Util - alias Plausible.Stats.{Base, Query, QueryOptimizer, TableDecider, Filters} + alias Plausible.Stats.{Base, Filters, Query, QueryOptimizer, TableDecider, SQL} alias Plausible.Stats.SQL.Expression require Plausible.Stats.SQL.Expression @@ -31,7 +31,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do q = from( e in "events_v2", - where: ^Filters.WhereBuilder.build(:events, site, events_query), + where: ^SQL.WhereBuilder.build(:events, site, events_query), select: ^Base.select_event_metrics(events_query.metrics) ) @@ -74,7 +74,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do q = from( e in "sessions_v2", - where: ^Filters.WhereBuilder.build(:sessions, site, sessions_query), + where: ^SQL.WhereBuilder.build(:sessions, site, sessions_query), select: ^Base.select_session_metrics(sessions_query.metrics, sessions_query) ) @@ -95,7 +95,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do if Query.has_event_filters?(query) do events_q = from(e in "events_v2", - where: ^Filters.WhereBuilder.build(:events, site, query), + where: ^SQL.WhereBuilder.build(:events, site, query), select: %{ session_id: fragment("DISTINCT ?", e.session_id), _sample_factor: fragment("_sample_factor") diff --git a/lib/plausible/stats/filters/where_builder.ex b/lib/plausible/stats/sql/where_builder.ex similarity index 99% rename from lib/plausible/stats/filters/where_builder.ex rename to 
lib/plausible/stats/sql/where_builder.ex index 302cd92a36e8..71bcdad49519 100644 --- a/lib/plausible/stats/filters/where_builder.ex +++ b/lib/plausible/stats/sql/where_builder.ex @@ -1,4 +1,4 @@ -defmodule Plausible.Stats.Filters.WhereBuilder do +defmodule Plausible.Stats.SQL.WhereBuilder do @moduledoc """ A module for building am ecto where clause of a query out of a query. """ From dcfa7a65b8131cedc714faabcf91e53c8ac61fac Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 08:57:56 +0300 Subject: [PATCH 08/42] Moduledoc --- lib/plausible/stats/sql/fragments.ex | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index c06a18523e31..f69713e2e682 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -1,4 +1,8 @@ defmodule Plausible.Stats.SQL.Fragments do + @moduledoc """ + Various macros and common SQL fragments used in Stats code. + """ + defmacro __using__(_) do quote do import Plausible.Stats.SQL.Fragments From 13369405c633368321b4dfadeb4293fcdc31716a Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 15:14:35 +0300 Subject: [PATCH 09/42] Improved macros --- lib/plausible/stats/imported/imported.ex | 32 +++++++------- lib/plausible/stats/sql/expression.ex | 56 ++++++++++++------------ lib/plausible/stats/sql/fragments.ex | 49 ++++++++++----------- lib/plausible/stats/sql/query_builder.ex | 2 +- 4 files changed, 67 insertions(+), 72 deletions(-) diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index be96a36f51fc..4fcdd04a66de 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -560,7 +560,7 @@ defmodule Plausible.Stats.Imported do q |> group_by([i], field(i, ^dim)) |> select_merge_as([i], %{ - ^key => fragment("if(empty(?), ?, ?)", field(i, ^dim), @no_ref, field(i, ^dim)) + key => fragment("if(empty(?), 
?, ?)", field(i, ^dim), @no_ref, field(i, ^dim)) }) end @@ -569,41 +569,41 @@ defmodule Plausible.Stats.Imported do q |> group_by([i], field(i, ^dim)) |> where([i], fragment("not empty(?)", field(i, ^dim))) - |> select_merge_as([i], %{^key => field(i, ^dim)}) + |> select_merge_as([i], %{key => field(i, ^dim)}) end defp group_imported_by(q, :page, key) do q |> group_by([i], i.page) - |> select_merge_as([i], %{^key => i.page, time_on_page: sum(i.time_on_page)}) + |> select_merge_as([i], %{key => i.page, time_on_page: sum(i.time_on_page)}) end defp group_imported_by(q, :country, key) do q |> group_by([i], i.country) |> where([i], i.country != "ZZ") - |> select_merge_as([i], %{^key => i.country}) + |> select_merge_as([i], %{key => i.country}) end defp group_imported_by(q, :region, key) do q |> group_by([i], i.region) |> where([i], i.region != "") - |> select_merge_as([i], %{^key => i.region}) + |> select_merge_as([i], %{key => i.region}) end defp group_imported_by(q, :city, key) do q |> group_by([i], i.city) |> where([i], i.city != 0 and not is_nil(i.city)) - |> select_merge_as([i], %{^key => i.city}) + |> select_merge_as([i], %{key => i.city}) end defp group_imported_by(q, dim, key) when dim in [:device, :browser] do q |> group_by([i], field(i, ^dim)) |> select_merge_as([i], %{ - ^key => fragment("if(empty(?), ?, ?)", field(i, ^dim), @not_set, field(i, ^dim)) + key => fragment("if(empty(?), ?, ?)", field(i, ^dim), @not_set, field(i, ^dim)) }) end @@ -611,7 +611,7 @@ defmodule Plausible.Stats.Imported do q |> group_by([i], [i.browser_version]) |> select_merge_as([i], %{ - ^key => fragment("if(empty(?), ?, ?)", i.browser_version, @not_set, i.browser_version) + key => fragment("if(empty(?), ?, ?)", i.browser_version, @not_set, i.browser_version) }) end @@ -619,7 +619,7 @@ defmodule Plausible.Stats.Imported do q |> group_by([i], i.operating_system) |> select_merge_as([i], %{ - ^key => fragment("if(empty(?), ?, ?)", i.operating_system, @not_set, i.operating_system) + key 
=> fragment("if(empty(?), ?, ?)", i.operating_system, @not_set, i.operating_system) }) end @@ -627,7 +627,7 @@ defmodule Plausible.Stats.Imported do q |> group_by([i], [i.operating_system_version]) |> select_merge_as([i], %{ - ^key => + key => fragment( "if(empty(?), ?, ?)", i.operating_system_version, @@ -640,20 +640,20 @@ defmodule Plausible.Stats.Imported do defp group_imported_by(q, dim, key) when dim in [:entry_page, :exit_page] do q |> group_by([i], field(i, ^dim)) - |> select_merge([i], %{^key => selected_as(field(i, ^dim), ^key)}) + |> select_merge_as([i], %{key => field(i, ^dim)}) end defp group_imported_by(q, :name, key) do q |> group_by([i], i.name) - |> select_merge_as([i], %{^key => i.name}) + |> select_merge_as([i], %{key => i.name}) end defp group_imported_by(q, :url, key) do q |> group_by([i], i.link_url) |> select_merge_as([i], %{ - ^key => fragment("if(not empty(?), ?, ?)", i.link_url, i.link_url, @none) + key => fragment("if(not empty(?), ?, ?)", i.link_url, i.link_url, @none) }) end @@ -661,7 +661,7 @@ defmodule Plausible.Stats.Imported do q |> group_by([i], i.path) |> select_merge_as([i], %{ - ^key => fragment("if(not empty(?), ?, ?)", i.path, i.path, @none) + key => fragment("if(not empty(?), ?, ?)", i.path, i.path, @none) }) end @@ -673,13 +673,13 @@ defmodule Plausible.Stats.Imported do defp select_joined_dimension(q, "visit:city", key) do select_merge_as(q, [s, i], %{ - ^key => fragment("greatest(?,?)", field(i, ^key), field(s, ^key)) + key => fragment("greatest(?,?)", field(i, ^key), field(s, ^key)) }) end defp select_joined_dimension(q, _dimension, key) do select_merge_as(q, [s, i], %{ - ^key => fragment("if(empty(?), ?, ?)", field(s, ^key), field(i, ^key), field(s, ^key)) + key => fragment("if(empty(?), ?, ?)", field(s, ^key), field(i, ^key), field(s, ^key)) }) end diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index a7d9a44c339a..b016e13bdb9f 100644 --- a/lib/plausible/stats/sql/expression.ex 
+++ b/lib/plausible/stats/sql/expression.ex @@ -19,7 +19,7 @@ defmodule Plausible.Stats.SQL.Expression do defmacrop field_or_blank_value(key, expr, empty_value) do quote do - wrap_select_columns([t], %{ + wrap_expression([t], %{ unquote(key) => fragment("if(empty(?), ?, ?)", unquote(expr), unquote(empty_value), unquote(expr)) }) @@ -27,34 +27,34 @@ defmodule Plausible.Stats.SQL.Expression do end def dimension(key, "time:hour", query) do - wrap_select_columns([t], %{ + wrap_expression([t], %{ key => fragment("toStartOfHour(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end def dimension(key, "time:day", query) do - wrap_select_columns([t], %{ + wrap_expression([t], %{ key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end def dimension(key, "time:month", query) do - wrap_select_columns([t], %{ + wrap_expression([t], %{ key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end def dimension(key, "event:name", _query), - do: wrap_select_columns([t], %{key => t.name}) + do: wrap_expression([t], %{key => t.name}) def dimension(key, "event:page", _query), - do: wrap_select_columns([t], %{key => t.pathname}) + do: wrap_expression([t], %{key => t.pathname}) def dimension(key, "event:hostname", _query), - do: wrap_select_columns([t], %{key => t.hostname}) + do: wrap_expression([t], %{key => t.hostname}) def dimension(key, "event:props:" <> property_name, _query) do - wrap_select_columns([t], %{ + wrap_expression([t], %{ key => fragment( "if(not empty(?), ?, '(none)')", @@ -65,10 +65,10 @@ defmodule Plausible.Stats.SQL.Expression do end def dimension(key, "visit:entry_page", _query), - do: wrap_select_columns([t], %{key => t.entry_page}) + do: wrap_expression([t], %{key => t.entry_page}) def dimension(key, "visit:exit_page", _query), - do: wrap_select_columns([t], %{key => t.exit_page}) + do: wrap_expression([t], %{key => t.exit_page}) def dimension(key, "visit:utm_medium", _query), do: field_or_blank_value(key, 
t.utm_medium, @not_set) @@ -107,42 +107,42 @@ defmodule Plausible.Stats.SQL.Expression do do: field_or_blank_value(key, t.browser_version, @not_set) def dimension(key, "visit:country", _query), - do: wrap_select_columns([t], %{key => t.country}) + do: wrap_expression([t], %{key => t.country}) def dimension(key, "visit:region", _query), - do: wrap_select_columns([t], %{key => t.region}) + do: wrap_expression([t], %{key => t.region}) def dimension(key, "visit:city", _query), - do: wrap_select_columns([t], %{key => t.city}) + do: wrap_expression([t], %{key => t.city}) def event_metric(:pageviews) do - wrap_select_columns([e], %{ + wrap_expression([e], %{ pageviews: fragment("toUInt64(round(countIf(? = 'pageview') * any(_sample_factor)))", e.name) }) end def event_metric(:events) do - wrap_select_columns([], %{ + wrap_expression([], %{ events: fragment("toUInt64(round(count(*) * any(_sample_factor)))") }) end def event_metric(:visitors) do - wrap_select_columns([e], %{ + wrap_expression([e], %{ visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id) }) end def event_metric(:visits) do - wrap_select_columns([e], %{ + wrap_expression([e], %{ visits: fragment("toUInt64(round(uniq(?) 
* any(_sample_factor)))", e.session_id) }) end on_ee do def event_metric(:total_revenue) do - wrap_select_columns( + wrap_expression( [e], %{ total_revenue: @@ -152,7 +152,7 @@ defmodule Plausible.Stats.SQL.Expression do end def event_metric(:average_revenue) do - wrap_select_columns( + wrap_expression( [e], %{ average_revenue: @@ -163,7 +163,7 @@ defmodule Plausible.Stats.SQL.Expression do end def event_metric(:sample_percent) do - wrap_select_columns([], %{ + wrap_expression([], %{ sample_percent: fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") }) @@ -181,7 +181,7 @@ defmodule Plausible.Stats.SQL.Expression do event_page_filter = Query.get_filter(query, "event:page") condition = SQL.WhereBuilder.build_condition(:entry_page, event_page_filter) - wrap_select_columns([], %{ + wrap_expression([], %{ bounce_rate: fragment( "toUInt32(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0))", @@ -193,32 +193,32 @@ defmodule Plausible.Stats.SQL.Expression do end def session_metric(:visits, _query) do - wrap_select_columns([s], %{ + wrap_expression([s], %{ visits: fragment("toUInt64(round(sum(?) * any(_sample_factor)))", s.sign) }) end def session_metric(:pageviews, _query) do - wrap_select_columns([s], %{ + wrap_expression([s], %{ pageviews: fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.pageviews) }) end def session_metric(:events, _query) do - wrap_select_columns([s], %{ + wrap_expression([s], %{ events: fragment("toUInt64(round(sum(? * ?) * any(_sample_factor)))", s.sign, s.events) }) end def session_metric(:visitors, _query) do - wrap_select_columns([s], %{ + wrap_expression([s], %{ visitors: fragment("toUInt64(round(uniq(?) 
* any(_sample_factor)))", s.user_id) }) end def session_metric(:visit_duration, _query) do - wrap_select_columns([], %{ + wrap_expression([], %{ visit_duration: fragment("toUInt32(ifNotFinite(round(sum(duration * sign) / sum(sign)), 0))"), __internal_visits: fragment("toUInt32(sum(sign))") @@ -226,7 +226,7 @@ defmodule Plausible.Stats.SQL.Expression do end def session_metric(:views_per_visit, _query) do - wrap_select_columns([s], %{ + wrap_expression([s], %{ views_per_visit: fragment( "ifNotFinite(round(sum(? * ?) / sum(?), 2), 0)", @@ -239,7 +239,7 @@ defmodule Plausible.Stats.SQL.Expression do end def session_metric(:sample_percent, _query) do - wrap_select_columns([], %{ + wrap_expression([], %{ sample_percent: fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)") }) diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index f69713e2e682..6bef4616b7dd 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -156,35 +156,26 @@ defmodule Plausible.Stats.SQL.Fragments do def meta_value_column(:meta), do: :"meta.value" def meta_value_column(:entry_meta), do: :"entry_meta.value" - @doc """ - Convenience Ecto macro for wrapping select_merge where each value gets in turn passed to selected_as. 
- - ### Examples - - iex> select_merge_as(q, [t], %{ foo: t.column }) |> expand_macro_once - "select_merge(q, [t], %{foo: selected_as(t.column, :foo)})" - """ - defmacro select_merge_as(q, binding, map_literal) do - selected_as_map = - update_literal_map_values(map_literal, fn {key, expr} -> - quote(do: selected_as(unquote(expr), unquote(key))) + defp update_literal_map_values({:%{}, ctx, keyword_list}, mapper_fn) do + { + :%{}, + ctx, + Enum.map(keyword_list, fn {key, expr} -> + {key, mapper_fn.({key, expr})} end) - - quote do - select_merge(unquote(q), unquote(binding), unquote(selected_as_map)) - end + } end @doc """ - Convenience Ecto macro for wrapping a map passed to select_merge such that each + Convenience Ecto macro for wrapping a map passed to select_merge_as such that each expression gets wrapped in dynamic and set as selected_as. ### Examples - iex> wrap_select_columns([t], %{ foo: t.column }) |> expand_macro_once + iex> wrap_expression([t], %{ foo: t.column }) |> expand_macro_once "%{foo: dynamic([t], selected_as(t.column, :foo))}" """ - defmacro wrap_select_columns(binding, map_literal) do + defmacro wrap_expression(binding, map_literal) do update_literal_map_values(map_literal, fn {key, expr} -> key_expr = if Macro.quoted_literal?(key) do @@ -197,13 +188,17 @@ defmodule Plausible.Stats.SQL.Fragments do end) end - defp update_literal_map_values({:%{}, ctx, keyword_list}, mapper_fn) do - { - :%{}, - ctx, - Enum.map(keyword_list, fn {key, expr} -> - {key, mapper_fn.({key, expr})} - end) - } + @doc """ + Convenience Ecto macro for wrapping select_merge where each value gets in turn passed to selected_as. 
+ + ### Examples + + iex> select_merge_as(q, [t], %{ foo: t.column }) |> expand_macro_once + "select_merge(q, [], ^wrap_expression([t], %{foo: t.column}))" + """ + defmacro select_merge_as(q, binding, map_literal) do + quote do + select_merge(unquote(q), [], ^wrap_expression(unquote(binding), unquote(map_literal))) + end end end diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 92910223f97a..27d6c08b008d 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -164,7 +164,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do q, [e, s], %{ - ^key => field(unquote(table_name), ^key) + key => field(unquote(table_name), ^key) } ) end) From 1698ead4f98880e00d9fabd4a5ac347d3d2ee146 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 15:25:53 +0300 Subject: [PATCH 10/42] Wrap more code --- lib/plausible/stats/base.ex | 53 ++++++++++-------------- lib/plausible/stats/sql/query_builder.ex | 10 ++--- 2 files changed, 24 insertions(+), 39 deletions(-) diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index b35ef413dec8..b0c2e47c910f 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -160,18 +160,17 @@ defmodule Plausible.Stats.Base do def total_visitors_subquery(site, query, include_imported) def total_visitors_subquery(site, query, true = _include_imported) do - dynamic( - [e], - selected_as( + wrap_expression([e], %{ + total_visitors: subquery(total_visitors(site, query)) + - subquery(Plausible.Stats.Imported.total_imported_visitors(site, query)), - :__total_visitors - ) - ) + subquery(Plausible.Stats.Imported.total_imported_visitors(site, query)) + }) end def total_visitors_subquery(site, query, false = _include_imported) do - dynamic([e], selected_as(subquery(total_visitors(site, query)), :__total_visitors)) + wrap_expression([e], %{ + total_visitors: subquery(total_visitors(site, query)) + }) end def 
add_percentage_metric(q, site, query, metrics) do @@ -179,19 +178,14 @@ defmodule Plausible.Stats.Base do total_query = Query.set_dimensions(query, []) q - |> select_merge( - ^%{__total_visitors: total_visitors_subquery(site, total_query, query.include_imported)} - ) - |> select_merge(%{ + |> select_merge(^total_visitors_subquery(site, total_query, query.include_imported)) + |> select_merge_as([], %{ percentage: - selected_as( - fragment( - "if(? > 0, round(? / ? * 100, 1), null)", - selected_as(:__total_visitors), - selected_as(:visitors), - selected_as(:__total_visitors) - ), - :percentage + fragment( + "if(? > 0, round(? / ? * 100, 1), null)", + selected_as(:total_visitors), + selected_as(:visitors), + selected_as(:total_visitors) ) }) else @@ -211,19 +205,14 @@ defmodule Plausible.Stats.Base do # :TRICKY: Subquery is used due to event:goal breakdown above doing an UNION ALL subquery(q) - |> select_merge( - ^%{total_visitors: total_visitors_subquery(site, total_query, query.include_imported)} - ) - |> select_merge([e], %{ + |> select_merge(^total_visitors_subquery(site, total_query, query.include_imported)) + |> select_merge_as([e], %{ conversion_rate: - selected_as( - fragment( - "if(? > 0, round(? / ? * 100, 1), 0)", - selected_as(:__total_visitors), - e.visitors, - selected_as(:__total_visitors) - ), - :conversion_rate + fragment( + "if(? > 0, round(? / ? 
* 100, 1), 0)", + selected_as(:total_visitors), + e.visitors, + selected_as(:total_visitors) ) }) else diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 27d6c08b008d..05fc56150c61 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -182,18 +182,14 @@ defmodule Plausible.Stats.SQL.QueryBuilder do |> Query.set_dimensions([]) q - |> select_merge( - ^%{ - total_visitors: Base.total_visitors_subquery(site, total_query, query.include_imported) - } - ) + |> select_merge(^Base.total_visitors_subquery(site, total_query, query.include_imported)) |> select_merge_as([e], %{ conversion_rate: fragment( "if(? > 0, round(? / ? * 100, 1), 0)", - selected_as(:__total_visitors), + selected_as(:total_visitors), selected_as(:visitors), - selected_as(:__total_visitors) + selected_as(:total_visitors) ) }) else From 45484612f2c46b942fa70e69d6d2a021d5b781d9 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 15:34:21 +0300 Subject: [PATCH 11/42] select_merge_as more --- lib/plausible/stats/base.ex | 4 ++-- lib/plausible/stats/sql/fragments.ex | 2 ++ lib/plausible/stats/sql/query_builder.ex | 23 +++++++++++------------ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index b0c2e47c910f..85e06ac91413 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -178,7 +178,7 @@ defmodule Plausible.Stats.Base do total_query = Query.set_dimensions(query, []) q - |> select_merge(^total_visitors_subquery(site, total_query, query.include_imported)) + |> select_merge_as([], total_visitors_subquery(site, total_query, query.include_imported)) |> select_merge_as([], %{ percentage: fragment( @@ -205,7 +205,7 @@ defmodule Plausible.Stats.Base do # :TRICKY: Subquery is used due to event:goal breakdown above doing an UNION ALL subquery(q) - |> select_merge(^total_visitors_subquery(site, 
total_query, query.include_imported)) + |> select_merge_as([], total_visitors_subquery(site, total_query, query.include_imported)) |> select_merge_as([e], %{ conversion_rate: fragment( diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index 6bef4616b7dd..2a076f0ef1f5 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -166,6 +166,8 @@ defmodule Plausible.Stats.SQL.Fragments do } end + defp update_literal_map_values(ast, _), do: ast + @doc """ Convenience Ecto macro for wrapping a map passed to select_merge_as such that each expression gets wrapped in dynamic and set as selected_as. diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 05fc56150c61..efa5d156d20c 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -124,19 +124,19 @@ defmodule Plausible.Stats.SQL.QueryBuilder do from(e in q, array_join: goal in Expression.event_goal_join(events, page_regexes), - select_merge: %{ - ^shortname(query, dimension) => fragment("?", goal) - }, group_by: goal, where: goal != 0 and (e.name == "pageview" or goal < 0) ) + |> select_merge_as([e, goal], %{ + shortname(query, dimension) => fragment("?", goal) + }) end defp dimension_group_by(q, query, dimension) do key = shortname(query, dimension) q - |> select_merge(^Expression.dimension(key, dimension, query)) + |> select_merge_as([], Expression.dimension(key, dimension, query)) |> group_by([], selected_as(^key)) end @@ -160,13 +160,9 @@ defmodule Plausible.Stats.SQL.QueryBuilder do Enum.reduce(unquote(list), unquote(q), fn metric_or_dimension, q -> key = shortname(unquote(query), metric_or_dimension) - select_merge_as( - q, - [e, s], - %{ - key => field(unquote(table_name), ^key) - } - ) + select_merge_as(q, [e, s], %{ + key => field(unquote(table_name), ^key) + }) end) end end @@ -182,7 +178,10 @@ defmodule Plausible.Stats.SQL.QueryBuilder do |> 
Query.set_dimensions([]) q - |> select_merge(^Base.total_visitors_subquery(site, total_query, query.include_imported)) + |> select_merge_as( + [], + Base.total_visitors_subquery(site, total_query, query.include_imported) + ) |> select_merge_as([e], %{ conversion_rate: fragment( From def9e2682e65d930a04c5e316809f16a6d71c359 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 15:38:17 +0300 Subject: [PATCH 12/42] Move defp to the end --- lib/plausible/stats/base.ex | 4 ++-- lib/plausible/stats/sql/fragments.ex | 24 ++++++++++++------------ lib/plausible/stats/sql/query_builder.ex | 6 +++--- lib/plausible/stats/util.ex | 1 - 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 85e06ac91413..5b8f576877a7 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -160,7 +160,7 @@ defmodule Plausible.Stats.Base do def total_visitors_subquery(site, query, include_imported) def total_visitors_subquery(site, query, true = _include_imported) do - wrap_expression([e], %{ + wrap_expression([], %{ total_visitors: subquery(total_visitors(site, query)) + subquery(Plausible.Stats.Imported.total_imported_visitors(site, query)) @@ -168,7 +168,7 @@ defmodule Plausible.Stats.Base do end def total_visitors_subquery(site, query, false = _include_imported) do - wrap_expression([e], %{ + wrap_expression([], %{ total_visitors: subquery(total_visitors(site, query)) }) end diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index 2a076f0ef1f5..fbc9dd2b0647 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -156,18 +156,6 @@ defmodule Plausible.Stats.SQL.Fragments do def meta_value_column(:meta), do: :"meta.value" def meta_value_column(:entry_meta), do: :"entry_meta.value" - defp update_literal_map_values({:%{}, ctx, keyword_list}, mapper_fn) do - { - :%{}, - ctx, - Enum.map(keyword_list, fn {key, 
expr} -> - {key, mapper_fn.({key, expr})} - end) - } - end - - defp update_literal_map_values(ast, _), do: ast - @doc """ Convenience Ecto macro for wrapping a map passed to select_merge_as such that each expression gets wrapped in dynamic and set as selected_as. @@ -203,4 +191,16 @@ defmodule Plausible.Stats.SQL.Fragments do select_merge(unquote(q), [], ^wrap_expression(unquote(binding), unquote(map_literal))) end end + + defp update_literal_map_values({:%{}, ctx, keyword_list}, mapper_fn) do + { + :%{}, + ctx, + Enum.map(keyword_list, fn {key, expr} -> + {key, mapper_fn.({key, expr})} + end) + } + end + + defp update_literal_map_values(ast, _), do: ast end diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index efa5d156d20c..8cf92725bcac 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -124,12 +124,12 @@ defmodule Plausible.Stats.SQL.QueryBuilder do from(e in q, array_join: goal in Expression.event_goal_join(events, page_regexes), + select_merge: %{ + ^shortname(query, dimension) => fragment("?", goal) + }, group_by: goal, where: goal != 0 and (e.name == "pageview" or goal < 0) ) - |> select_merge_as([e, goal], %{ - shortname(query, dimension) => fragment("?", goal) - }) end defp dimension_group_by(q, query, dimension) do diff --git a/lib/plausible/stats/util.ex b/lib/plausible/stats/util.ex index 60bdd7a931ab..de1411f637e2 100644 --- a/lib/plausible/stats/util.ex +++ b/lib/plausible/stats/util.ex @@ -6,7 +6,6 @@ defmodule Plausible.Stats.Util do @manually_removable_metrics [ :__internal_visits, :visitors, - :__total_visitors, :__breakdown_value, :total_visitors ] From ffbf21dd1eb3b6809e1861baa7a90aac26ac843e Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Fri, 28 Jun 2024 16:21:23 +0300 Subject: [PATCH 13/42] include.time_labels parsing --- lib/plausible/stats/comparisons.ex | 10 ++--- lib/plausible/stats/filters/query_parser.ex | 30 +++++++++++--- 
lib/plausible/stats/query.ex | 11 +++--- test/plausible/stats/query_parser_test.exs | 44 ++++++++++----------- 4 files changed, 57 insertions(+), 38 deletions(-) diff --git a/lib/plausible/stats/comparisons.ex b/lib/plausible/stats/comparisons.ex index 4993c6dd33de..157be97c1f8f 100644 --- a/lib/plausible/stats/comparisons.ex +++ b/lib/plausible/stats/comparisons.ex @@ -163,21 +163,21 @@ defmodule Plausible.Stats.Comparisons do end defp maybe_include_imported(query, source_query) do - requested? = source_query.imported_data_requested + requested? = source_query.include.imports case Query.ensure_include_imported(query, requested?) do :ok -> struct!(query, - imported_data_requested: true, include_imported: true, - skip_imported_reason: nil + skip_imported_reason: nil, + include: Map.put(query.include, :imports, true) ) {:error, reason} -> struct!(query, - imported_data_requested: requested?, include_imported: false, - skip_imported_reason: reason + skip_imported_reason: reason, + include: Map.put(query.include, :imports, requested?) 
) end end diff --git a/lib/plausible/stats/filters/query_parser.ex b/lib/plausible/stats/filters/query_parser.ex index 3f802dcb64df..97744dd12264 100644 --- a/lib/plausible/stats/filters/query_parser.ex +++ b/lib/plausible/stats/filters/query_parser.ex @@ -6,6 +6,11 @@ defmodule Plausible.Stats.Filters.QueryParser do alias Plausible.Stats.Query alias Plausible.Stats.Metrics + @default_include %{ + imports: false, + time_labels: false + } + def parse(site, params, now \\ nil) when is_map(params) do with {:ok, metrics} <- parse_metrics(Map.get(params, "metrics", [])), {:ok, filters} <- parse_filters(Map.get(params, "filters", [])), @@ -22,8 +27,8 @@ defmodule Plausible.Stats.Filters.QueryParser do dimensions: dimensions, order_by: order_by, timezone: site.timezone, - imported_data_requested: Map.get(include, :imports, false), - preloaded_goals: preloaded_goals + preloaded_goals: preloaded_goals, + include: include }, :ok <- validate_order_by(query), :ok <- validate_goal_filters(query), @@ -226,9 +231,24 @@ defmodule Plausible.Stats.Filters.QueryParser do defp parse_order_direction([_, "desc"]), do: {:ok, :desc} defp parse_order_direction(entry), do: {:error, "Invalid order_by entry '#{inspect(entry)}'"} - defp parse_include(%{"imports" => value}) when is_boolean(value), do: {:ok, %{imports: value}} - defp parse_include(%{}), do: {:ok, %{}} - defp parse_include(include), do: {:error, "Invalid include passed '#{inspect(include)}'"} + defp parse_include(include) when is_map(include) do + with {:ok, parsed_include_list} <- parse_list(include, &parse_include_value/1) do + include = Map.merge(@default_include, Enum.into(parsed_include_list, %{})) + + {:ok, include} + end + end + + defp parse_include(entry), do: {:error, "Invalid include passed '#{inspect(entry)}'"} + + defp parse_include_value({"imports", value}) when is_boolean(value), + do: {:ok, {:imports, value}} + + defp parse_include_value({"time_labels", value}) when is_boolean(value), + do: {:ok, {:time_labels, 
value}} + + defp parse_include_value({key, value}), + do: {:error, "Invalid include entry '#{inspect(%{key => value})}'"} defp parse_filter_key_string(filter_key, error_message \\ "") do case filter_key do diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index 75fb8c503c08..628e0f1f4f70 100644 --- a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -7,7 +7,6 @@ defmodule Plausible.Stats.Query do dimensions: [], filters: [], sample_threshold: 20_000_000, - imported_data_requested: false, include_imported: false, skip_imported_reason: nil, now: nil, @@ -314,7 +313,7 @@ defmodule Plausible.Stats.Query do end defp put_imported_opts(query, site, params) do - requested? = params["with_imported"] == "true" || query.imported_data_requested + requested? = params["with_imported"] == "true" || query.include.imports latest_import_end_date = if site do @@ -328,15 +327,15 @@ defmodule Plausible.Stats.Query do case ensure_include_imported(query, requested?) do :ok -> struct!(query, - imported_data_requested: true, - include_imported: true + include_imported: true, + include: Map.put(query.include, :imports, true) ) {:error, reason} -> struct!(query, - imported_data_requested: requested?, include_imported: false, - skip_imported_reason: reason + skip_imported_reason: reason, + include: Map.put(query.include, :imports, requested?) 
) end end diff --git a/test/plausible/stats/query_parser_test.exs b/test/plausible/stats/query_parser_test.exs index bf3f3bedb492..b0e368aef284 100644 --- a/test/plausible/stats/query_parser_test.exs +++ b/test/plausible/stats/query_parser_test.exs @@ -27,7 +27,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -47,7 +47,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -85,7 +85,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -115,7 +115,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -160,7 +160,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -184,7 +184,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -209,7 +209,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -248,11 +248,11 @@ defmodule Plausible.Stats.Filters.QueryParserTest do 
end describe "include validation" do - test "setting include.imports", %{site: site} do + test "setting include values", %{site: site} do %{ "metrics" => ["visitors"], "date_range" => "all", - "include" => %{"imports" => true} + "include" => %{"imports" => true, "time_labels" => true} } |> check_success(site, %{ metrics: [:visitors], @@ -261,7 +261,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: true, + include: %{imports: true, time_labels: true}, preloaded_goals: [] }) end @@ -297,7 +297,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [{:page, "/thank-you"}, {:event, "Signup"}] }) end @@ -379,7 +379,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: ["event:#{unquote(dimension)}"], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -399,7 +399,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: ["visit:#{unquote(dimension)}"], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -418,7 +418,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: ["event:props:foobar"], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -474,7 +474,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: [{:events, :desc}, {:visitors, :asc}], timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -493,7 +493,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do 
dimensions: ["event:name"], order_by: [{"event:name", :desc}], timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -589,7 +589,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [event: "Signup"] }) end @@ -609,7 +609,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: ["event:goal"], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [event: "Signup"] }) end @@ -631,7 +631,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [event: "Signup"] }) end @@ -675,7 +675,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: ["visit:device"], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -705,7 +705,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: ["event:page"], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end @@ -723,7 +723,7 @@ defmodule Plausible.Stats.Filters.QueryParserTest do dimensions: [], order_by: nil, timezone: site.timezone, - imported_data_requested: false, + include: %{imports: false, time_labels: false}, preloaded_goals: [] }) end From 2b4c13c43eb159f39b6ee3f33d173f40647c05e9 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Mon, 1 Jul 2024 10:50:38 +0300 Subject: [PATCH 14/42] include.time_labels in result Note that the previous implementation of the labels from TimeSeries.ex was broken --- 
lib/plausible/stats/filters/query_parser.ex | 15 +- lib/plausible/stats/interval.ex | 53 +++++++ lib/plausible/stats/query_result.ex | 24 ++-- lib/plausible/stats/util.ex | 1 - test/plausible/stats/interval_test.exs | 132 ++++++++++++++++++ test/plausible/stats/query_parser_test.exs | 12 +- .../external_stats_controller/query_test.exs | 47 ++++++- 7 files changed, 265 insertions(+), 19 deletions(-) diff --git a/lib/plausible/stats/filters/query_parser.ex b/lib/plausible/stats/filters/query_parser.ex index 97744dd12264..257aac8b199c 100644 --- a/lib/plausible/stats/filters/query_parser.ex +++ b/lib/plausible/stats/filters/query_parser.ex @@ -33,7 +33,8 @@ defmodule Plausible.Stats.Filters.QueryParser do :ok <- validate_order_by(query), :ok <- validate_goal_filters(query), :ok <- validate_custom_props_access(site, query), - :ok <- validate_metrics(query) do + :ok <- validate_metrics(query), + :ok <- validate_include(query) do {:ok, query} end end @@ -406,7 +407,7 @@ defmodule Plausible.Stats.Filters.QueryParser do end end - def event_dimensions_not_allowing_session_metrics?(dimensions) do + defp event_dimensions_not_allowing_session_metrics?(dimensions) do Enum.any?(dimensions, fn "event:page" -> false "event:" <> _ -> true @@ -414,6 +415,16 @@ defmodule Plausible.Stats.Filters.QueryParser do end) end + defp validate_include(query) do + time_dimension? = Enum.any?(query.dimensions, &String.starts_with?(&1, "time")) + + if query.include.time_labels and not time_dimension? 
do + {:error, "Invalid include.time_labels: requires a time dimension"} + else + :ok + end + end + defp parse_list(list, parser_function) do Enum.reduce_while(list, {:ok, []}, fn value, {:ok, results} -> case parser_function.(value) do diff --git a/lib/plausible/stats/interval.ex b/lib/plausible/stats/interval.ex index 116f711c99e4..2335469eac21 100644 --- a/lib/plausible/stats/interval.ex +++ b/lib/plausible/stats/interval.ex @@ -103,4 +103,57 @@ defmodule Plausible.Stats.Interval do def valid_for_period?(period, interval, opts \\ []) do interval in Map.get(valid_by_period(opts), period, []) end + + @doc """ + Returns list of time bucket labels for the given query. + """ + def time_dimension(query) do + Enum.find(query.dimensions, &String.starts_with?(&1, "time")) + end + + def time_labels(query) do + time_labels_for_dimension(time_dimension(query), query) + end + + defp time_labels_for_dimension("time:month", query) do + n_buckets = + Timex.diff( + query.date_range.last, + Date.beginning_of_month(query.date_range.first), + :months + ) + + Enum.map(n_buckets..0, fn shift -> + query.date_range.last + |> Date.beginning_of_month() + |> Timex.shift(months: -shift) + end) + end + + defp time_labels_for_dimension("time:day", query) do + query.date_range + |> Enum.into([]) + end + + @full_day_in_hours 23 + defp time_labels_for_dimension("time:hour", query) do + n_buckets = + if query.date_range.first == query.date_range.last do + @full_day_in_hours + else + end_time = + query.date_range.last + |> Timex.to_datetime() + |> Timex.end_of_day() + + Timex.diff(end_time, query.date_range.first, :hours) + end + + Enum.map(0..n_buckets, fn step -> + query.date_range.first + |> Timex.to_datetime() + |> Timex.shift(hours: step) + |> DateTime.truncate(:second) + end) + end end diff --git a/lib/plausible/stats/query_result.ex b/lib/plausible/stats/query_result.ex index b9f374444834..6f4c0a3b5976 100644 --- a/lib/plausible/stats/query_result.ex +++ 
b/lib/plausible/stats/query_result.ex @@ -1,9 +1,9 @@ defmodule Plausible.Stats.QueryResult do @moduledoc false + alias Plausible.Stats.Interval alias Plausible.Stats.Util alias Plausible.Stats.Filters - alias Plausible.Stats.Query @derive Jason.Encoder defstruct results: [], @@ -34,16 +34,6 @@ defmodule Plausible.Stats.QueryResult do ) end - defp meta(%Query{skip_imported_reason: :unsupported_query}) do - %{ - warning: - "Imported stats are not included in the results because query parameters are not supported. " <> - "For more information, see: https://plausible.io/docs/stats-api#filtering-imported-stats" - } - end - - defp meta(_), do: %{} - defp dimension_label("event:goal", entry, query) do {events, paths} = Filters.Utils.split_goals(query.preloaded_goals) @@ -65,4 +55,16 @@ defmodule Plausible.Stats.QueryResult do end defp serializable_filter(filter), do: filter + + @import_warning "Imported stats are not included in the results because query parameters are not supported. " <> + "For more information, see: https://plausible.io/docs/stats-api#filtering-imported-stats" + + defp meta(query) do + %{ + warning: if(query.skip_imported_reason, do: @import_warning, else: nil), + time_labels: if(query.include.time_labels, do: Interval.time_labels(query), else: nil) + } + |> Enum.reject(fn {_, value} -> is_nil(value) end) + |> Enum.into(%{}) + end end diff --git a/lib/plausible/stats/util.ex b/lib/plausible/stats/util.ex index de1411f637e2..65e95078dd6c 100644 --- a/lib/plausible/stats/util.ex +++ b/lib/plausible/stats/util.ex @@ -6,7 +6,6 @@ defmodule Plausible.Stats.Util do @manually_removable_metrics [ :__internal_visits, :visitors, - :__breakdown_value, :total_visitors ] diff --git a/test/plausible/stats/interval_test.exs b/test/plausible/stats/interval_test.exs index 36e3c68093de..4c2169629b21 100644 --- a/test/plausible/stats/interval_test.exs +++ b/test/plausible/stats/interval_test.exs @@ -123,4 +123,136 @@ defmodule Plausible.Stats.IntervalTest do ) end end + 
+ describe "time_labels/1" do + test "with time:month dimension" do + assert time_labels(%{ + dimensions: ["visit:device", "time:month"], + date_range: Date.range(~D[2022-01-17], ~D[2022-02-01]) + }) == [ + ~D[2022-01-01], + ~D[2022-02-01] + ] + + assert time_labels(%{ + dimensions: ["visit:device", "time:month"], + date_range: Date.range(~D[2022-01-01], ~D[2022-03-07]) + }) == [ + ~D[2022-01-01], + ~D[2022-02-01], + ~D[2022-03-01] + ] + end + + test "with time:day dimension" do + assert time_labels(%{ + dimensions: ["time:day"], + date_range: Date.range(~D[2022-01-17], ~D[2022-02-02]) + }) == [ + ~D[2022-01-17], + ~D[2022-01-18], + ~D[2022-01-19], + ~D[2022-01-20], + ~D[2022-01-21], + ~D[2022-01-22], + ~D[2022-01-23], + ~D[2022-01-24], + ~D[2022-01-25], + ~D[2022-01-26], + ~D[2022-01-27], + ~D[2022-01-28], + ~D[2022-01-29], + ~D[2022-01-30], + ~D[2022-01-31], + ~D[2022-02-01], + ~D[2022-02-02] + ] + end + + test "with time:hour dimension" do + assert time_labels(%{ + dimensions: ["time:hour"], + date_range: Date.range(~D[2022-01-17], ~D[2022-01-17]) + }) == [ + ~U[2022-01-17 00:00:00Z], + ~U[2022-01-17 01:00:00Z], + ~U[2022-01-17 02:00:00Z], + ~U[2022-01-17 03:00:00Z], + ~U[2022-01-17 04:00:00Z], + ~U[2022-01-17 05:00:00Z], + ~U[2022-01-17 06:00:00Z], + ~U[2022-01-17 07:00:00Z], + ~U[2022-01-17 08:00:00Z], + ~U[2022-01-17 09:00:00Z], + ~U[2022-01-17 10:00:00Z], + ~U[2022-01-17 11:00:00Z], + ~U[2022-01-17 12:00:00Z], + ~U[2022-01-17 13:00:00Z], + ~U[2022-01-17 14:00:00Z], + ~U[2022-01-17 15:00:00Z], + ~U[2022-01-17 16:00:00Z], + ~U[2022-01-17 17:00:00Z], + ~U[2022-01-17 18:00:00Z], + ~U[2022-01-17 19:00:00Z], + ~U[2022-01-17 20:00:00Z], + ~U[2022-01-17 21:00:00Z], + ~U[2022-01-17 22:00:00Z], + ~U[2022-01-17 23:00:00Z] + ] + + assert time_labels(%{ + dimensions: ["time:hour"], + date_range: Date.range(~D[2022-01-17], ~D[2022-01-18]) + }) == [ + ~U[2022-01-17 00:00:00Z], + ~U[2022-01-17 01:00:00Z], + ~U[2022-01-17 02:00:00Z], + ~U[2022-01-17 03:00:00Z], + 
~U[2022-01-17 04:00:00Z], + ~U[2022-01-17 05:00:00Z], + ~U[2022-01-17 06:00:00Z], + ~U[2022-01-17 07:00:00Z], + ~U[2022-01-17 08:00:00Z], + ~U[2022-01-17 09:00:00Z], + ~U[2022-01-17 10:00:00Z], + ~U[2022-01-17 11:00:00Z], + ~U[2022-01-17 12:00:00Z], + ~U[2022-01-17 13:00:00Z], + ~U[2022-01-17 14:00:00Z], + ~U[2022-01-17 15:00:00Z], + ~U[2022-01-17 16:00:00Z], + ~U[2022-01-17 17:00:00Z], + ~U[2022-01-17 18:00:00Z], + ~U[2022-01-17 19:00:00Z], + ~U[2022-01-17 20:00:00Z], + ~U[2022-01-17 21:00:00Z], + ~U[2022-01-17 22:00:00Z], + ~U[2022-01-17 23:00:00Z], + ~U[2022-01-18 00:00:00Z], + ~U[2022-01-18 01:00:00Z], + ~U[2022-01-18 02:00:00Z], + ~U[2022-01-18 03:00:00Z], + ~U[2022-01-18 04:00:00Z], + ~U[2022-01-18 05:00:00Z], + ~U[2022-01-18 06:00:00Z], + ~U[2022-01-18 07:00:00Z], + ~U[2022-01-18 08:00:00Z], + ~U[2022-01-18 09:00:00Z], + ~U[2022-01-18 10:00:00Z], + ~U[2022-01-18 11:00:00Z], + ~U[2022-01-18 12:00:00Z], + ~U[2022-01-18 13:00:00Z], + ~U[2022-01-18 14:00:00Z], + ~U[2022-01-18 15:00:00Z], + ~U[2022-01-18 16:00:00Z], + ~U[2022-01-18 17:00:00Z], + ~U[2022-01-18 18:00:00Z], + ~U[2022-01-18 19:00:00Z], + ~U[2022-01-18 20:00:00Z], + ~U[2022-01-18 21:00:00Z], + ~U[2022-01-18 22:00:00Z], + ~U[2022-01-18 23:00:00Z] + ] + end + end end diff --git a/test/plausible/stats/query_parser_test.exs b/test/plausible/stats/query_parser_test.exs index b0e368aef284..017ba51575a7 100644 --- a/test/plausible/stats/query_parser_test.exs +++ b/test/plausible/stats/query_parser_test.exs @@ -252,13 +252,14 @@ defmodule Plausible.Stats.Filters.QueryParserTest do %{ "metrics" => ["visitors"], "date_range" => "all", + "dimensions" => ["time"], "include" => %{"imports" => true, "time_labels" => true} } |> check_success(site, %{ metrics: [:visitors], date_range: @date_range, filters: [], - dimensions: [], + dimensions: ["time"], order_by: nil, timezone: site.timezone, include: %{imports: true, time_labels: true}, @@ -274,6 +275,15 @@ defmodule Plausible.Stats.Filters.QueryParserTest do } |> 
check_error(site, ~r/Invalid include passed/) end + + test "setting include.time_labels without time dimension", %{site: site} do + %{ + "metrics" => ["visitors"], + "date_range" => "all", + "include" => %{"time_labels" => true} + } + |> check_error(site, ~r/Invalid include.time_labels: requires a time dimension/) + end end describe "event:goal filter validation" do diff --git a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs index 331687e1f0ae..b4c2698cc128 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs @@ -1096,7 +1096,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do end describe "timeseries" do - test "shows hourly data for a certain date", %{conn: conn, site: site} do + test "shows hourly data for a certain date with time_labels", %{conn: conn, site: site} do populate_stats(site, [ build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 00:00:00]), build(:pageview, user_id: @user_id, timestamp: ~N[2021-01-01 00:10:00]), @@ -1108,16 +1108,44 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do "site_id" => site.domain, "metrics" => ["visitors", "pageviews", "visits", "visit_duration", "bounce_rate"], "date_range" => ["2021-01-01", "2021-01-01"], - "dimensions" => ["time:hour"] + "dimensions" => ["time:hour"], + "include" => %{"time_labels" => true} }) assert json_response(conn, 200)["results"] == [ %{"dimensions" => ["2021-01-01T00:00:00Z"], "metrics" => [1, 2, 1, 600, 0]}, %{"dimensions" => ["2021-01-01T23:00:00Z"], "metrics" => [1, 1, 1, 0, 100]} ] + + assert json_response(conn, 200)["meta"]["time_labels"] == [ + "2021-01-01T00:00:00Z", + "2021-01-01T01:00:00Z", + "2021-01-01T02:00:00Z", + "2021-01-01T03:00:00Z", + "2021-01-01T04:00:00Z", + "2021-01-01T05:00:00Z", + "2021-01-01T06:00:00Z", + 
"2021-01-01T07:00:00Z", + "2021-01-01T08:00:00Z", + "2021-01-01T09:00:00Z", + "2021-01-01T10:00:00Z", + "2021-01-01T11:00:00Z", + "2021-01-01T12:00:00Z", + "2021-01-01T13:00:00Z", + "2021-01-01T14:00:00Z", + "2021-01-01T15:00:00Z", + "2021-01-01T16:00:00Z", + "2021-01-01T17:00:00Z", + "2021-01-01T18:00:00Z", + "2021-01-01T19:00:00Z", + "2021-01-01T20:00:00Z", + "2021-01-01T21:00:00Z", + "2021-01-01T22:00:00Z", + "2021-01-01T23:00:00Z" + ] end - test "shows last 7 days of visitors", %{conn: conn, site: site} do + test "shows last 7 days of visitors with time labels", %{conn: conn, site: site} do populate_stats(site, [ build(:pageview, timestamp: ~N[2021-01-01 00:00:00]), build(:pageview, timestamp: ~N[2021-01-07 23:59:00]) @@ -1128,13 +1156,24 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do "site_id" => site.domain, "metrics" => ["visitors"], "date_range" => ["2021-01-01", "2021-01-07"], - "dimensions" => ["time"] + "dimensions" => ["time"], + "include" => %{"time_labels" => true} }) assert json_response(conn, 200)["results"] == [ %{"dimensions" => ["2021-01-01"], "metrics" => [1]}, %{"dimensions" => ["2021-01-07"], "metrics" => [1]} ] + + assert json_response(conn, 200)["meta"]["time_labels"] == [ + "2021-01-01", + "2021-01-02", + "2021-01-03", + "2021-01-04", + "2021-01-05", + "2021-01-06", + "2021-01-07" + ] end test "shows last 6 months of visitors", %{conn: conn, site: site} do From 69ece9a6835c0269efc2974fc676184e68d2fc9c Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Mon, 1 Jul 2024 10:58:19 +0300 Subject: [PATCH 15/42] Apply consistent function in imports and timeseries.ex --- lib/plausible/stats/imported/imported.ex | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index 4fcdd04a66de..6a8f34b5e4f5 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -256,8 +256,10 @@ 
defmodule Plausible.Stats.Imported do defp apply_interval(imported_q, %Plausible.Stats.Query{interval: "week"} = query, _site) do imported_q - |> group_by([i], weekstart_not_before(i.date, ^query.date_range.first)) - |> select_merge([i], %{date: weekstart_not_before(i.date, ^query.date_range.first)}) + |> group_by([i], weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone)) + |> select_merge([i], %{ + date: weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone) + }) end defp apply_interval(imported_q, _query, _site) do From 1968d3d3e0ed9f5a1857b18eb63d9076528059b5 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Mon, 1 Jul 2024 11:41:22 +0300 Subject: [PATCH 16/42] Remove boilerplate --- lib/plausible/stats/timeseries.ex | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 4914023bfc4a..c1295511d08a 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -6,18 +6,6 @@ defmodule Plausible.Stats.Timeseries do import Ecto.Query use Plausible.Stats.SQL.Fragments - @typep metric :: - :pageviews - | :events - | :visitors - | :visits - | :bounce_rate - | :visit_duration - | :average_revenue - | :total_revenue - @typep value :: nil | integer() | float() - @type results :: nonempty_list(%{required(:date) => Date.t(), required(metric()) => value()}) - def timeseries(site, query, metrics) do steps = buckets(query) From 5e9a2c696fe7c030f3d42dfce0bfe609d5f5cc64 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Mon, 1 Jul 2024 13:18:56 +0300 Subject: [PATCH 17/42] WIP: Limited support for timeseries-with-querybuilder --- lib/plausible/stats/query.ex | 2 +- lib/plausible/stats/timeseries.ex | 363 ++++-------------------------- 2 files changed, 43 insertions(+), 322 deletions(-) diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index 628e0f1f4f70..ee336ffa39a9 100644 --- 
a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -35,7 +35,7 @@ defmodule Plausible.Stats.Query do query = __MODULE__ - |> struct!(now: now) + |> struct!(now: now, timezone: site.timezone) |> put_experimental_session_count(site, params) |> put_experimental_reduced_joins(site, params) |> put_period(site, params) diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index c1295511d08a..a11d5f13446f 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -1,339 +1,60 @@ defmodule Plausible.Stats.Timeseries do use Plausible.ClickhouseRepo - use Plausible - alias Plausible.Stats.{Query, Util, Imported} - import Plausible.Stats.{Base} - import Ecto.Query - use Plausible.Stats.SQL.Fragments + alias Plausible.Stats.{Query, QueryOptimizer, QueryResult, SQL} def timeseries(site, query, metrics) do - steps = buckets(query) - - {event_metrics, session_metrics, _} = - Plausible.Stats.TableDecider.partition_metrics(metrics, query) - - {currency, event_metrics} = - on_ee do - Plausible.Stats.Goal.Revenue.get_revenue_tracking_currency(site, query, event_metrics) - else - {nil, event_metrics} - end - - Query.trace(query, metrics) - - [event_result, session_result] = - Plausible.ClickhouseRepo.parallel_tasks([ - fn -> events_timeseries(site, query, event_metrics) end, - fn -> sessions_timeseries(site, query, session_metrics) end - ]) - - Enum.map(steps, fn step -> - empty_row(step, metrics) - |> Map.merge(Enum.find(event_result, fn row -> date_eq(row[:date], step) end) || %{}) - |> Map.merge(Enum.find(session_result, fn row -> date_eq(row[:date], step) end) || %{}) - |> Map.update!(:date, &date_format/1) - |> cast_revenue_metrics_to_money(currency) - end) - |> Util.keep_requested_metrics(metrics) - end + query_with_metrics = + Query.set( + query, + metrics: metrics, + dimensions: [time_dimension(query)], + order_by: [{time_dimension(query), :asc}], + v2: true, + include: %{time_labels: true, imports: 
query.include.imports} + ) + |> QueryOptimizer.optimize() - defp events_timeseries(_, _, []), do: [] + IO.inspect(query_with_metrics) - defp events_timeseries(site, query, metrics) do - metrics = Util.maybe_add_visitors_metric(metrics) + q = SQL.QueryBuilder.build(query_with_metrics, site) - from(e in base_event_query(site, query), select: ^select_event_metrics(metrics)) - |> select_bucket(:events, site, query) - |> Imported.merge_imported_timeseries(site, query, metrics) - |> maybe_add_timeseries_conversion_rate(site, query, metrics) + q + |> IO.inspect() |> ClickhouseRepo.all() + |> QueryResult.from(query_with_metrics) + |> build_timeseries_result(query_with_metrics) end - defp sessions_timeseries(_, _, []), do: [] - - defp sessions_timeseries(site, query, metrics) do - from(e in query_sessions(site, query), select: ^select_session_metrics(metrics, query)) - |> filter_converted_sessions(site, query) - |> select_bucket(:sessions, site, query) - |> Imported.merge_imported_timeseries(site, query, metrics) - |> ClickhouseRepo.all() - |> Util.keep_requested_metrics(metrics) - end - - defp buckets(%Query{interval: "month"} = query) do - n_buckets = Timex.diff(query.date_range.last, query.date_range.first, :months) - - Enum.map(n_buckets..0, fn shift -> - query.date_range.last - |> Timex.beginning_of_month() - |> Timex.shift(months: -shift) - end) - end - - defp buckets(%Query{interval: "week"} = query) do - n_buckets = Timex.diff(query.date_range.last, query.date_range.first, :weeks) - - Enum.map(0..n_buckets, fn shift -> - query.date_range.first - |> Timex.shift(weeks: shift) - |> date_or_weekstart(query) - end) - end - - defp buckets(%Query{interval: "date"} = query) do - Enum.into(query.date_range, []) - end - - @full_day_in_hours 23 - defp buckets(%Query{interval: "hour"} = query) do - n_buckets = - if query.date_range.first == query.date_range.last do - @full_day_in_hours - else - Timex.diff(query.date_range.last, query.date_range.first, :hours) - end - - 
Enum.map(0..n_buckets, fn step -> - query.date_range.first - |> Timex.to_datetime() - |> Timex.shift(hours: step) - end) - end - - defp buckets(%Query{period: "30m", interval: "minute"}) do - Enum.into(-30..-1, []) - end - - @full_day_in_minutes 1439 - defp buckets(%Query{interval: "minute"} = query) do - n_buckets = - if query.date_range.first == query.date_range.last do - @full_day_in_minutes - else - Timex.diff(query.date_range.last, query.date_range.first, :minutes) - end - - Enum.map(0..n_buckets, fn step -> - query.date_range.first - |> Timex.to_datetime() - |> Timex.shift(minutes: step) - end) - end - - defp date_eq(%DateTime{} = left, %DateTime{} = right) do - NaiveDateTime.compare(left, right) == :eq - end - - defp date_eq(%Date{} = left, %Date{} = right) do - Date.compare(left, right) == :eq - end - - defp date_eq(left, right) do - left == right - end - - defp date_format(%DateTime{} = date) do - Timex.format!(date, "{YYYY}-{0M}-{0D} {h24}:{m}:{s}") - end - - defp date_format(date) do - date - end - - defp select_bucket(q, _table, site, %Query{interval: "month"}) do - from( - e in q, - group_by: fragment("toStartOfMonth(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - order_by: fragment("toStartOfMonth(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - select_merge: %{ - date: fragment("toStartOfMonth(toTimeZone(?, ?))", e.timestamp, ^site.timezone) - } - ) - end - - defp select_bucket(q, _table, site, %Query{interval: "week"} = query) do - {first_datetime, _} = utc_boundaries(query, site) - - from( - e in q, - select_merge: %{date: weekstart_not_before(e.timestamp, ^first_datetime, ^site.timezone)}, - group_by: weekstart_not_before(e.timestamp, ^first_datetime, ^site.timezone), - order_by: weekstart_not_before(e.timestamp, ^first_datetime, ^site.timezone) - ) - end - - defp select_bucket(q, _table, site, %Query{interval: "date"}) do - from( - e in q, - group_by: fragment("toDate(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - order_by: 
fragment("toDate(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - select_merge: %{ - date: fragment("toDate(toTimeZone(?, ?))", e.timestamp, ^site.timezone) - } - ) - end - - defp select_bucket(q, :sessions, site, %Query{ - interval: "hour", - experimental_session_count?: true - }) do - bucket_with_timeslots(q, site, 3600) - end - - defp select_bucket(q, _table, site, %Query{interval: "hour"}) do - from( - e in q, - group_by: fragment("toStartOfHour(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - order_by: fragment("toStartOfHour(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - select_merge: %{ - date: fragment("toStartOfHour(toTimeZone(?, ?))", e.timestamp, ^site.timezone) - } - ) - end - - defp select_bucket(q, :sessions, _site, %Query{ - interval: "minute", - period: "30m", - experimental_session_count?: true - }) do - from( - s in q, - array_join: - bucket in fragment( - "timeSlots(?, toUInt32(timeDiff(?, ?)), ?)", - s.start, - s.start, - s.timestamp, - 60 - ), - group_by: fragment("dateDiff('minute', now(), ?)", bucket), - order_by: fragment("dateDiff('minute', now(), ?)", bucket), - select_merge: %{ - date: fragment("dateDiff('minute', now(), ?)", bucket) - } - ) - end - - defp select_bucket(q, _table, _site, %Query{interval: "minute", period: "30m"}) do - from( - e in q, - group_by: fragment("dateDiff('minute', now(), ?)", e.timestamp), - order_by: fragment("dateDiff('minute', now(), ?)", e.timestamp), - select_merge: %{ - date: fragment("dateDiff('minute', now(), ?)", e.timestamp) - } - ) - end - - defp select_bucket(q, _table, site, %Query{interval: "minute"}) do - from( - e in q, - group_by: fragment("toStartOfMinute(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - order_by: fragment("toStartOfMinute(toTimeZone(?, ?))", e.timestamp, ^site.timezone), - select_merge: %{ - date: fragment("toStartOfMinute(toTimeZone(?, ?))", e.timestamp, ^site.timezone) - } - ) - end - - defp select_bucket(q, :sessions, site, %Query{ - interval: "minute", - 
experimental_session_count?: true - }) do - bucket_with_timeslots(q, site, 60) - end - - # Includes session in _every_ time bucket it was active in. - # Only done in hourly and minute graphs for performance reasons. - defp bucket_with_timeslots(q, site, period_in_seconds) do - from( - s in q, - array_join: - bucket in fragment( - "timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?))", - s.start, - ^site.timezone, - s.start, - s.timestamp, - ^period_in_seconds - ), - group_by: bucket, - order_by: bucket, - select_merge: %{ - date: fragment("?", bucket) - } - ) - end - - defp date_or_weekstart(date, query) do - weekstart = Timex.beginning_of_week(date) - - if Enum.member?(query.date_range, weekstart) do - weekstart - else - date + defp time_dimension(query) do + case query.interval do + "month" -> "time:month" + # :TODO: + "week" -> "time:month" + "date" -> "time:day" + "hour" -> "time:hour" end end - defp empty_row(date, metrics) do - Enum.reduce(metrics, %{date: date}, fn metric, row -> - case metric do - :pageviews -> Map.merge(row, %{pageviews: 0}) - :events -> Map.merge(row, %{events: 0}) - :visitors -> Map.merge(row, %{visitors: 0}) - :visits -> Map.merge(row, %{visits: 0}) - :views_per_visit -> Map.merge(row, %{views_per_visit: 0.0}) - :conversion_rate -> Map.merge(row, %{conversion_rate: 0.0}) - :bounce_rate -> Map.merge(row, %{bounce_rate: nil}) - :visit_duration -> Map.merge(row, %{visit_duration: nil}) - :average_revenue -> Map.merge(row, %{average_revenue: nil}) - :total_revenue -> Map.merge(row, %{total_revenue: nil}) - end - end) - end - - on_ee do - defp cast_revenue_metrics_to_money(results, revenue_goals) do - Plausible.Stats.Goal.Revenue.cast_revenue_metrics_to_money(results, revenue_goals) - end - else - defp cast_revenue_metrics_to_money(results, _revenue_goals), do: results - end + defp build_timeseries_result(query_result, query) do + results_map = + query_result.results + |> Enum.map(fn %{dimensions: [time_dimension_value], 
metrics: entry_metrics} -> + metrics_map = Enum.zip(query.metrics, entry_metrics) |> Enum.into(%{}) - defp maybe_add_timeseries_conversion_rate(q, site, query, metrics) do - if :conversion_rate in metrics do - # Having removed some filters, the query might become eligible - # for including imported data. However, we still want to make - # sure that that include_imported is in sync between original - # and the totals query. - totals_query = - query - |> Query.remove_filters(["event:goal", "event:props"]) - |> struct!(include_imported: query.include_imported) - - totals_timeseries_q = - from(e in base_event_query(site, totals_query), - select: ^select_event_metrics([:visitors]) - ) - |> select_bucket(:events, site, totals_query) - |> Imported.merge_imported_timeseries(site, totals_query, [:visitors]) - - from(e in subquery(q), - left_join: c in subquery(totals_timeseries_q), - on: e.date == c.date, - select_merge: %{ - total_visitors: c.visitors, - conversion_rate: - fragment( - "if(? > 0, round(? / ? 
* 100, 1), 0)", - c.visitors, - e.visitors, - c.visitors - ) + { + time_dimension_value, + Map.put(metrics_map, :date, time_dimension_value) } + end) + |> Enum.into(%{}) + + query_result.meta.time_labels + |> Enum.map(fn key -> + Map.get( + results_map, + key, + empty_row(key, query.metrics) ) - else - q - end + end) end end From ab3cf05a007fdae6c2c53274d4f3b0f074912ca2 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Mon, 1 Jul 2024 14:30:42 +0300 Subject: [PATCH 18/42] time:week dimension --- lib/plausible/stats/filters/query_parser.ex | 1 + lib/plausible/stats/query_optimizer.ex | 1 + lib/plausible/stats/sql/expression.ex | 6 ++++++ lib/plausible/stats/timeseries.ex | 20 +++++++++++++++++-- test/plausible/stats/query_optimizer_test.exs | 6 +++--- 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/lib/plausible/stats/filters/query_parser.ex b/lib/plausible/stats/filters/query_parser.ex index 257aac8b199c..0ed934fd683b 100644 --- a/lib/plausible/stats/filters/query_parser.ex +++ b/lib/plausible/stats/filters/query_parser.ex @@ -225,6 +225,7 @@ defmodule Plausible.Stats.Filters.QueryParser do defp parse_time("time"), do: {:ok, "time"} defp parse_time("time:hour"), do: {:ok, "time:hour"} defp parse_time("time:day"), do: {:ok, "time:day"} + defp parse_time("time:week"), do: {:ok, "time:week"} defp parse_time("time:month"), do: {:ok, "time:month"} defp parse_time(_), do: :error diff --git a/lib/plausible/stats/query_optimizer.ex b/lib/plausible/stats/query_optimizer.ex index 25146d950d73..c8a7de9165b6 100644 --- a/lib/plausible/stats/query_optimizer.ex +++ b/lib/plausible/stats/query_optimizer.ex @@ -80,6 +80,7 @@ defmodule Plausible.Stats.QueryOptimizer do cond do Timex.diff(last, first, :hours) <= 48 -> "time:hour" Timex.diff(last, first, :days) <= 40 -> "time:day" + Timex.diff(last, first, :weeks) <= 52 -> "time:week" true -> "time:month" end end diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index 
b016e13bdb9f..d63942c0a02a 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -38,6 +38,12 @@ defmodule Plausible.Stats.SQL.Expression do }) end + def dimension(key, "time:week", query) do + wrap_expression([t], %{ + key => weekstart_not_before(t.timestamp, ^query.date_range.first, ^query.timezone) + }) + end + def dimension(key, "time:month", query) do wrap_expression([t], %{ key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone) diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index a11d5f13446f..fee25f9f2194 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -28,8 +28,7 @@ defmodule Plausible.Stats.Timeseries do defp time_dimension(query) do case query.interval do "month" -> "time:month" - # :TODO: - "week" -> "time:month" + "week" -> "time:week" "date" -> "time:day" "hour" -> "time:hour" end @@ -57,4 +56,21 @@ defmodule Plausible.Stats.Timeseries do ) end) end + + defp empty_row(date, metrics) do + Enum.reduce(metrics, %{date: date}, fn metric, row -> + case metric do + :pageviews -> Map.merge(row, %{pageviews: 0}) + :events -> Map.merge(row, %{events: 0}) + :visitors -> Map.merge(row, %{visitors: 0}) + :visits -> Map.merge(row, %{visits: 0}) + :views_per_visit -> Map.merge(row, %{views_per_visit: 0.0}) + :conversion_rate -> Map.merge(row, %{conversion_rate: 0.0}) + :bounce_rate -> Map.merge(row, %{bounce_rate: 0.0}) + :visit_duration -> Map.merge(row, %{visit_duration: nil}) + :average_revenue -> Map.merge(row, %{average_revenue: nil}) + :total_revenue -> Map.merge(row, %{total_revenue: nil}) + end + end) + end end diff --git a/test/plausible/stats/query_optimizer_test.exs b/test/plausible/stats/query_optimizer_test.exs index aee73b88c0e9..d35b958bcd0d 100644 --- a/test/plausible/stats/query_optimizer_test.exs +++ b/test/plausible/stats/query_optimizer_test.exs @@ -74,17 +74,17 @@ defmodule 
Plausible.Stats.QueryOptimizerTest do assert perform(%{ date_range: Date.range(~D[2022-01-01], ~D[2022-02-16]), dimensions: ["time"] - }).dimensions == ["time:month"] + }).dimensions == ["time:week"] assert perform(%{ date_range: Date.range(~D[2022-01-01], ~D[2022-03-16]), dimensions: ["time"] - }).dimensions == ["time:month"] + }).dimensions == ["time:week"] assert perform(%{ date_range: Date.range(~D[2022-01-01], ~D[2022-03-16]), dimensions: ["time"] - }).dimensions == ["time:month"] + }).dimensions == ["time:week"] assert perform(%{ date_range: Date.range(~D[2022-01-01], ~D[2023-11-16]), From 0a43348879d12e41faf16a599ca7d7ee73ffe134 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Mon, 1 Jul 2024 14:33:32 +0300 Subject: [PATCH 19/42] cleanup: property -> dimension --- lib/plausible/stats/filters/filters.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/plausible/stats/filters/filters.ex b/lib/plausible/stats/filters/filters.ex index b6760965edf7..60c1575fc8db 100644 --- a/lib/plausible/stats/filters/filters.ex +++ b/lib/plausible/stats/filters/filters.ex @@ -83,8 +83,8 @@ defmodule Plausible.Stats.Filters do def parse(_), do: [] - def without_prefix(property) do - property + def without_prefix(dimension) do + dimension |> String.split(":") |> List.last() |> String.to_existing_atom() From 7609750dff982a7950a521ce298d6d9ee15575d0 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Mon, 1 Jul 2024 14:35:03 +0300 Subject: [PATCH 20/42] Make querying with time series work --- lib/plausible/stats/imported/base.ex | 9 +- lib/plausible/stats/imported/imported.ex | 99 ++++++++----------- .../timeseries_test.exs | 46 ++++----- 3 files changed, 71 insertions(+), 83 deletions(-) diff --git a/lib/plausible/stats/imported/base.ex b/lib/plausible/stats/imported/base.ex index 72a888d186d1..f2559e271cb5 100644 --- a/lib/plausible/stats/imported/base.ex +++ b/lib/plausible/stats/imported/base.ex @@ -29,9 +29,14 @@ defmodule 
Plausible.Stats.Imported.Base do "event:page" => "imported_pages", "event:name" => "imported_custom_events", - # NOTE: these properties can be only filtered by + # NOTE: these dimensions can be only filtered by "visit:screen" => "imported_devices", - "event:hostname" => "imported_pages" + "event:hostname" => "imported_pages", + + # NOTE: These dimensions are only used in group by + "time:month" => "imported_visitors", + "time:week" => "imported_visitors", + "time:day" => "imported_visitors" } @imported_custom_props Imported.imported_custom_props() diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index 6a8f34b5e4f5..e73c7c3aeb10 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -225,49 +225,6 @@ defmodule Plausible.Stats.Imported do {table, db_field} end - def merge_imported_timeseries(native_q, _, %Plausible.Stats.Query{include_imported: false}, _), - do: native_q - - def merge_imported_timeseries( - native_q, - site, - query, - metrics - ) do - imported_q = - site - |> Imported.Base.query_imported(query) - |> select_imported_metrics(metrics) - |> apply_interval(query, site) - - from(s in Ecto.Query.subquery(native_q), - full_join: i in subquery(imported_q), - on: s.date == i.date, - select: %{date: fragment("greatest(?, ?)", s.date, i.date)} - ) - |> select_joined_metrics(metrics) - end - - defp apply_interval(imported_q, %Plausible.Stats.Query{interval: "month"}, _site) do - imported_q - |> group_by([i], fragment("toStartOfMonth(?)", i.date)) - |> select_merge([i], %{date: fragment("toStartOfMonth(?)", i.date)}) - end - - defp apply_interval(imported_q, %Plausible.Stats.Query{interval: "week"} = query, _site) do - imported_q - |> group_by([i], weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone)) - |> select_merge([i], %{ - date: weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone) - }) - end - - defp apply_interval(imported_q, 
_query, _site) do - imported_q - |> group_by([i], i.date) - |> select_merge([i], %{date: i.date}) - end - def merge_imported(q, _, %Query{include_imported: false}, _), do: q def merge_imported(q, site, %Query{dimensions: []} = query, metrics) do @@ -554,11 +511,11 @@ defmodule Plausible.Stats.Imported do Enum.reduce(query.dimensions, q, fn dimension, q -> dim = Plausible.Stats.Filters.without_prefix(dimension) - group_imported_by(q, dim, shortname(query, dimension)) + group_imported_by(q, dim, shortname(query, dimension), query) end) end - defp group_imported_by(q, dim, key) when dim in [:source, :referrer] do + defp group_imported_by(q, dim, key, _query) when dim in [:source, :referrer] do q |> group_by([i], field(i, ^dim)) |> select_merge_as([i], %{ @@ -566,7 +523,7 @@ defmodule Plausible.Stats.Imported do }) end - defp group_imported_by(q, dim, key) + defp group_imported_by(q, dim, key, _query) when dim in [:utm_source, :utm_medium, :utm_campaign, :utm_term, :utm_content] do q |> group_by([i], field(i, ^dim)) @@ -574,34 +531,34 @@ defmodule Plausible.Stats.Imported do |> select_merge_as([i], %{key => field(i, ^dim)}) end - defp group_imported_by(q, :page, key) do + defp group_imported_by(q, :page, key, _query) do q |> group_by([i], i.page) |> select_merge_as([i], %{key => i.page, time_on_page: sum(i.time_on_page)}) end - defp group_imported_by(q, :country, key) do + defp group_imported_by(q, :country, key, _query) do q |> group_by([i], i.country) |> where([i], i.country != "ZZ") |> select_merge_as([i], %{key => i.country}) end - defp group_imported_by(q, :region, key) do + defp group_imported_by(q, :region, key, _query) do q |> group_by([i], i.region) |> where([i], i.region != "") |> select_merge_as([i], %{key => i.region}) end - defp group_imported_by(q, :city, key) do + defp group_imported_by(q, :city, key, _query) do q |> group_by([i], i.city) |> where([i], i.city != 0 and not is_nil(i.city)) |> select_merge_as([i], %{key => i.city}) end - defp 
group_imported_by(q, dim, key) when dim in [:device, :browser] do + defp group_imported_by(q, dim, key, _query) when dim in [:device, :browser] do q |> group_by([i], field(i, ^dim)) |> select_merge_as([i], %{ @@ -609,7 +566,7 @@ defmodule Plausible.Stats.Imported do }) end - defp group_imported_by(q, :browser_version, key) do + defp group_imported_by(q, :browser_version, key, _query) do q |> group_by([i], [i.browser_version]) |> select_merge_as([i], %{ @@ -617,7 +574,7 @@ defmodule Plausible.Stats.Imported do }) end - defp group_imported_by(q, :os, key) do + defp group_imported_by(q, :os, key, _query) do q |> group_by([i], i.operating_system) |> select_merge_as([i], %{ @@ -625,7 +582,7 @@ defmodule Plausible.Stats.Imported do }) end - defp group_imported_by(q, :os_version, key) do + defp group_imported_by(q, :os_version, key, _query) do q |> group_by([i], [i.operating_system_version]) |> select_merge_as([i], %{ @@ -639,19 +596,19 @@ defmodule Plausible.Stats.Imported do }) end - defp group_imported_by(q, dim, key) when dim in [:entry_page, :exit_page] do + defp group_imported_by(q, dim, key, _query) when dim in [:entry_page, :exit_page] do q |> group_by([i], field(i, ^dim)) |> select_merge_as([i], %{key => field(i, ^dim)}) end - defp group_imported_by(q, :name, key) do + defp group_imported_by(q, :name, key, _query) do q |> group_by([i], i.name) |> select_merge_as([i], %{key => i.name}) end - defp group_imported_by(q, :url, key) do + defp group_imported_by(q, :url, key, _query) do q |> group_by([i], i.link_url) |> select_merge_as([i], %{ @@ -659,7 +616,7 @@ defmodule Plausible.Stats.Imported do }) end - defp group_imported_by(q, :path, key) do + defp group_imported_by(q, :path, key, _query) do q |> group_by([i], i.path) |> select_merge_as([i], %{ @@ -667,6 +624,26 @@ defmodule Plausible.Stats.Imported do }) end + defp group_imported_by(q, :month, key, _query) do + q + |> group_by([i], fragment("toStartOfMonth(?)", i.date)) + |> select_merge_as([i], %{key => 
fragment("toStartOfMonth(?)", i.date)}) + end + + defp group_imported_by(q, :week, key, query) do + q + |> group_by([i], weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone)) + |> select_merge_as([i], %{ + key => weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone) + }) + end + + defp group_imported_by(q, :day, key, _query) do + q + |> group_by([i], i.date) + |> select_merge_as([i], %{key => i.date}) + end + defp select_joined_dimensions(q, query) do Enum.reduce(query.dimensions, q, fn dimension, q -> select_joined_dimension(q, dimension, shortname(query, dimension)) @@ -679,6 +656,12 @@ defmodule Plausible.Stats.Imported do }) end + defp select_joined_dimension(q, "time:" <> _, key) do + select_merge_as(q, [s, i], %{ + key => fragment("greatest(?, ?)", field(i, ^key), field(s, ^key)) + }) + end + defp select_joined_dimension(q, _dimension, key) do select_merge_as(q, [s, i], %{ key => fragment("if(empty(?), ?, ?)", field(s, ^key), field(i, ^key), field(s, ^key)) diff --git a/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs b/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs index 2a60ea2f74b2..bf8b39e73efb 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/timeseries_test.exs @@ -131,7 +131,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 02:00:00", @@ -139,7 +139,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 03:00:00", @@ -147,7 +147,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, 
"visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 04:00:00", @@ -155,7 +155,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 05:00:00", @@ -163,7 +163,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 06:00:00", @@ -171,7 +171,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 07:00:00", @@ -179,7 +179,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 08:00:00", @@ -187,7 +187,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 09:00:00", @@ -195,7 +195,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 10:00:00", @@ -203,7 +203,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 11:00:00", @@ -211,7 +211,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 12:00:00", @@ -219,7 +219,7 @@ defmodule 
PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 13:00:00", @@ -227,7 +227,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 14:00:00", @@ -235,7 +235,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 15:00:00", @@ -243,7 +243,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 16:00:00", @@ -251,7 +251,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 17:00:00", @@ -259,7 +259,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 18:00:00", @@ -267,7 +267,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 19:00:00", @@ -275,7 +275,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 20:00:00", @@ -283,7 +283,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 
0 }, %{ "date" => "2021-01-01 21:00:00", @@ -291,7 +291,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 22:00:00", @@ -299,7 +299,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 0, "pageviews" => 0, "visit_duration" => nil, - "bounce_rate" => nil + "bounce_rate" => 0 }, %{ "date" => "2021-01-01 23:00:00", @@ -1712,7 +1712,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.TimeseriesTest do "visits" => 1 }, %{ - "bounce_rate" => nil, + "bounce_rate" => 0, "date" => "2021-01-02", "events" => 0, "pageviews" => 0, From 73c79e72ead4eef7d465e6b83d38f4ea140b05d6 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 08:20:47 +0300 Subject: [PATCH 21/42] Refactor: Move special metrics (percentage, conversion rate) to own module --- lib/plausible/stats/base.ex | 80 ------------ lib/plausible/stats/sql/fragments.ex | 15 +++ lib/plausible/stats/sql/query_builder.ex | 89 +------------ lib/plausible/stats/sql/special_metrics.ex | 142 +++++++++++++++++++++ 4 files changed, 159 insertions(+), 167 deletions(-) create mode 100644 lib/plausible/stats/sql/special_metrics.ex diff --git a/lib/plausible/stats/base.ex b/lib/plausible/stats/base.ex index 5b8f576877a7..cb104ffc76ed 100644 --- a/lib/plausible/stats/base.ex +++ b/lib/plausible/stats/base.ex @@ -139,84 +139,4 @@ defmodule Plausible.Stats.Base do "^#{escaped}$" end - - defp total_visitors(site, query) do - base_event_query(site, query) - |> select([e], - total_visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id) - ) - end - - # `total_visitors_subquery` returns a subquery which selects `total_visitors` - - # the number used as the denominator in the calculation of `conversion_rate` and - # `percentage` metrics. 
- - # Usually, when calculating the totals, a new query is passed into this function, - # where certain filters (e.g. goal, props) are removed. That might make the query - # able to include imported data. However, we always want to include imported data - # only if it's included in the base query - otherwise the total will be based on - # a different data set, making the metric inaccurate. This is why we're using an - # explicit `include_imported` argument here. - def total_visitors_subquery(site, query, include_imported) - - def total_visitors_subquery(site, query, true = _include_imported) do - wrap_expression([], %{ - total_visitors: - subquery(total_visitors(site, query)) + - subquery(Plausible.Stats.Imported.total_imported_visitors(site, query)) - }) - end - - def total_visitors_subquery(site, query, false = _include_imported) do - wrap_expression([], %{ - total_visitors: subquery(total_visitors(site, query)) - }) - end - - def add_percentage_metric(q, site, query, metrics) do - if :percentage in metrics do - total_query = Query.set_dimensions(query, []) - - q - |> select_merge_as([], total_visitors_subquery(site, total_query, query.include_imported)) - |> select_merge_as([], %{ - percentage: - fragment( - "if(? > 0, round(? / ? * 100, 1), null)", - selected_as(:total_visitors), - selected_as(:visitors), - selected_as(:total_visitors) - ) - }) - else - q - end - end - - # Adds conversion_rate metric to query, calculated as - # X / Y where Y is the same breakdown value without goal or props - # filters. - def maybe_add_conversion_rate(q, site, query, metrics) do - if :conversion_rate in metrics do - total_query = - query - |> Query.remove_filters(["event:goal", "event:props"]) - |> Query.set_dimensions([]) - - # :TRICKY: Subquery is used due to event:goal breakdown above doing an UNION ALL - subquery(q) - |> select_merge_as([], total_visitors_subquery(site, total_query, query.include_imported)) - |> select_merge_as([e], %{ - conversion_rate: - fragment( - "if(? 
> 0, round(? / ? * 100, 1), 0)", - selected_as(:total_visitors), - e.visitors, - selected_as(:total_visitors) - ) - }) - else - q - end - end end diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index fbc9dd2b0647..9399190fcb51 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -203,4 +203,19 @@ defmodule Plausible.Stats.SQL.Fragments do end defp update_literal_map_values(ast, _), do: ast + + @doc """ + Macro that helps join two Ecto queries by selecting fields from either one + """ + defmacro select_join_fields(q, query, list, table_name) do + quote do + Enum.reduce(unquote(list), unquote(q), fn metric_or_dimension, q -> + key = shortname(unquote(query), metric_or_dimension) + + select_merge_as(q, [e, s], %{ + key => field(unquote(table_name), ^key) + }) + end) + end + end end diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 8cf92725bcac..99845f601075 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -43,9 +43,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do |> join_sessions_if_needed(site, events_query) |> build_group_by(events_query) |> merge_imported(site, events_query, events_query.metrics) - |> maybe_add_global_conversion_rate(site, events_query) - |> maybe_add_group_conversion_rate(site, events_query) - |> Base.add_percentage_metric(site, events_query, events_query.metrics) + |> SQL.SpecialMetrics.add(site, events_query) end defp join_sessions_if_needed(q, site, query) do @@ -86,9 +84,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do |> join_events_if_needed(site, sessions_query) |> build_group_by(sessions_query) |> merge_imported(site, sessions_query, sessions_query.metrics) - |> maybe_add_global_conversion_rate(site, sessions_query) - |> maybe_add_group_conversion_rate(site, sessions_query) - |> Base.add_percentage_metric(site, sessions_query, 
sessions_query.metrics) + |> SQL.SpecialMetrics.add(site, sessions_query) end def join_events_if_needed(q, site, query) do @@ -155,87 +151,6 @@ defmodule Plausible.Stats.SQL.QueryBuilder do ) end - defmacrop select_join_fields(q, query, list, table_name) do - quote do - Enum.reduce(unquote(list), unquote(q), fn metric_or_dimension, q -> - key = shortname(unquote(query), metric_or_dimension) - - select_merge_as(q, [e, s], %{ - key => field(unquote(table_name), ^key) - }) - end) - end - end - - # Adds conversion_rate metric to query, calculated as - # X / Y where Y is the same breakdown value without goal or props - # filters. - def maybe_add_global_conversion_rate(q, site, query) do - if :conversion_rate in query.metrics do - total_query = - query - |> Query.remove_filters(["event:goal", "event:props"]) - |> Query.set_dimensions([]) - - q - |> select_merge_as( - [], - Base.total_visitors_subquery(site, total_query, query.include_imported) - ) - |> select_merge_as([e], %{ - conversion_rate: - fragment( - "if(? > 0, round(? / ? * 100, 1), 0)", - selected_as(:total_visitors), - selected_as(:visitors), - selected_as(:total_visitors) - ) - }) - else - q - end - end - - # This function injects a group_conversion_rate metric into - # a dimensional query. It is calculated as X / Y, where: - # - # * X is the number of conversions for a set of dimensions - # result (conversion = number of visitors who - # completed the filtered goal with the filtered - # custom properties). - # - # * Y is the number of all visitors for this set of dimensions - # result without the `event:goal` and `event:props:*` - # filters. 
- def maybe_add_group_conversion_rate(q, site, query) do - if :group_conversion_rate in query.metrics do - group_totals_query = - query - |> Query.remove_filters(["event:goal", "event:props"]) - |> Query.set_metrics([:visitors]) - |> Query.set_order_by([]) - - from(e in subquery(q), - left_join: c in subquery(build(group_totals_query, site)), - on: ^build_group_by_join(query) - ) - |> select_merge_as([e, c], %{ - total_visitors: c.visitors, - group_conversion_rate: - fragment( - "if(? > 0, round(? / ? * 100, 1), 0)", - c.visitors, - e.visitors, - c.visitors - ) - }) - |> select_join_fields(query, query.dimensions, e) - |> select_join_fields(query, List.delete(query.metrics, :group_conversion_rate), e) - else - q - end - end - defp join_query_results({nil, _}, {nil, _}), do: nil defp join_query_results({events_q, events_query}, {nil, _}), diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex new file mode 100644 index 000000000000..f2465e97735f --- /dev/null +++ b/lib/plausible/stats/sql/special_metrics.ex @@ -0,0 +1,142 @@ +defmodule Plausible.Stats.SQL.SpecialMetrics do + @doc """ + This module defines how special metrics like `conversion_rate` and + `percentage` are calculated. + """ + + use Plausible.Stats.SQL.Fragments + + alias Plausible.Stats.{Base, Query, SQL} + + import Ecto.Query + import Plausible.Stats.Util + + def add(q, site, query) do + q + |> maybe_add_percentage_metric(site, query) + |> maybe_add_global_conversion_rate(site, query) + |> maybe_add_group_conversion_rate(site, query) + end + + defp maybe_add_percentage_metric(q, site, query) do + if :percentage in query.metrics do + total_query = Query.set_dimensions(query, []) + + q + |> select_merge_as([], total_visitors_subquery(site, total_query, query.include_imported)) + |> select_merge_as([], %{ + percentage: + fragment( + "if(? > 0, round(? / ? 
* 100, 1), null)", + selected_as(:total_visitors), + selected_as(:visitors), + selected_as(:total_visitors) + ) + }) + else + q + end + end + + # Adds conversion_rate metric to query, calculated as + # X / Y where Y is the same breakdown value without goal or props + # filters. + def maybe_add_global_conversion_rate(q, site, query) do + if :conversion_rate in query.metrics do + total_query = + query + |> Query.remove_filters(["event:goal", "event:props"]) + |> Query.set_dimensions([]) + + q + |> select_merge_as( + [], + total_visitors_subquery(site, total_query, query.include_imported) + ) + |> select_merge_as([e], %{ + conversion_rate: + fragment( + "if(? > 0, round(? / ? * 100, 1), 0)", + selected_as(:total_visitors), + selected_as(:visitors), + selected_as(:total_visitors) + ) + }) + else + q + end + end + + # This function injects a group_conversion_rate metric into + # a dimensional query. It is calculated as X / Y, where: + # + # * X is the number of conversions for a set of dimensions + # result (conversion = number of visitors who + # completed the filtered goal with the filtered + # custom properties). + # + # * Y is the number of all visitors for this set of dimensions + # result without the `event:goal` and `event:props:*` + # filters. + def maybe_add_group_conversion_rate(q, site, query) do + if :group_conversion_rate in query.metrics do + group_totals_query = + query + |> Query.remove_filters(["event:goal", "event:props"]) + |> Query.set_metrics([:visitors]) + |> Query.set_order_by([]) + + from(e in subquery(q), + left_join: c in subquery(SQL.QueryBuilder.build(group_totals_query, site)), + on: ^SQL.QueryBuilder.build_group_by_join(query) + ) + |> select_merge_as([e, c], %{ + total_visitors: c.visitors, + group_conversion_rate: + fragment( + "if(? > 0, round(? / ? 
* 100, 1), 0)", + c.visitors, + e.visitors, + c.visitors + ) + }) + |> select_join_fields(query, query.dimensions, e) + |> select_join_fields(query, List.delete(query.metrics, :group_conversion_rate), e) + else + q + end + end + + defp total_visitors(site, query) do + Base.base_event_query(site, query) + |> select([e], + total_visitors: fragment("toUInt64(round(uniq(?) * any(_sample_factor)))", e.user_id) + ) + end + + # `total_visitors_subquery` returns a subquery which selects `total_visitors` - + # the number used as the denominator in the calculation of `conversion_rate` and + # `percentage` metrics. + + # Usually, when calculating the totals, a new query is passed into this function, + # where certain filters (e.g. goal, props) are removed. That might make the query + # able to include imported data. However, we always want to include imported data + # only if it's included in the base query - otherwise the total will be based on + # a different data set, making the metric inaccurate. This is why we're using an + # explicit `include_imported` argument here. 
+ defp total_visitors_subquery(site, query, include_imported) + + defp total_visitors_subquery(site, query, true = _include_imported) do + wrap_expression([], %{ + total_visitors: + subquery(total_visitors(site, query)) + + subquery(Plausible.Stats.Imported.total_imported_visitors(site, query)) + }) + end + + defp total_visitors_subquery(site, query, false = _include_imported) do + wrap_expression([], %{ + total_visitors: subquery(total_visitors(site, query)) + }) + end +end From 2e24a856ccaaa5e0bce374db4a785b8864f16c92 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 10:38:47 +0300 Subject: [PATCH 22/42] Explicitly format datetimes --- lib/plausible/stats/interval.ex | 8 ++ lib/plausible/stats/query_result.ex | 6 + test/plausible/stats/interval_test.exs | 188 ++++++++++++------------- 3 files changed, 108 insertions(+), 94 deletions(-) diff --git a/lib/plausible/stats/interval.ex b/lib/plausible/stats/interval.ex index 2335469eac21..ceaf3a07c07a 100644 --- a/lib/plausible/stats/interval.ex +++ b/lib/plausible/stats/interval.ex @@ -104,6 +104,11 @@ defmodule Plausible.Stats.Interval do interval in Map.get(valid_by_period(opts), period, []) end + def format_datetime(%Date{} = date), do: Date.to_string(date) + + def format_datetime(%DateTime{} = datetime), + do: Timex.format!(datetime, "{YYYY}-{0M}-{0D} {h24}:{m}:{s}") + @doc """ Returns list of time bucket labels for the given query. 
""" @@ -127,12 +132,14 @@ defmodule Plausible.Stats.Interval do query.date_range.last |> Date.beginning_of_month() |> Timex.shift(months: -shift) + |> format_datetime() end) end defp time_labels_for_dimension("time:day", query) do query.date_range |> Enum.into([]) + |> Enum.map(&format_datetime/1) end @full_day_in_hours 23 @@ -154,6 +161,7 @@ defmodule Plausible.Stats.Interval do |> Timex.to_datetime() |> Timex.shift(hours: step) |> DateTime.truncate(:second) + |> format_datetime() end) end end diff --git a/lib/plausible/stats/query_result.ex b/lib/plausible/stats/query_result.ex index 6f4c0a3b5976..d09068973375 100644 --- a/lib/plausible/stats/query_result.ex +++ b/lib/plausible/stats/query_result.ex @@ -46,6 +46,12 @@ defmodule Plausible.Stats.QueryResult do end end + defp dimension_label("time:" <> _ = time_dimension, entry, query) do + datetime = Map.get(entry, Util.shortname(query, time_dimension)) + + Interval.format_datetime(datetime) + end + defp dimension_label(dimension, entry, query) do Map.get(entry, Util.shortname(query, dimension)) end diff --git a/test/plausible/stats/interval_test.exs b/test/plausible/stats/interval_test.exs index 4c2169629b21..dfcf0b5901cc 100644 --- a/test/plausible/stats/interval_test.exs +++ b/test/plausible/stats/interval_test.exs @@ -130,17 +130,17 @@ defmodule Plausible.Stats.IntervalTest do dimensions: ["visit:device", "time:month"], date_range: Date.range(~D[2022-01-17], ~D[2022-02-01]) }) == [ - ~D[2022-01-01], - ~D[2022-02-01] + "2022-01-01", + "2022-02-01" ] assert time_labels(%{ dimensions: ["visit:device", "time:month"], date_range: Date.range(~D[2022-01-01], ~D[2022-03-07]) }) == [ - ~D[2022-01-01], - ~D[2022-02-01], - ~D[2022-03-01] + "2022-01-01", + "2022-02-01", + "2022-03-01" ] end @@ -149,23 +149,23 @@ defmodule Plausible.Stats.IntervalTest do dimensions: ["time:day"], date_range: Date.range(~D[2022-01-17], ~D[2022-02-02]) }) == [ - ~D[2022-01-17], - ~D[2022-01-18], - ~D[2022-01-19], - ~D[2022-01-20], - 
~D[2022-01-21], - ~D[2022-01-22], - ~D[2022-01-23], - ~D[2022-01-24], - ~D[2022-01-25], - ~D[2022-01-26], - ~D[2022-01-27], - ~D[2022-01-28], - ~D[2022-01-29], - ~D[2022-01-30], - ~D[2022-01-31], - ~D[2022-02-01], - ~D[2022-02-02] + "2022-01-17", + "2022-01-18", + "2022-01-19", + "2022-01-20", + "2022-01-21", + "2022-01-22", + "2022-01-23", + "2022-01-24", + "2022-01-25", + "2022-01-26", + "2022-01-27", + "2022-01-28", + "2022-01-29", + "2022-01-30", + "2022-01-31", + "2022-02-01", + "2022-02-02" ] end @@ -174,84 +174,84 @@ defmodule Plausible.Stats.IntervalTest do dimensions: ["time:hour"], date_range: Date.range(~D[2022-01-17], ~D[2022-01-17]) }) == [ - ~U[2022-01-17 00:00:00Z], - ~U[2022-01-17 01:00:00Z], - ~U[2022-01-17 02:00:00Z], - ~U[2022-01-17 03:00:00Z], - ~U[2022-01-17 04:00:00Z], - ~U[2022-01-17 05:00:00Z], - ~U[2022-01-17 06:00:00Z], - ~U[2022-01-17 07:00:00Z], - ~U[2022-01-17 08:00:00Z], - ~U[2022-01-17 09:00:00Z], - ~U[2022-01-17 10:00:00Z], - ~U[2022-01-17 11:00:00Z], - ~U[2022-01-17 12:00:00Z], - ~U[2022-01-17 13:00:00Z], - ~U[2022-01-17 14:00:00Z], - ~U[2022-01-17 15:00:00Z], - ~U[2022-01-17 16:00:00Z], - ~U[2022-01-17 17:00:00Z], - ~U[2022-01-17 18:00:00Z], - ~U[2022-01-17 19:00:00Z], - ~U[2022-01-17 20:00:00Z], - ~U[2022-01-17 21:00:00Z], - ~U[2022-01-17 22:00:00Z], - ~U[2022-01-17 23:00:00Z] + "2022-01-17 00:00:00", + "2022-01-17 01:00:00", + "2022-01-17 02:00:00", + "2022-01-17 03:00:00", + "2022-01-17 04:00:00", + "2022-01-17 05:00:00", + "2022-01-17 06:00:00", + "2022-01-17 07:00:00", + "2022-01-17 08:00:00", + "2022-01-17 09:00:00", + "2022-01-17 10:00:00", + "2022-01-17 11:00:00", + "2022-01-17 12:00:00", + "2022-01-17 13:00:00", + "2022-01-17 14:00:00", + "2022-01-17 15:00:00", + "2022-01-17 16:00:00", + "2022-01-17 17:00:00", + "2022-01-17 18:00:00", + "2022-01-17 19:00:00", + "2022-01-17 20:00:00", + "2022-01-17 21:00:00", + "2022-01-17 22:00:00", + "2022-01-17 23:00:00" ] assert time_labels(%{ dimensions: ["time:hour"], date_range: 
Date.range(~D[2022-01-17], ~D[2022-01-18]) }) == [ - ~U[2022-01-17 00:00:00Z], - ~U[2022-01-17 01:00:00Z], - ~U[2022-01-17 02:00:00Z], - ~U[2022-01-17 03:00:00Z], - ~U[2022-01-17 04:00:00Z], - ~U[2022-01-17 05:00:00Z], - ~U[2022-01-17 06:00:00Z], - ~U[2022-01-17 07:00:00Z], - ~U[2022-01-17 08:00:00Z], - ~U[2022-01-17 09:00:00Z], - ~U[2022-01-17 10:00:00Z], - ~U[2022-01-17 11:00:00Z], - ~U[2022-01-17 12:00:00Z], - ~U[2022-01-17 13:00:00Z], - ~U[2022-01-17 14:00:00Z], - ~U[2022-01-17 15:00:00Z], - ~U[2022-01-17 16:00:00Z], - ~U[2022-01-17 17:00:00Z], - ~U[2022-01-17 18:00:00Z], - ~U[2022-01-17 19:00:00Z], - ~U[2022-01-17 20:00:00Z], - ~U[2022-01-17 21:00:00Z], - ~U[2022-01-17 22:00:00Z], - ~U[2022-01-17 23:00:00Z], - ~U[2022-01-18 00:00:00Z], - ~U[2022-01-18 01:00:00Z], - ~U[2022-01-18 02:00:00Z], - ~U[2022-01-18 03:00:00Z], - ~U[2022-01-18 04:00:00Z], - ~U[2022-01-18 05:00:00Z], - ~U[2022-01-18 06:00:00Z], - ~U[2022-01-18 07:00:00Z], - ~U[2022-01-18 08:00:00Z], - ~U[2022-01-18 09:00:00Z], - ~U[2022-01-18 10:00:00Z], - ~U[2022-01-18 11:00:00Z], - ~U[2022-01-18 12:00:00Z], - ~U[2022-01-18 13:00:00Z], - ~U[2022-01-18 14:00:00Z], - ~U[2022-01-18 15:00:00Z], - ~U[2022-01-18 16:00:00Z], - ~U[2022-01-18 17:00:00Z], - ~U[2022-01-18 18:00:00Z], - ~U[2022-01-18 19:00:00Z], - ~U[2022-01-18 20:00:00Z], - ~U[2022-01-18 21:00:00Z], - ~U[2022-01-18 22:00:00Z], - ~U[2022-01-18 23:00:00Z] + "2022-01-17 00:00:00", + "2022-01-17 01:00:00", + "2022-01-17 02:00:00", + "2022-01-17 03:00:00", + "2022-01-17 04:00:00", + "2022-01-17 05:00:00", + "2022-01-17 06:00:00", + "2022-01-17 07:00:00", + "2022-01-17 08:00:00", + "2022-01-17 09:00:00", + "2022-01-17 10:00:00", + "2022-01-17 11:00:00", + "2022-01-17 12:00:00", + "2022-01-17 13:00:00", + "2022-01-17 14:00:00", + "2022-01-17 15:00:00", + "2022-01-17 16:00:00", + "2022-01-17 17:00:00", + "2022-01-17 18:00:00", + "2022-01-17 19:00:00", + "2022-01-17 20:00:00", + "2022-01-17 21:00:00", + "2022-01-17 22:00:00", + "2022-01-17 23:00:00", + 
"2022-01-18 00:00:00", + "2022-01-18 01:00:00", + "2022-01-18 02:00:00", + "2022-01-18 03:00:00", + "2022-01-18 04:00:00", + "2022-01-18 05:00:00", + "2022-01-18 06:00:00", + "2022-01-18 07:00:00", + "2022-01-18 08:00:00", + "2022-01-18 09:00:00", + "2022-01-18 10:00:00", + "2022-01-18 11:00:00", + "2022-01-18 12:00:00", + "2022-01-18 13:00:00", + "2022-01-18 14:00:00", + "2022-01-18 15:00:00", + "2022-01-18 16:00:00", + "2022-01-18 17:00:00", + "2022-01-18 18:00:00", + "2022-01-18 19:00:00", + "2022-01-18 20:00:00", + "2022-01-18 21:00:00", + "2022-01-18 22:00:00", + "2022-01-18 23:00:00" ] end end From 5d355be4467bfffa78fa42e42a2867e8cd45d962 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 10:57:57 +0300 Subject: [PATCH 23/42] Consistent include_imported in special metrics --- lib/plausible/stats/query.ex | 10 +++++++--- lib/plausible/stats/sql/special_metrics.ex | 18 ++++++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index ee336ffa39a9..56568f01fbbb 100644 --- a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -230,9 +230,13 @@ defmodule Plausible.Stats.Query do end def set(query, keywords) do - query - |> struct!(keywords) - |> refresh_imported_opts() + new_query = struct!(query, keywords) + + if Keyword.has_key?(keywords, :include_imported) do + new_query + else + refresh_imported_opts(new_query) + end end @spec set_dimensions(t(), list(String.t())) :: t() diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex index f2465e97735f..29eaec275d36 100644 --- a/lib/plausible/stats/sql/special_metrics.ex +++ b/lib/plausible/stats/sql/special_metrics.ex @@ -20,7 +20,11 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do defp maybe_add_percentage_metric(q, site, query) do if :percentage in query.metrics do - total_query = Query.set_dimensions(query, []) + total_query = + Query.set(query, + 
dimensions: [], + include_imported: query.include_imported + ) q |> select_merge_as([], total_visitors_subquery(site, total_query, query.include_imported)) @@ -46,7 +50,10 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do total_query = query |> Query.remove_filters(["event:goal", "event:props"]) - |> Query.set_dimensions([]) + |> Query.set( + dimensions: [], + include_imported: query.include_imported + ) q |> select_merge_as( @@ -83,8 +90,11 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do group_totals_query = query |> Query.remove_filters(["event:goal", "event:props"]) - |> Query.set_metrics([:visitors]) - |> Query.set_order_by([]) + |> Query.set( + metrics: [:visitors], + order_by: [], + include_imported: query.include_imported + ) from(e in subquery(q), left_join: c in subquery(SQL.QueryBuilder.build(group_totals_query, site)), From 87edab1373c1432edbe699b6a275ab873153b2a3 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 13:05:44 +0300 Subject: [PATCH 24/42] Solve week-related crash --- lib/plausible/stats/imported/imported.ex | 4 ++-- lib/plausible/stats/sql/expression.ex | 6 +++++- lib/plausible/stats/sql/fragments.ex | 13 ------------- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index e73c7c3aeb10..a40d8748f466 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -632,9 +632,9 @@ defmodule Plausible.Stats.Imported do defp group_imported_by(q, :week, key, query) do q - |> group_by([i], weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone)) + |> group_by([i], weekstart_not_before(i.date, ^query.date_range.first)) |> select_merge_as([i], %{ - key => weekstart_not_before(i.date, ^query.date_range.first, ^query.timezone) + key => weekstart_not_before(i.date, ^query.date_range.first) }) end diff --git a/lib/plausible/stats/sql/expression.ex 
b/lib/plausible/stats/sql/expression.ex index d63942c0a02a..bfbc0b1c222a 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -40,7 +40,11 @@ defmodule Plausible.Stats.SQL.Expression do def dimension(key, "time:week", query) do wrap_expression([t], %{ - key => weekstart_not_before(t.timestamp, ^query.date_range.first, ^query.timezone) + key => + weekstart_not_before( + to_timezone(t.timestamp, ^query.timezone), + ^query.date_range.first + ) }) end diff --git a/lib/plausible/stats/sql/fragments.ex b/lib/plausible/stats/sql/fragments.ex index 9399190fcb51..92cfcc510000 100644 --- a/lib/plausible/stats/sql/fragments.ex +++ b/lib/plausible/stats/sql/fragments.ex @@ -97,19 +97,6 @@ defmodule Plausible.Stats.SQL.Fragments do end end - @doc """ - Same as Plausible.Stats.SQL.Fragments.weekstart_not_before/2 but converts dates to - the specified timezone. - """ - defmacro weekstart_not_before(date, not_before, timezone) do - quote do - weekstart_not_before( - to_timezone(unquote(date), unquote(timezone)), - to_timezone(unquote(not_before), unquote(timezone)) - ) - end - end - @doc """ Returns whether a key (usually property) exists under `meta.key` array or similar. 
From 5f222398443cead4282fcfe79e1324cc433ac00f Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 13:09:15 +0300 Subject: [PATCH 25/42] conversion_rate hacking --- lib/plausible/stats/timeseries.ex | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index fee25f9f2194..c48cf6ffe1b4 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -6,7 +6,7 @@ defmodule Plausible.Stats.Timeseries do query_with_metrics = Query.set( query, - metrics: metrics, + metrics: transform_metrics(metrics, %{conversion_rate: :group_conversion_rate}), dimensions: [time_dimension(query)], order_by: [{time_dimension(query), :asc}], v2: true, @@ -14,15 +14,13 @@ defmodule Plausible.Stats.Timeseries do ) |> QueryOptimizer.optimize() - IO.inspect(query_with_metrics) - q = SQL.QueryBuilder.build(query_with_metrics, site) q - |> IO.inspect() |> ClickhouseRepo.all() |> QueryResult.from(query_with_metrics) |> build_timeseries_result(query_with_metrics) + |> transform_keys(%{group_conversion_rate: :conversion_rate}) end defp time_dimension(query) do @@ -66,6 +64,7 @@ defmodule Plausible.Stats.Timeseries do :visits -> Map.merge(row, %{visits: 0}) :views_per_visit -> Map.merge(row, %{views_per_visit: 0.0}) :conversion_rate -> Map.merge(row, %{conversion_rate: 0.0}) + :group_conversion_rate -> Map.merge(row, %{group_conversion_rate: 0.0}) :bounce_rate -> Map.merge(row, %{bounce_rate: 0.0}) :visit_duration -> Map.merge(row, %{visit_duration: nil}) :average_revenue -> Map.merge(row, %{average_revenue: nil}) @@ -73,4 +72,17 @@ defmodule Plausible.Stats.Timeseries do end end) end + + defp transform_metrics(metrics, to_replace) do + Enum.map(metrics, &Map.get(to_replace, &1, &1)) + end + + defp transform_keys(results, keys_to_replace) do + Enum.map(results, fn map -> + Enum.map(map, fn {key, val} -> + {Map.get(keys_to_replace, key, key), val} + 
end) + |> Enum.into(%{}) + end) + end end From 618525f049b889b4fa7c5a513ba8793aa9c42f09 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 13:20:59 +0300 Subject: [PATCH 26/42] Keep include_imported consistent after splitting the query --- lib/plausible/stats/imported/base.ex | 9 ++++++--- lib/plausible/stats/query_optimizer.ex | 9 +++++++-- lib/plausible/stats/timeseries.ex | 16 ++++++++-------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/lib/plausible/stats/imported/base.ex b/lib/plausible/stats/imported/base.ex index f2559e271cb5..bd4c097ec8b9 100644 --- a/lib/plausible/stats/imported/base.ex +++ b/lib/plausible/stats/imported/base.ex @@ -202,9 +202,12 @@ defmodule Plausible.Stats.Imported.Base do filters |> Enum.map(fn [_, filter_key | _] -> filter_key end) |> Enum.concat(dimensions) - |> Enum.map(fn - "visit:screen" -> "visit:device" - dimension -> dimension + |> Enum.flat_map(fn + "time:month" -> [] + "time:week" -> [] + "time:day" -> [] + "visit:screen" -> ["visit:device"] + dimension -> [dimension] end) |> Enum.map(&@property_to_table_mappings[&1]) diff --git a/lib/plausible/stats/query_optimizer.ex b/lib/plausible/stats/query_optimizer.ex index c8a7de9165b6..5f3bf706f898 100644 --- a/lib/plausible/stats/query_optimizer.ex +++ b/lib/plausible/stats/query_optimizer.ex @@ -33,7 +33,7 @@ defmodule Plausible.Stats.QueryOptimizer do |> TableDecider.partition_metrics(query) { - Query.set_metrics(query, event_metrics), + Query.set(query, metrics: event_metrics, include_imported: query.include_imported), split_sessions_query(query, sessions_metrics) } end @@ -161,6 +161,11 @@ defmodule Plausible.Stats.QueryOptimizer do query.filters end - Query.set(query, filters: filters, metrics: session_metrics, dimensions: dimensions) + Query.set(query, + filters: filters, + metrics: session_metrics, + dimensions: dimensions, + include_imported: query.include_imported + ) end end diff --git a/lib/plausible/stats/timeseries.ex 
b/lib/plausible/stats/timeseries.ex index c48cf6ffe1b4..57e2a537ef9f 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -2,6 +2,13 @@ defmodule Plausible.Stats.Timeseries do use Plausible.ClickhouseRepo alias Plausible.Stats.{Query, QueryOptimizer, QueryResult, SQL} + @time_dimension %{ + "month" => "time:month", + "week" => "time:week", + "date" => "time:day", + "hour" => "time:hour" + } + def timeseries(site, query, metrics) do query_with_metrics = Query.set( @@ -23,14 +30,7 @@ defmodule Plausible.Stats.Timeseries do |> transform_keys(%{group_conversion_rate: :conversion_rate}) end - defp time_dimension(query) do - case query.interval do - "month" -> "time:month" - "week" -> "time:week" - "date" -> "time:day" - "hour" -> "time:hour" - end - end + defp time_dimension(query), do: Map.fetch!(@time_dimension, query.interval) defp build_timeseries_result(query_result, query) do results_map = From 1d2bd2f4e1a75853e40ede99faf9aeb5d9fdf91b Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 14:15:29 +0300 Subject: [PATCH 27/42] Simplify do_decide_tables --- lib/plausible/stats/imported/base.ex | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/lib/plausible/stats/imported/base.ex b/lib/plausible/stats/imported/base.ex index bd4c097ec8b9..6f6343225f5b 100644 --- a/lib/plausible/stats/imported/base.ex +++ b/lib/plausible/stats/imported/base.ex @@ -174,14 +174,6 @@ defmodule Plausible.Stats.Imported.Base do ["imported_pages", "imported_custom_events"] end - defp do_decide_tables(%Query{filters: [], dimensions: [dimension]}) do - if Map.has_key?(@property_to_table_mappings, dimension) do - [@property_to_table_mappings[dimension]] - else - [] - end - end - defp do_decide_tables(%Query{filters: filters, dimensions: ["event:goal"]}) do filter_props = Enum.map(filters, &Enum.at(&1, 1)) @@ -212,6 +204,7 @@ defmodule Plausible.Stats.Imported.Base do |> Enum.map(&@property_to_table_mappings[&1]) case 
Enum.uniq(table_candidates) do + [] -> ["imported_visitors"] [nil] -> [] [candidate] -> [candidate] _ -> [] From 01a99eb8f6f6f1240993ba52e4d047ff211a4b6e Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 14:21:03 +0300 Subject: [PATCH 28/42] Handle time dimensions in imports cleaner --- lib/plausible/stats/imported/base.ex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/plausible/stats/imported/base.ex b/lib/plausible/stats/imported/base.ex index 6f6343225f5b..3057f7cce562 100644 --- a/lib/plausible/stats/imported/base.ex +++ b/lib/plausible/stats/imported/base.ex @@ -39,6 +39,8 @@ defmodule Plausible.Stats.Imported.Base do "time:day" => "imported_visitors" } + @queriable_time_dimensions ["time:month", "time:week", "time:day"] + @imported_custom_props Imported.imported_custom_props() @db_field_mappings %{ @@ -194,10 +196,8 @@ defmodule Plausible.Stats.Imported.Base do filters |> Enum.map(fn [_, filter_key | _] -> filter_key end) |> Enum.concat(dimensions) + |> Enum.reject(&(&1 in @queriable_time_dimensions)) |> Enum.flat_map(fn - "time:month" -> [] - "time:week" -> [] - "time:day" -> [] "visit:screen" -> ["visit:device"] dimension -> [dimension] end) From 794aed1bf4dc6c0b5d11a6667552f1d0f9f15585 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 14:30:07 +0300 Subject: [PATCH 29/42] Allow time dimensions in custom property queries --- lib/plausible/stats/imported/base.ex | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/plausible/stats/imported/base.ex b/lib/plausible/stats/imported/base.ex index 3057f7cce562..34916de888a1 100644 --- a/lib/plausible/stats/imported/base.ex +++ b/lib/plausible/stats/imported/base.ex @@ -128,9 +128,10 @@ defmodule Plausible.Stats.Imported.Base do do_decide_custom_prop_table(query, dimension) end + @queriable_custom_prop_dimensions ["event:goal", "event:name"] ++ @queriable_time_dimensions defp do_decide_custom_prop_table(%{dimensions: 
dimensions} = query) do if dimensions == [] or - (length(dimensions) == 1 and hd(dimensions) in ["event:goal", "event:name"]) do + (length(dimensions) == 1 and hd(dimensions) in @queriable_custom_prop_dimensions) do custom_prop_filters = query.filters |> Enum.map(&Enum.at(&1, 1)) From 44d2b0fd2b562752f8e56497ed2d34793180c34f Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 2 Jul 2024 16:26:35 +0300 Subject: [PATCH 30/42] time:week handling continued --- lib/plausible/stats/interval.ex | 26 ++++++ test/plausible/stats/interval_test.exs | 20 ++++ .../external_stats_controller/query_test.exs | 93 ++++++++++++------- 3 files changed, 107 insertions(+), 32 deletions(-) diff --git a/lib/plausible/stats/interval.ex b/lib/plausible/stats/interval.ex index ceaf3a07c07a..abd1032f4a30 100644 --- a/lib/plausible/stats/interval.ex +++ b/lib/plausible/stats/interval.ex @@ -136,6 +136,22 @@ defmodule Plausible.Stats.Interval do end) end + defp time_labels_for_dimension("time:week", query) do + n_buckets = + Timex.diff( + query.date_range.last, + Date.beginning_of_week(query.date_range.first), + :weeks + ) + + Enum.map(0..n_buckets, fn shift -> + query.date_range.first + |> Timex.shift(weeks: shift) + |> date_or_weekstart(query) + |> format_datetime() + end) + end + defp time_labels_for_dimension("time:day", query) do query.date_range |> Enum.into([]) @@ -164,4 +180,14 @@ defmodule Plausible.Stats.Interval do |> format_datetime() end) end + + defp date_or_weekstart(date, query) do + weekstart = Timex.beginning_of_week(date) + + if Enum.member?(query.date_range, weekstart) do + weekstart + else + date + end + end end diff --git a/test/plausible/stats/interval_test.exs b/test/plausible/stats/interval_test.exs index dfcf0b5901cc..57d15969ffa8 100644 --- a/test/plausible/stats/interval_test.exs +++ b/test/plausible/stats/interval_test.exs @@ -144,6 +144,26 @@ defmodule Plausible.Stats.IntervalTest do ] end + test "with time:week dimension" do + assert time_labels(%{ + 
dimensions: ["time:week"], + date_range: Date.range(~D[2020-12-20], ~D[2021-01-08]) + }) == [ + "2020-12-20", + "2020-12-21", + "2020-12-28", + "2021-01-04" + ] + + assert time_labels(%{ + dimensions: ["time:week"], + date_range: Date.range(~D[2020-12-21], ~D[2021-01-03]) + }) == [ + "2020-12-21", + "2020-12-28" + ] + end + test "with time:day dimension" do assert time_labels(%{ dimensions: ["time:day"], diff --git a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs index b4c2698cc128..390dc6da9b88 100644 --- a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs @@ -1113,35 +1113,35 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do }) assert json_response(conn, 200)["results"] == [ - %{"dimensions" => ["2021-01-01T00:00:00Z"], "metrics" => [1, 2, 1, 600, 0]}, - %{"dimensions" => ["2021-01-01T23:00:00Z"], "metrics" => [1, 1, 1, 0, 100]} + %{"dimensions" => ["2021-01-01 00:00:00"], "metrics" => [1, 2, 1, 600, 0]}, + %{"dimensions" => ["2021-01-01 23:00:00"], "metrics" => [1, 1, 1, 0, 100]} ] assert json_response(conn, 200)["meta"]["time_labels"] == [ - "2021-01-01T00:00:00Z", - "2021-01-01T01:00:00Z", - "2021-01-01T02:00:00Z", - "2021-01-01T03:00:00Z", - "2021-01-01T04:00:00Z", - "2021-01-01T05:00:00Z", - "2021-01-01T06:00:00Z", - "2021-01-01T07:00:00Z", - "2021-01-01T08:00:00Z", - "2021-01-01T09:00:00Z", - "2021-01-01T10:00:00Z", - "2021-01-01T11:00:00Z", - "2021-01-01T12:00:00Z", - "2021-01-01T13:00:00Z", - "2021-01-01T14:00:00Z", - "2021-01-01T15:00:00Z", - "2021-01-01T16:00:00Z", - "2021-01-01T17:00:00Z", - "2021-01-01T18:00:00Z", - "2021-01-01T19:00:00Z", - "2021-01-01T20:00:00Z", - "2021-01-01T21:00:00Z", - "2021-01-01T22:00:00Z", - "2021-01-01T23:00:00Z" + "2021-01-01 00:00:00", + "2021-01-01 01:00:00", + "2021-01-01 02:00:00", + 
"2021-01-01 03:00:00", + "2021-01-01 04:00:00", + "2021-01-01 05:00:00", + "2021-01-01 06:00:00", + "2021-01-01 07:00:00", + "2021-01-01 08:00:00", + "2021-01-01 09:00:00", + "2021-01-01 10:00:00", + "2021-01-01 11:00:00", + "2021-01-01 12:00:00", + "2021-01-01 13:00:00", + "2021-01-01 14:00:00", + "2021-01-01 15:00:00", + "2021-01-01 16:00:00", + "2021-01-01 17:00:00", + "2021-01-01 18:00:00", + "2021-01-01 19:00:00", + "2021-01-01 20:00:00", + "2021-01-01 21:00:00", + "2021-01-01 22:00:00", + "2021-01-01 23:00:00" ] end @@ -1176,6 +1176,35 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do ] end + test "shows weekly data with time labels", %{conn: conn, site: site} do + populate_stats(site, [ + build(:pageview, timestamp: ~N[2021-01-01 00:00:00]), + build(:pageview, timestamp: ~N[2021-01-03 23:59:00]), + build(:pageview, timestamp: ~N[2021-01-07 23:59:00]) + ]) + + conn = + post(conn, "/api/v2/query", %{ + "site_id" => site.domain, + "metrics" => ["visitors"], + "date_range" => ["2020-12-20", "2021-01-07"], + "dimensions" => ["time:week"], + "include" => %{"time_labels" => true} + }) + + assert json_response(conn, 200)["results"] == [ + %{"dimensions" => ["2020-12-28"], "metrics" => [2]}, + %{"dimensions" => ["2021-01-04"], "metrics" => [1]} + ] + + assert json_response(conn, 200)["meta"]["time_labels"] == [ + "2020-12-20", + "2020-12-21", + "2020-12-28", + "2021-01-04" + ] + end + test "shows last 6 months of visitors", %{conn: conn, site: site} do populate_stats(site, [ build(:pageview, timestamp: ~N[2020-08-13 00:00:00]), @@ -1189,7 +1218,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do "site_id" => site.domain, "metrics" => ["visitors"], "date_range" => ["2020-07-01", "2021-01-31"], - "dimensions" => ["time"] + "dimensions" => ["time:month"] }) assert json_response(conn, 200)["results"] == [ @@ -4021,11 +4050,11 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do }) assert json_response(conn, 200)["results"] 
== [ - %{"dimensions" => ["2021-01-01T00:00:00Z", "Google"], "metrics" => [1]}, - %{"dimensions" => ["2021-01-02T00:00:00Z", "Google"], "metrics" => [1]}, - %{"dimensions" => ["2021-01-02T00:00:00Z", "Direct / None"], "metrics" => [1]}, - %{"dimensions" => ["2021-01-03T00:00:00Z", "Direct / None"], "metrics" => [1]}, - %{"dimensions" => ["2021-01-03T00:00:00Z", "Twitter"], "metrics" => [1]} + %{"dimensions" => ["2021-01-01 00:00:00", "Google"], "metrics" => [1]}, + %{"dimensions" => ["2021-01-02 00:00:00", "Google"], "metrics" => [1]}, + %{"dimensions" => ["2021-01-02 00:00:00", "Direct / None"], "metrics" => [1]}, + %{"dimensions" => ["2021-01-03 00:00:00", "Direct / None"], "metrics" => [1]}, + %{"dimensions" => ["2021-01-03 00:00:00", "Twitter"], "metrics" => [1]} ] end end From ece6f32ecb091a8e2103646090a56f1a79e9e66f Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 10:08:36 +0300 Subject: [PATCH 31/42] cast_revenue_metrics_to_money --- lib/plausible/stats/timeseries.ex | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 57e2a537ef9f..5e94a22a8253 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -1,4 +1,5 @@ defmodule Plausible.Stats.Timeseries do + use Plausible use Plausible.ClickhouseRepo alias Plausible.Stats.{Query, QueryOptimizer, QueryResult, SQL} @@ -10,6 +11,13 @@ defmodule Plausible.Stats.Timeseries do } def timeseries(site, query, metrics) do + {currency, metrics} = + on_ee do + Plausible.Stats.Goal.Revenue.get_revenue_tracking_currency(site, query, metrics) + else + {nil, metrics} + end + query_with_metrics = Query.set( query, @@ -26,13 +34,13 @@ defmodule Plausible.Stats.Timeseries do q |> ClickhouseRepo.all() |> QueryResult.from(query_with_metrics) - |> build_timeseries_result(query_with_metrics) + |> build_timeseries_result(query_with_metrics, currency) |> 
transform_keys(%{group_conversion_rate: :conversion_rate}) end defp time_dimension(query), do: Map.fetch!(@time_dimension, query.interval) - defp build_timeseries_result(query_result, query) do + defp build_timeseries_result(query_result, query, currency) do results_map = query_result.results |> Enum.map(fn %{dimensions: [time_dimension_value], metrics: entry_metrics} -> @@ -52,6 +60,7 @@ defmodule Plausible.Stats.Timeseries do key, empty_row(key, query.metrics) ) + |> cast_revenue_metrics_to_money(currency) end) end @@ -85,4 +94,12 @@ defmodule Plausible.Stats.Timeseries do |> Enum.into(%{}) end) end + + on_ee do + defp cast_revenue_metrics_to_money(results, revenue_goals) do + Plausible.Stats.Goal.Revenue.cast_revenue_metrics_to_money(results, revenue_goals) + end + else + defp cast_revenue_metrics_to_money(results, _revenue_goals), do: results + end end From 4036103ae55b202c379d1fc2c8d2792a388596c9 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 10:15:56 +0300 Subject: [PATCH 32/42] fix `full_intervals` support --- .../controllers/api/stats_controller.ex | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/plausible_web/controllers/api/stats_controller.ex b/lib/plausible_web/controllers/api/stats_controller.ex index 5dcf7800cf63..e5f01eda3545 100644 --- a/lib/plausible_web/controllers/api/stats_controller.ex +++ b/lib/plausible_web/controllers/api/stats_controller.ex @@ -174,8 +174,10 @@ defmodule PlausibleWeb.Api.StatsController do defp build_full_intervals(%{interval: "week", date_range: range}, labels) do for label <- labels, into: %{} do - interval_start = Timex.beginning_of_week(label) - interval_end = Timex.end_of_week(label) + date = Date.from_iso8601!(label) + + interval_start = Timex.beginning_of_week(date) + interval_end = Timex.end_of_week(date) within_interval? 
= Enum.member?(range, interval_start) && Enum.member?(range, interval_end) @@ -185,8 +187,10 @@ defmodule PlausibleWeb.Api.StatsController do defp build_full_intervals(%{interval: "month", date_range: range}, labels) do for label <- labels, into: %{} do - interval_start = Timex.beginning_of_month(label) - interval_end = Timex.end_of_month(label) + date = Date.from_iso8601!(label) + + interval_start = Timex.beginning_of_month(date) + interval_end = Timex.end_of_month(date) within_interval? = Enum.member?(range, interval_start) && Enum.member?(range, interval_end) From 400386ff1b561c436e70e5a70261d2e6f7330ced Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 10:40:38 +0300 Subject: [PATCH 33/42] Handle minute/realtime graphs --- lib/plausible/stats/interval.ex | 27 +++++++++++++++++++++ lib/plausible/stats/sql/expression.ex | 34 +++++++++++++++++++-------- lib/plausible/stats/timeseries.ex | 3 ++- 3 files changed, 53 insertions(+), 11 deletions(-) diff --git a/lib/plausible/stats/interval.ex b/lib/plausible/stats/interval.ex index abd1032f4a30..ae80ba8eab32 100644 --- a/lib/plausible/stats/interval.ex +++ b/lib/plausible/stats/interval.ex @@ -7,6 +7,8 @@ defmodule Plausible.Stats.Interval do `week`, and `month`. """ + alias Plausible.Stats.Query + @type t() :: String.t() @type(opt() :: {:site, Plausible.Site.t()} | {:from, Date.t()}, {:to, Date.t()}) @type opts :: list(opt()) @@ -109,6 +111,9 @@ defmodule Plausible.Stats.Interval do def format_datetime(%DateTime{} = datetime), do: Timex.format!(datetime, "{YYYY}-{0M}-{0D} {h24}:{m}:{s}") + # Realtime graphs return numbers + def format_datetime(other), do: other + @doc """ Returns list of time bucket labels for the given query. 
""" @@ -181,6 +186,28 @@ defmodule Plausible.Stats.Interval do end) end + # Only supported in dashboards not via API + defp time_labels_for_dimension("time:minute", %Query{period: "30m"}) do + Enum.into(-30..-1, []) + end + + @full_day_in_minutes 24 * 60 - 1 + defp time_labels_for_dimension("time:minute", query) do + n_buckets = + if query.date_range.first == query.date_range.last do + @full_day_in_minutes + else + Timex.diff(query.date_range.last, query.date_range.first, :minutes) + end + + Enum.map(0..n_buckets, fn step -> + query.date_range.first + |> Timex.to_datetime() + |> Timex.shift(minutes: step) + |> format_datetime() + end) + end + defp date_or_weekstart(date, query) do weekstart = Timex.beginning_of_week(date) diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index bfbc0b1c222a..a2172da5e348 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -26,15 +26,9 @@ defmodule Plausible.Stats.SQL.Expression do end end - def dimension(key, "time:hour", query) do - wrap_expression([t], %{ - key => fragment("toStartOfHour(toTimeZone(?, ?))", t.timestamp, ^query.timezone) - }) - end - - def dimension(key, "time:day", query) do + def dimension(key, "time:month", query) do wrap_expression([t], %{ - key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone) + key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end @@ -48,9 +42,29 @@ defmodule Plausible.Stats.SQL.Expression do }) end - def dimension(key, "time:month", query) do + def dimension(key, "time:day", query) do wrap_expression([t], %{ - key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone) + key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone) + }) + end + + def dimension(key, "time:hour", query) do + wrap_expression([t], %{ + key => fragment("toStartOfHour(toTimeZone(?, ?))", t.timestamp, ^query.timezone) + }) + end + + # :NOTE: 
This is not exposed in Query APIv2 + def dimension(key, "time:minute", %Query{period: "30m"}) do + wrap_expression([t], %{ + key => fragment("dateDiff('minute', now(), ?)", t.timestamp) + }) + end + + # :NOTE: This is not exposed in Query APIv2 + def dimension(key, "time:minute", query) do + wrap_expression([t], %{ + key => fragment("toStartOfMinute(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 5e94a22a8253..5fc3938ed0b3 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -7,7 +7,8 @@ defmodule Plausible.Stats.Timeseries do "month" => "time:month", "week" => "time:week", "date" => "time:day", - "hour" => "time:hour" + "hour" => "time:hour", + "minute" => "time:minute" } def timeseries(site, query, metrics) do From 18162b775040df82c6642abb021cfb3fa67b589e Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 12:15:22 +0300 Subject: [PATCH 34/42] experimental_session_count? with timeseries This becomes required as we try to include visits from sessions by default --- CHANGELOG.md | 1 + lib/plausible/stats/sql/expression.ex | 95 +++++++++++++------ lib/plausible/stats/sql/query_builder.ex | 14 +-- lib/plausible/stats/timeseries.ex | 3 +- .../api/stats_controller/main_graph_test.exs | 14 +-- 5 files changed, 80 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75f271eeb96a..0b1a0693ecee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file. 
### Changed +- Realtime and hourly graphs now show visits lasting their whole duration instead of only when specific events occur - Increase hourly request limit for API keys in CE from 600 to 1000000 (practically removing the limit) plausible/analytics#4200 - Make TCP connections try IPv6 first with IPv4 fallback in CE plausible/analytics#4245 - `is` and `is not` filters in dashboard no longer support wildcards. Use contains/does not contain filter instead. diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index a2172da5e348..c4e2c9e3264e 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -26,13 +26,26 @@ defmodule Plausible.Stats.SQL.Expression do end end - def dimension(key, "time:month", query) do + defmacrop regular_time_slots(query, period_in_seconds) do + quote do + fragment( + "arrayJoin(timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?)))", + s.start, + ^unquote(query).timezone, + s.start, + s.timestamp, + ^unquote(period_in_seconds) + ) + end + end + + def dimension(key, "time:month", _table, query) do wrap_expression([t], %{ key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end - def dimension(key, "time:week", query) do + def dimension(key, "time:week", _table, query) do wrap_expression([t], %{ key => weekstart_not_before( @@ -42,42 +55,70 @@ defmodule Plausible.Stats.SQL.Expression do }) end - def dimension(key, "time:day", query) do + def dimension(key, "time:day", _table, query) do wrap_expression([t], %{ key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end - def dimension(key, "time:hour", query) do + def dimension(key, "time:hour", :sessions, %Query{experimental_session_count?: true} = query) do + wrap_expression([s], %{ + key => regular_time_slots(query, 3600) + }) + end + + def dimension(key, "time:hour", _table, query) do wrap_expression([t], %{ key => fragment("toStartOfHour(toTimeZone(?, 
?))", t.timestamp, ^query.timezone) }) end # :NOTE: This is not exposed in Query APIv2 - def dimension(key, "time:minute", %Query{period: "30m"}) do + def dimension(key, "time:minute", :sessions, %Query{ + period: "30m", + experimental_session_count?: true + }) do + wrap_expression([s], %{ + key => + fragment( + "arrayJoin(range(dateDiff('minute', now(), ?), dateDiff('minute', now(), ?) + 1))", + s.start, + s.timestamp + ) + }) + end + + # :NOTE: This is not exposed in Query APIv2 + def dimension(key, "time:minute", _table, %Query{period: "30m"}) do wrap_expression([t], %{ key => fragment("dateDiff('minute', now(), ?)", t.timestamp) }) end # :NOTE: This is not exposed in Query APIv2 - def dimension(key, "time:minute", query) do + def dimension(key, "time:minute", :sessions, %Query{experimental_session_count?: true} = query) do + wrap_expression([s], %{ + key => regular_time_slots(query, 60) + }) + end + + # :NOTE: This is not exposed in Query APIv2 + def dimension(key, "time:minute", _table, query) do wrap_expression([t], %{ key => fragment("toStartOfMinute(toTimeZone(?, ?))", t.timestamp, ^query.timezone) }) end - def dimension(key, "event:name", _query), + def dimension(key, "event:name", _table, _query), do: wrap_expression([t], %{key => t.name}) - def dimension(key, "event:page", _query), + def dimension(key, "event:page", _table, _query), do: wrap_expression([t], %{key => t.pathname}) - def dimension(key, "event:hostname", _query), + def dimension(key, "event:hostname", _table, _query), do: wrap_expression([t], %{key => t.hostname}) - def dimension(key, "event:props:" <> property_name, _query) do + def dimension(key, "event:props:" <> property_name, _table, _query) do wrap_expression([t], %{ key => fragment( @@ -88,55 +129,55 @@ defmodule Plausible.Stats.SQL.Expression do }) end - def dimension(key, "visit:entry_page", _query), + def dimension(key, "visit:entry_page", _table, _query), do: wrap_expression([t], %{key => t.entry_page}) - def dimension(key, 
"visit:exit_page", _query), + def dimension(key, "visit:exit_page", _table, _query), do: wrap_expression([t], %{key => t.exit_page}) - def dimension(key, "visit:utm_medium", _query), + def dimension(key, "visit:utm_medium", _table, _query), do: field_or_blank_value(key, t.utm_medium, @not_set) - def dimension(key, "visit:utm_source", _query), + def dimension(key, "visit:utm_source", _table, _query), do: field_or_blank_value(key, t.utm_source, @not_set) - def dimension(key, "visit:utm_campaign", _query), + def dimension(key, "visit:utm_campaign", _table, _query), do: field_or_blank_value(key, t.utm_campaign, @not_set) - def dimension(key, "visit:utm_content", _query), + def dimension(key, "visit:utm_content", _table, _query), do: field_or_blank_value(key, t.utm_content, @not_set) - def dimension(key, "visit:utm_term", _query), + def dimension(key, "visit:utm_term", _table, _query), do: field_or_blank_value(key, t.utm_term, @not_set) - def dimension(key, "visit:source", _query), + def dimension(key, "visit:source", _table, _query), do: field_or_blank_value(key, t.source, @no_ref) - def dimension(key, "visit:referrer", _query), + def dimension(key, "visit:referrer", _table, _query), do: field_or_blank_value(key, t.referrer, @no_ref) - def dimension(key, "visit:device", _query), + def dimension(key, "visit:device", _table, _query), do: field_or_blank_value(key, t.device, @not_set) - def dimension(key, "visit:os", _query), + def dimension(key, "visit:os", _table, _query), do: field_or_blank_value(key, t.os, @not_set) - def dimension(key, "visit:os_version", _query), + def dimension(key, "visit:os_version", _table, _query), do: field_or_blank_value(key, t.os_version, @not_set) - def dimension(key, "visit:browser", _query), + def dimension(key, "visit:browser", _table, _query), do: field_or_blank_value(key, t.browser, @not_set) - def dimension(key, "visit:browser_version", _query), + def dimension(key, "visit:browser_version", _table, _query), do: 
field_or_blank_value(key, t.browser_version, @not_set) - def dimension(key, "visit:country", _query), + def dimension(key, "visit:country", _table, _query), do: wrap_expression([t], %{key => t.country}) - def dimension(key, "visit:region", _query), + def dimension(key, "visit:region", _table, _query), do: wrap_expression([t], %{key => t.region}) - def dimension(key, "visit:city", _query), + def dimension(key, "visit:city", _table, _query), do: wrap_expression([t], %{key => t.city}) def event_metric(:pageviews) do diff --git a/lib/plausible/stats/sql/query_builder.ex b/lib/plausible/stats/sql/query_builder.ex index 99845f601075..ef61a59758cd 100644 --- a/lib/plausible/stats/sql/query_builder.ex +++ b/lib/plausible/stats/sql/query_builder.ex @@ -41,7 +41,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do q |> join_sessions_if_needed(site, events_query) - |> build_group_by(events_query) + |> build_group_by(:events, events_query) |> merge_imported(site, events_query, events_query.metrics) |> SQL.SpecialMetrics.add(site, events_query) end @@ -82,7 +82,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do q |> join_events_if_needed(site, sessions_query) - |> build_group_by(sessions_query) + |> build_group_by(:sessions, sessions_query) |> merge_imported(site, sessions_query, sessions_query.metrics) |> SQL.SpecialMetrics.add(site, sessions_query) end @@ -111,11 +111,11 @@ defmodule Plausible.Stats.SQL.QueryBuilder do end end - defp build_group_by(q, query) do - Enum.reduce(query.dimensions, q, &dimension_group_by(&2, query, &1)) + defp build_group_by(q, table, query) do + Enum.reduce(query.dimensions, q, &dimension_group_by(&2, table, query, &1)) end - defp dimension_group_by(q, query, "event:goal" = dimension) do + defp dimension_group_by(q, _table, query, "event:goal" = dimension) do {events, page_regexes} = Filters.Utils.split_goals_query_expressions(query.preloaded_goals) from(e in q, @@ -128,11 +128,11 @@ defmodule Plausible.Stats.SQL.QueryBuilder do ) end - defp 
dimension_group_by(q, query, dimension) do + defp dimension_group_by(q, table, query, dimension) do key = shortname(query, dimension) q - |> select_merge_as([], Expression.dimension(key, dimension, query)) + |> select_merge_as([], Expression.dimension(key, dimension, table, query)) |> group_by([], selected_as(^key)) end diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index 5fc3938ed0b3..eafeb79fe5bf 100644 --- a/lib/plausible/stats/timeseries.ex +++ b/lib/plausible/stats/timeseries.ex @@ -26,7 +26,8 @@ defmodule Plausible.Stats.Timeseries do dimensions: [time_dimension(query)], order_by: [{time_dimension(query), :asc}], v2: true, - include: %{time_labels: true, imports: query.include.imports} + include: %{time_labels: true, imports: query.include.imports}, + experimental_session_count?: true ) |> QueryOptimizer.optimize() diff --git a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs index d4b0c36e8d7b..f04cbacbb01b 100644 --- a/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/main_graph_test.exs @@ -480,13 +480,7 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do assert %{"plot" => plot} = json_response(conn, 200) - expected_plot = - if FunWithFlags.enabled?(:experimental_session_count) do - ~w[1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0] - else - ~w[0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0] - end - + expected_plot = ~w[1 1 1 1 1 2 2 2 2 2 2 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0] assert plot == Enum.map(expected_plot, &String.to_integer/1) end @@ -565,11 +559,7 @@ defmodule PlausibleWeb.Api.StatsController.MainGraphTest do assert %{"plot" => plot} = json_response(conn, 200) - if FunWithFlags.enabled?(:experimental_session_count) do - assert plot == [1, 1, 0, 0, 0] - else - assert plot == [1, 1, 0, 0, 1] - end 
+ assert plot == [1, 1, 0, 0, 0] end end From 333d93ac5cfd68d4bced9e06f95c7a9c8d090aa1 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 15:28:32 +0300 Subject: [PATCH 35/42] Support hourly data in imports --- lib/plausible/stats/imported/base.ex | 5 +++-- lib/plausible/stats/imported/imported.ex | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/plausible/stats/imported/base.ex b/lib/plausible/stats/imported/base.ex index 34916de888a1..11909b234bf2 100644 --- a/lib/plausible/stats/imported/base.ex +++ b/lib/plausible/stats/imported/base.ex @@ -36,10 +36,11 @@ defmodule Plausible.Stats.Imported.Base do # NOTE: These dimensions are only used in group by "time:month" => "imported_visitors", "time:week" => "imported_visitors", - "time:day" => "imported_visitors" + "time:day" => "imported_visitors", + "time:hour" => "imported_visitors" } - @queriable_time_dimensions ["time:month", "time:week", "time:day"] + @queriable_time_dimensions ["time:month", "time:week", "time:day", "time:hour"] @imported_custom_props Imported.imported_custom_props() diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index a40d8748f466..13245bb93518 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -630,6 +630,12 @@ defmodule Plausible.Stats.Imported do |> select_merge_as([i], %{key => fragment("toStartOfMonth(?)", i.date)}) end + defp group_imported_by(q, :hour, key, _query) do + q + |> group_by([i], i.date) + |> select_merge_as([i], %{key => i.date}) + end + defp group_imported_by(q, :week, key, query) do q |> group_by([i], weekstart_not_before(i.date, ^query.date_range.first)) From 4536b356f0c5e5f84d4b4e397422370c3d664b97 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 15:50:31 +0300 Subject: [PATCH 36/42] Update bounce_rate in more csv tests --- .../CSVs/30d-filter-path/visitors.csv | 60 +++++++++---------- 
.../controllers/CSVs/30d/visitors.csv | 56 ++++++++--------- .../controllers/CSVs/6m/visitors.csv | 6 +- .../controllers/stats_controller_test.exs | 6 +- 4 files changed, 64 insertions(+), 64 deletions(-) diff --git a/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv b/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv index a49423c67055..fa82414a60eb 100644 --- a/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv +++ b/test/plausible_web/controllers/CSVs/30d-filter-path/visitors.csv @@ -1,32 +1,32 @@ date,visitors,pageviews,visits,views_per_visit,bounce_rate,visit_duration -2021-09-20,0,0,0,0.0,, -2021-09-21,0,0,0,0.0,, -2021-09-22,0,0,0,0.0,, -2021-09-23,0,0,0,0.0,, -2021-09-24,0,0,0,0.0,, -2021-09-25,0,0,0,0.0,, -2021-09-26,0,0,0,0.0,, -2021-09-27,0,0,0,0.0,, -2021-09-28,0,0,0,0.0,, -2021-09-29,0,0,0,0.0,, -2021-09-30,0,0,0,0.0,, -2021-10-01,0,0,0,0.0,, -2021-10-02,0,0,0,0.0,, -2021-10-03,0,0,0,0.0,, -2021-10-04,0,0,0,0.0,, -2021-10-05,0,0,0,0.0,, -2021-10-06,0,0,0,0.0,, -2021-10-07,0,0,0,0.0,, -2021-10-08,0,0,0,0.0,, -2021-10-09,0,0,0,0.0,, -2021-10-10,0,0,0,0.0,, -2021-10-11,0,0,0,0.0,, -2021-10-12,0,0,0,0.0,, -2021-10-13,0,0,0,0.0,, -2021-10-14,0,0,0,0.0,, -2021-10-15,0,0,0,0.0,, -2021-10-16,0,0,0,0.0,, -2021-10-17,0,0,0,0.0,, -2021-10-18,0,0,0,0.0,, -2021-10-19,0,0,0,0.0,, +2021-09-20,0,0,0,0.0,0.0, +2021-09-21,0,0,0,0.0,0.0, +2021-09-22,0,0,0,0.0,0.0, +2021-09-23,0,0,0,0.0,0.0, +2021-09-24,0,0,0,0.0,0.0, +2021-09-25,0,0,0,0.0,0.0, +2021-09-26,0,0,0,0.0,0.0, +2021-09-27,0,0,0,0.0,0.0, +2021-09-28,0,0,0,0.0,0.0, +2021-09-29,0,0,0,0.0,0.0, +2021-09-30,0,0,0,0.0,0.0, +2021-10-01,0,0,0,0.0,0.0, +2021-10-02,0,0,0,0.0,0.0, +2021-10-03,0,0,0,0.0,0.0, +2021-10-04,0,0,0,0.0,0.0, +2021-10-05,0,0,0,0.0,0.0, +2021-10-06,0,0,0,0.0,0.0, +2021-10-07,0,0,0,0.0,0.0, +2021-10-08,0,0,0,0.0,0.0, +2021-10-09,0,0,0,0.0,0.0, +2021-10-10,0,0,0,0.0,0.0, +2021-10-11,0,0,0,0.0,0.0, +2021-10-12,0,0,0,0.0,0.0, +2021-10-13,0,0,0,0.0,0.0, 
+2021-10-14,0,0,0,0.0,0.0, +2021-10-15,0,0,0,0.0,0.0, +2021-10-16,0,0,0,0.0,0.0, +2021-10-17,0,0,0,0.0,0.0, +2021-10-18,0,0,0,0.0,0.0, +2021-10-19,0,0,0,0.0,0.0, 2021-10-20,1,1,1,2.0,0,60 diff --git a/test/plausible_web/controllers/CSVs/30d/visitors.csv b/test/plausible_web/controllers/CSVs/30d/visitors.csv index 6bac77976e7c..76dbc7a9c5d5 100644 --- a/test/plausible_web/controllers/CSVs/30d/visitors.csv +++ b/test/plausible_web/controllers/CSVs/30d/visitors.csv @@ -1,32 +1,32 @@ date,visitors,pageviews,visits,views_per_visit,bounce_rate,visit_duration 2021-09-20,1,1,1,1.0,100,0 -2021-09-21,0,0,0,0.0,, -2021-09-22,0,0,0,0.0,, -2021-09-23,0,0,0,0.0,, -2021-09-24,0,0,0,0.0,, -2021-09-25,0,0,0,0.0,, -2021-09-26,0,0,0,0.0,, -2021-09-27,0,0,0,0.0,, -2021-09-28,0,0,0,0.0,, -2021-09-29,0,0,0,0.0,, -2021-09-30,0,0,0,0.0,, -2021-10-01,0,0,0,0.0,, -2021-10-02,0,0,0,0.0,, -2021-10-03,0,0,0,0.0,, -2021-10-04,0,0,0,0.0,, -2021-10-05,0,0,0,0.0,, -2021-10-06,0,0,0,0.0,, -2021-10-07,0,0,0,0.0,, -2021-10-08,0,0,0,0.0,, -2021-10-09,0,0,0,0.0,, -2021-10-10,0,0,0,0.0,, -2021-10-11,0,0,0,0.0,, -2021-10-12,0,0,0,0.0,, -2021-10-13,0,0,0,0.0,, -2021-10-14,0,0,0,0.0,, -2021-10-15,0,0,0,0.0,, -2021-10-16,0,0,0,0.0,, -2021-10-17,0,0,0,0.0,, -2021-10-18,0,0,0,0.0,, +2021-09-21,0,0,0,0.0,0.0, +2021-09-22,0,0,0,0.0,0.0, +2021-09-23,0,0,0,0.0,0.0, +2021-09-24,0,0,0,0.0,0.0, +2021-09-25,0,0,0,0.0,0.0, +2021-09-26,0,0,0,0.0,0.0, +2021-09-27,0,0,0,0.0,0.0, +2021-09-28,0,0,0,0.0,0.0, +2021-09-29,0,0,0,0.0,0.0, +2021-09-30,0,0,0,0.0,0.0, +2021-10-01,0,0,0,0.0,0.0, +2021-10-02,0,0,0,0.0,0.0, +2021-10-03,0,0,0,0.0,0.0, +2021-10-04,0,0,0,0.0,0.0, +2021-10-05,0,0,0,0.0,0.0, +2021-10-06,0,0,0,0.0,0.0, +2021-10-07,0,0,0,0.0,0.0, +2021-10-08,0,0,0,0.0,0.0, +2021-10-09,0,0,0,0.0,0.0, +2021-10-10,0,0,0,0.0,0.0, +2021-10-11,0,0,0,0.0,0.0, +2021-10-12,0,0,0,0.0,0.0, +2021-10-13,0,0,0,0.0,0.0, +2021-10-14,0,0,0,0.0,0.0, +2021-10-15,0,0,0,0.0,0.0, +2021-10-16,0,0,0,0.0,0.0, +2021-10-17,0,0,0,0.0,0.0, 
+2021-10-18,0,0,0,0.0,0.0, 2021-10-19,2,2,2,1.0,50,30 2021-10-20,1,2,1,2.0,0,60 diff --git a/test/plausible_web/controllers/CSVs/6m/visitors.csv b/test/plausible_web/controllers/CSVs/6m/visitors.csv index 230cc9d82cc1..6858d4466ef2 100644 --- a/test/plausible_web/controllers/CSVs/6m/visitors.csv +++ b/test/plausible_web/controllers/CSVs/6m/visitors.csv @@ -1,7 +1,7 @@ date,visitors,pageviews,visits,views_per_visit,bounce_rate,visit_duration 2021-05-01,1,1,1,1.0,100,0 -2021-06-01,0,0,0,0.0,, -2021-07-01,0,0,0,0.0,, -2021-08-01,0,0,0,0.0,, +2021-06-01,0,0,0,0.0,0.0, +2021-07-01,0,0,0,0.0,0.0, +2021-08-01,0,0,0,0.0,0.0, 2021-09-01,1,1,1,1.0,100,0 2021-10-01,3,4,3,1.33,33,40 diff --git a/test/plausible_web/controllers/stats_controller_test.exs b/test/plausible_web/controllers/stats_controller_test.exs index a2a95ab76268..bb4f7687a90b 100644 --- a/test/plausible_web/controllers/stats_controller_test.exs +++ b/test/plausible_web/controllers/stats_controller_test.exs @@ -323,9 +323,9 @@ defmodule PlausibleWeb.StatsControllerTest do "visit_duration" ], ["2021-09-20", "1", "1", "1", "1.0", "100", "0"], - ["2021-09-27", "0", "0", "0", "0.0", "", ""], - ["2021-10-04", "0", "0", "0", "0.0", "", ""], - ["2021-10-11", "0", "0", "0", "0.0", "", ""], + ["2021-09-27", "0", "0", "0", "0.0", "0.0", ""], + ["2021-10-04", "0", "0", "0", "0.0", "0.0", ""], + ["2021-10-11", "0", "0", "0", "0.0", "0.0", ""], ["2021-10-18", "3", "4", "3", "1.33", "33", "40"], [""] ] From e4deb1d7a8e72362e33725c68d60f892f372ed26 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 15:57:45 +0300 Subject: [PATCH 37/42] Update some time-series query tests --- .../api/external_stats_controller/query_test.exs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs index 390dc6da9b88..94541963987b 100644 --- 
a/test/plausible_web/controllers/api/external_stats_controller/query_test.exs +++ b/test/plausible_web/controllers/api/external_stats_controller/query_test.exs @@ -1241,7 +1241,7 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do "site_id" => site.domain, "metrics" => ["visitors"], "date_range" => ["2020-01-01", "2021-01-01"], - "dimensions" => ["time"] + "dimensions" => ["time:month"] }) assert json_response(conn, 200)["results"] == [ @@ -1264,7 +1264,8 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do "site_id" => site.domain, "metrics" => ["visitors"], "date_range" => ["2020-01-01", "2021-01-07"], - "dimensions" => ["time:day"] + "dimensions" => ["time:day"], + "include" => %{"time_labels" => true} }) assert json_response(conn, 200)["results"] == [ @@ -1272,6 +1273,8 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do %{"dimensions" => ["2020-12-31"], "metrics" => [1]}, %{"dimensions" => ["2021-01-01"], "metrics" => [2]} ] + + assert length(json_response(conn, 200)["meta"]["time_labels"]) == 373 end test "shows a custom range with daily interval", %{conn: conn, site: site} do From 95ff561533c870d1d64f56ac882519956f89e01a Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 16:04:03 +0300 Subject: [PATCH 38/42] Fix for meta.warning being included incorrectly --- lib/plausible/stats/query_result.ex | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/plausible/stats/query_result.ex b/lib/plausible/stats/query_result.ex index d09068973375..e4c8791e81bb 100644 --- a/lib/plausible/stats/query_result.ex +++ b/lib/plausible/stats/query_result.ex @@ -62,12 +62,16 @@ defmodule Plausible.Stats.QueryResult do defp serializable_filter(filter), do: filter - @import_warning "Imported stats are not included in the results because query parameters are not supported. 
" <> - "For more information, see: https://plausible.io/docs/stats-api#filtering-imported-stats" + @imports_unsupported_query_warning "Imported stats are not included in the results because query parameters are not supported. " <> + "For more information, see: https://plausible.io/docs/stats-api#filtering-imported-stats" defp meta(query) do %{ - warning: if(query.skip_imported_reason, do: @import_warning, else: nil), + warning: + case query.skip_imported_reason do + :unsupported_query -> @imports_unsupported_query_warning + _ -> nil + end, time_labels: if(query.include.time_labels, do: Interval.time_labels(query), else: nil) } |> Enum.reject(fn {_, value} -> is_nil(value) end) From 53c8c6b03b8e867b92eb170f66c96ec9050b6e3b Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 16:23:05 +0300 Subject: [PATCH 39/42] Simplify imported.ex --- lib/plausible/stats/imported/imported.ex | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/lib/plausible/stats/imported/imported.ex b/lib/plausible/stats/imported/imported.ex index 13245bb93518..a1e817e484ed 100644 --- a/lib/plausible/stats/imported/imported.ex +++ b/lib/plausible/stats/imported/imported.ex @@ -16,9 +16,6 @@ defmodule Plausible.Stats.Imported do @property_to_table_mappings Imported.Base.property_to_table_mappings() - @imported_dimensions Map.keys(@property_to_table_mappings) ++ - Plausible.Imported.imported_custom_props() - @goals_with_url Plausible.Imported.goals_with_url() def goals_with_url(), do: @goals_with_url @@ -279,8 +276,8 @@ defmodule Plausible.Stats.Imported do end) end - def merge_imported(q, site, %Query{dimensions: dimensions} = query, metrics) do - if merge_imported_dimensions?(dimensions) do + def merge_imported(q, site, query, metrics) do + if schema_supports_query?(query) do imported_q = site |> Imported.Base.query_imported(query) @@ -300,13 +297,6 @@ defmodule Plausible.Stats.Imported do end end - def merge_imported(q, _, _, _), do: q - - defp 
merge_imported_dimensions?(dimensions) do - dimensions in [["visit:browser", "visit:browser_version"], ["visit:os", "visit:os_version"]] or - (length(dimensions) == 1 and hd(dimensions) in @imported_dimensions) - end - def total_imported_visitors(site, query) do site |> Imported.Base.query_imported(query) From 7dd2eff63bac66f13b43278adc75e418f387b678 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 16:33:26 +0300 Subject: [PATCH 40/42] experimental_session_count flag removal --- assets/js/dashboard/api.js | 1 - assets/js/dashboard/query.js | 1 - lib/plausible/stats/query.ex | 15 --------------- lib/plausible/stats/sql/expression.ex | 7 +++---- lib/plausible/stats/sql/where_builder.ex | 11 +---------- lib/plausible/stats/timeseries.ex | 3 +-- .../api/stats_controller/top_stats_test.exs | 2 +- test/test_helper.exs | 2 -- 8 files changed, 6 insertions(+), 36 deletions(-) diff --git a/assets/js/dashboard/api.js b/assets/js/dashboard/api.js index 6aaae36339f3..a9ff924e445f 100644 --- a/assets/js/dashboard/api.js +++ b/assets/js/dashboard/api.js @@ -38,7 +38,6 @@ export function serializeQuery(query, extraQuery = []) { if (query.from) { queryObj.from = formatISO(query.from) } if (query.to) { queryObj.to = formatISO(query.to) } if (query.filters) { queryObj.filters = serializeApiFilters(query.filters) } - if (query.experimental_session_count) { queryObj.experimental_session_count = query.experimental_session_count } if (query.with_imported) { queryObj.with_imported = query.with_imported } if (SHARED_LINK_AUTH) { queryObj.auth = SHARED_LINK_AUTH } diff --git a/assets/js/dashboard/query.js b/assets/js/dashboard/query.js index 4a61841e9873..0335f510ff4e 100644 --- a/assets/js/dashboard/query.js +++ b/assets/js/dashboard/query.js @@ -42,7 +42,6 @@ export function parseQuery(querystring, site) { to: q.get('to') ? dayjs.utc(q.get('to')) : undefined, match_day_of_week: matchDayOfWeek == 'true', with_imported: q.get('with_imported') ? 
q.get('with_imported') === 'true' : true, - experimental_session_count: q.get('experimental_session_count'), filters: parseJsonUrl(q.get('filters'), []), labels: parseJsonUrl(q.get('labels'), {}) } diff --git a/lib/plausible/stats/query.ex b/lib/plausible/stats/query.ex index 56568f01fbbb..9d2eb0d66753 100644 --- a/lib/plausible/stats/query.ex +++ b/lib/plausible/stats/query.ex @@ -10,7 +10,6 @@ defmodule Plausible.Stats.Query do include_imported: false, skip_imported_reason: nil, now: nil, - experimental_session_count?: false, experimental_reduced_joins?: false, latest_import_end_date: nil, metrics: [], @@ -36,7 +35,6 @@ defmodule Plausible.Stats.Query do query = __MODULE__ |> struct!(now: now, timezone: site.timezone) - |> put_experimental_session_count(site, params) |> put_experimental_reduced_joins(site, params) |> put_period(site, params) |> put_dimensions(params) @@ -56,7 +54,6 @@ defmodule Plausible.Stats.Query do query = struct!(__MODULE__, Map.to_list(query_data)) |> put_imported_opts(site, %{}) - |> put_experimental_session_count(site, params) |> put_experimental_reduced_joins(site, params) |> struct!(v2: true) @@ -64,18 +61,6 @@ defmodule Plausible.Stats.Query do end end - defp put_experimental_session_count(query, site, params) do - if Map.has_key?(params, "experimental_session_count") do - struct!(query, - experimental_session_count?: Map.get(params, "experimental_session_count") == "true" - ) - else - struct!(query, - experimental_session_count?: FunWithFlags.enabled?(:experimental_session_count, for: site) - ) - end - end - defp put_experimental_reduced_joins(query, site, params) do if Map.has_key?(params, "experimental_reduced_joins") do struct!(query, diff --git a/lib/plausible/stats/sql/expression.ex b/lib/plausible/stats/sql/expression.ex index c4e2c9e3264e..2a0afe9ac9f4 100644 --- a/lib/plausible/stats/sql/expression.ex +++ b/lib/plausible/stats/sql/expression.ex @@ -61,7 +61,7 @@ defmodule Plausible.Stats.SQL.Expression do }) end - def 
dimension(key, "time:hour", :sessions, %Query{experimental_session_count?: true} = query) do + def dimension(key, "time:hour", :sessions, query) do wrap_expression([s], %{ key => regular_time_slots(query, 3600) }) @@ -75,8 +75,7 @@ defmodule Plausible.Stats.SQL.Expression do # :NOTE: This is not exposed in Query APIv2 def dimension(key, "time:minute", :sessions, %Query{ - period: "30m", - experimental_session_count?: true + period: "30m" }) do wrap_expression([s], %{ key => @@ -96,7 +95,7 @@ defmodule Plausible.Stats.SQL.Expression do end # :NOTE: This is not exposed in Query APIv2 - def dimension(key, "time:minute", :sessions, %Query{experimental_session_count?: true} = query) do + def dimension(key, "time:minute", :sessions, query) do wrap_expression([s], %{ key => regular_time_slots(query, 60) }) diff --git a/lib/plausible/stats/sql/where_builder.ex b/lib/plausible/stats/sql/where_builder.ex index 71bcdad49519..73eeecaeefc7 100644 --- a/lib/plausible/stats/sql/where_builder.ex +++ b/lib/plausible/stats/sql/where_builder.ex @@ -51,7 +51,7 @@ defmodule Plausible.Stats.SQL.WhereBuilder do ) end - defp filter_site_time_range(:sessions, site, %Query{experimental_session_count?: true} = query) do + defp filter_site_time_range(:sessions, site, query) do {first_datetime, last_datetime} = utc_boundaries(query, site) # Counts each _active_ session in time range even if they started before @@ -61,15 +61,6 @@ defmodule Plausible.Stats.SQL.WhereBuilder do ) end - defp filter_site_time_range(:sessions, site, query) do - {first_datetime, last_datetime} = utc_boundaries(query, site) - - dynamic( - [s], - s.site_id == ^site.id and s.start >= ^first_datetime and s.start < ^last_datetime - ) - end - defp add_filter(:events, _query, [:is, "event:name", list]) do dynamic([e], e.name in ^list) end diff --git a/lib/plausible/stats/timeseries.ex b/lib/plausible/stats/timeseries.ex index eafeb79fe5bf..5fc3938ed0b3 100644 --- a/lib/plausible/stats/timeseries.ex +++ 
b/lib/plausible/stats/timeseries.ex @@ -26,8 +26,7 @@ defmodule Plausible.Stats.Timeseries do dimensions: [time_dimension(query)], order_by: [{time_dimension(query), :asc}], v2: true, - include: %{time_labels: true, imports: query.include.imports}, - experimental_session_count?: true + include: %{time_labels: true, imports: query.include.imports} ) |> QueryOptimizer.optimize() diff --git a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs index 1f7d5b48688a..a873404032db 100644 --- a/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs +++ b/test/plausible_web/controllers/api/stats_controller/top_stats_test.exs @@ -85,7 +85,7 @@ defmodule PlausibleWeb.Api.StatsController.TopStatsTest do conn = get( conn, - "/api/stats/#{site.domain}/top-stats?period=day&date=2021-01-01&experimental_session_count=true" + "/api/stats/#{site.domain}/top-stats?period=day&date=2021-01-01" ) res = json_response(conn, 200) diff --git a/test/test_helper.exs b/test/test_helper.exs index d9ec1e60a151..60cb5701a9fc 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -11,10 +11,8 @@ Application.ensure_all_started(:double) # Temporary flag to test `experimental_reduced_joins` flag on all tests. 
if System.get_env("TEST_EXPERIMENTAL_REDUCED_JOINS") == "1" do FunWithFlags.enable(:experimental_reduced_joins) - FunWithFlags.enable(:experimental_session_count) else FunWithFlags.disable(:experimental_reduced_joins) - FunWithFlags.disable(:experimental_session_count) end Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual) From 61ff04380981cbdf26c8a04749e69d834bff3fe5 Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 16:45:02 +0300 Subject: [PATCH 41/42] moduledoc --- lib/plausible/stats/sql/special_metrics.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/plausible/stats/sql/special_metrics.ex b/lib/plausible/stats/sql/special_metrics.ex index 74dc121ac349..c46a5f1d7b76 100644 --- a/lib/plausible/stats/sql/special_metrics.ex +++ b/lib/plausible/stats/sql/special_metrics.ex @@ -1,5 +1,5 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do - @doc """ + @moduledoc """ This module defines how special metrics like `conversion_rate` and `percentage` are calculated. """ From 3ed9b90b63f52bff45e6e384988f253382695c6b Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Wed, 3 Jul 2024 16:55:10 +0300 Subject: [PATCH 42/42] Split interval and time modules --- lib/plausible/stats/interval.ex | 114 ------------------ lib/plausible/stats/query_result.ex | 6 +- lib/plausible/stats/time.ex | 118 +++++++++++++++++++ test/plausible/stats/interval_test.exs | 152 ------------------------ test/plausible/stats/time_test.exs | 157 +++++++++++++++++++++++++ 5 files changed, 278 insertions(+), 269 deletions(-) create mode 100644 lib/plausible/stats/time.ex create mode 100644 test/plausible/stats/time_test.exs diff --git a/lib/plausible/stats/interval.ex b/lib/plausible/stats/interval.ex index ae80ba8eab32..116f711c99e4 100644 --- a/lib/plausible/stats/interval.ex +++ b/lib/plausible/stats/interval.ex @@ -7,8 +7,6 @@ defmodule Plausible.Stats.Interval do `week`, and `month`. 
""" - alias Plausible.Stats.Query - @type t() :: String.t() @type(opt() :: {:site, Plausible.Site.t()} | {:from, Date.t()}, {:to, Date.t()}) @type opts :: list(opt()) @@ -105,116 +103,4 @@ defmodule Plausible.Stats.Interval do def valid_for_period?(period, interval, opts \\ []) do interval in Map.get(valid_by_period(opts), period, []) end - - def format_datetime(%Date{} = date), do: Date.to_string(date) - - def format_datetime(%DateTime{} = datetime), - do: Timex.format!(datetime, "{YYYY}-{0M}-{0D} {h24}:{m}:{s}") - - # Realtime graphs return numbers - def format_datetime(other), do: other - - @doc """ - Returns list of time bucket labels for the given query. - """ - def time_dimension(query) do - Enum.find(query.dimensions, &String.starts_with?(&1, "time")) - end - - def time_labels(query) do - time_labels_for_dimension(time_dimension(query), query) - end - - defp time_labels_for_dimension("time:month", query) do - n_buckets = - Timex.diff( - query.date_range.last, - Date.beginning_of_month(query.date_range.first), - :months - ) - - Enum.map(n_buckets..0, fn shift -> - query.date_range.last - |> Date.beginning_of_month() - |> Timex.shift(months: -shift) - |> format_datetime() - end) - end - - defp time_labels_for_dimension("time:week", query) do - n_buckets = - Timex.diff( - query.date_range.last, - Date.beginning_of_week(query.date_range.first), - :weeks - ) - - Enum.map(0..n_buckets, fn shift -> - query.date_range.first - |> Timex.shift(weeks: shift) - |> date_or_weekstart(query) - |> format_datetime() - end) - end - - defp time_labels_for_dimension("time:day", query) do - query.date_range - |> Enum.into([]) - |> Enum.map(&format_datetime/1) - end - - @full_day_in_hours 23 - defp time_labels_for_dimension("time:hour", query) do - n_buckets = - if query.date_range.first == query.date_range.last do - @full_day_in_hours - else - end_time = - query.date_range.last - |> Timex.to_datetime() - |> Timex.end_of_day() - - Timex.diff(end_time, query.date_range.first, 
:hours) - end - - Enum.map(0..n_buckets, fn step -> - query.date_range.first - |> Timex.to_datetime() - |> Timex.shift(hours: step) - |> DateTime.truncate(:second) - |> format_datetime() - end) - end - - # Only supported in dashboards not via API - defp time_labels_for_dimension("time:minute", %Query{period: "30m"}) do - Enum.into(-30..-1, []) - end - - @full_day_in_minutes 24 * 60 - 1 - defp time_labels_for_dimension("time:minute", query) do - n_buckets = - if query.date_range.first == query.date_range.last do - @full_day_in_minutes - else - Timex.diff(query.date_range.last, query.date_range.first, :minutes) - end - - Enum.map(0..n_buckets, fn step -> - query.date_range.first - |> Timex.to_datetime() - |> Timex.shift(minutes: step) - |> format_datetime() - end) - end - - defp date_or_weekstart(date, query) do - weekstart = Timex.beginning_of_week(date) - - if Enum.member?(query.date_range, weekstart) do - weekstart - else - date - end - end end diff --git a/lib/plausible/stats/query_result.ex b/lib/plausible/stats/query_result.ex index e4c8791e81bb..c59d22ddcf41 100644 --- a/lib/plausible/stats/query_result.ex +++ b/lib/plausible/stats/query_result.ex @@ -1,7 +1,6 @@ defmodule Plausible.Stats.QueryResult do @moduledoc false - alias Plausible.Stats.Interval alias Plausible.Stats.Util alias Plausible.Stats.Filters @@ -49,7 +48,7 @@ defmodule Plausible.Stats.QueryResult do defp dimension_label("time:" <> _ = time_dimension, entry, query) do datetime = Map.get(entry, Util.shortname(query, time_dimension)) - Interval.format_datetime(datetime) + Plausible.Stats.Time.format_datetime(datetime) end defp dimension_label(dimension, entry, query) do @@ -72,7 +71,8 @@ defmodule Plausible.Stats.QueryResult do :unsupported_query -> @imports_unsupported_query_warning _ -> nil end, - time_labels: if(query.include.time_labels, do: Interval.time_labels(query), else: nil) + time_labels: + if(query.include.time_labels, do: Plausible.Stats.Time.time_labels(query), else: nil) } |> 
Enum.reject(fn {_, value} -> is_nil(value) end) |> Enum.into(%{}) diff --git a/lib/plausible/stats/time.ex b/lib/plausible/stats/time.ex new file mode 100644 index 000000000000..c8c0d7a71fee --- /dev/null +++ b/lib/plausible/stats/time.ex @@ -0,0 +1,118 @@ +defmodule Plausible.Stats.Time do + @moduledoc """ + Collection of functions to work with time in queries. + """ + + alias Plausible.Stats.Query + def format_datetime(%Date{} = date), do: Date.to_string(date) + + def format_datetime(%DateTime{} = datetime), + do: Timex.format!(datetime, "{YYYY}-{0M}-{0D} {h24}:{m}:{s}") + + # Realtime graphs return numbers + def format_datetime(other), do: other + + @doc """ + Returns list of time bucket labels for the given query. + """ + def time_dimension(query) do + Enum.find(query.dimensions, &String.starts_with?(&1, "time")) + end + + def time_labels(query) do + time_labels_for_dimension(time_dimension(query), query) + end + + defp time_labels_for_dimension("time:month", query) do + n_buckets = + Timex.diff( + query.date_range.last, + Date.beginning_of_month(query.date_range.first), + :months + ) + + Enum.map(n_buckets..0, fn shift -> + query.date_range.last + |> Date.beginning_of_month() + |> Timex.shift(months: -shift) + |> format_datetime() + end) + end + + defp time_labels_for_dimension("time:week", query) do + n_buckets = + Timex.diff( + query.date_range.last, + Date.beginning_of_week(query.date_range.first), + :weeks + ) + + Enum.map(0..n_buckets, fn shift -> + query.date_range.first + |> Timex.shift(weeks: shift) + |> date_or_weekstart(query) + |> format_datetime() + end) + end + + defp time_labels_for_dimension("time:day", query) do + query.date_range + |> Enum.into([]) + |> Enum.map(&format_datetime/1) + end + + @full_day_in_hours 23 + defp time_labels_for_dimension("time:hour", query) do + n_buckets = + if query.date_range.first == query.date_range.last do + @full_day_in_hours + else + end_time = + query.date_range.last + |> Timex.to_datetime() + |> 
Timex.end_of_day() + + Timex.diff(end_time, query.date_range.first, :hours) + end + + Enum.map(0..n_buckets, fn step -> + query.date_range.first + |> Timex.to_datetime() + |> Timex.shift(hours: step) + |> DateTime.truncate(:second) + |> format_datetime() + end) + end + + # Only supported in dashboards not via API + defp time_labels_for_dimension("time:minute", %Query{period: "30m"}) do + Enum.into(-30..-1, []) + end + + @full_day_in_minutes 24 * 60 - 1 + defp time_labels_for_dimension("time:minute", query) do + n_buckets = + if query.date_range.first == query.date_range.last do + @full_day_in_minutes + else + Timex.diff(query.date_range.last, query.date_range.first, :minutes) + end + + Enum.map(0..n_buckets, fn step -> + query.date_range.first + |> Timex.to_datetime() + |> Timex.shift(minutes: step) + |> format_datetime() + end) + end + + defp date_or_weekstart(date, query) do + weekstart = Timex.beginning_of_week(date) + + if Enum.member?(query.date_range, weekstart) do + weekstart + else + date + end + end +end diff --git a/test/plausible/stats/interval_test.exs b/test/plausible/stats/interval_test.exs index 57d15969ffa8..36e3c68093de 100644 --- a/test/plausible/stats/interval_test.exs +++ b/test/plausible/stats/interval_test.exs @@ -123,156 +123,4 @@ defmodule Plausible.Stats.IntervalTest do ) end end - - describe "time_labels/1" do - test "with time:month dimension" do - assert time_labels(%{ - dimensions: ["visit:device", "time:month"], - date_range: Date.range(~D[2022-01-17], ~D[2022-02-01]) - }) == [ - "2022-01-01", - "2022-02-01" - ] - - assert time_labels(%{ - dimensions: ["visit:device", "time:month"], - date_range: Date.range(~D[2022-01-01], ~D[2022-03-07]) - }) == [ - "2022-01-01", - "2022-02-01", - "2022-03-01" - ] - end - - test "with time:week dimension" do - assert time_labels(%{ - dimensions: ["time:week"], - date_range: Date.range(~D[2020-12-20], ~D[2021-01-08]) - }) == [ - "2020-12-20", - "2020-12-21", - "2020-12-28", - "2021-01-04" - ] - - 
assert time_labels(%{ - dimensions: ["time:week"], - date_range: Date.range(~D[2020-12-21], ~D[2021-01-03]) - }) == [ - "2020-12-21", - "2020-12-28" - ] - end - - test "with time:day dimension" do - assert time_labels(%{ - dimensions: ["time:day"], - date_range: Date.range(~D[2022-01-17], ~D[2022-02-02]) - }) == [ - "2022-01-17", - "2022-01-18", - "2022-01-19", - "2022-01-20", - "2022-01-21", - "2022-01-22", - "2022-01-23", - "2022-01-24", - "2022-01-25", - "2022-01-26", - "2022-01-27", - "2022-01-28", - "2022-01-29", - "2022-01-30", - "2022-01-31", - "2022-02-01", - "2022-02-02" - ] - end - - test "with time:hour dimension" do - assert time_labels(%{ - dimensions: ["time:hour"], - date_range: Date.range(~D[2022-01-17], ~D[2022-01-17]) - }) == [ - "2022-01-17 00:00:00", - "2022-01-17 01:00:00", - "2022-01-17 02:00:00", - "2022-01-17 03:00:00", - "2022-01-17 04:00:00", - "2022-01-17 05:00:00", - "2022-01-17 06:00:00", - "2022-01-17 07:00:00", - "2022-01-17 08:00:00", - "2022-01-17 09:00:00", - "2022-01-17 10:00:00", - "2022-01-17 11:00:00", - "2022-01-17 12:00:00", - "2022-01-17 13:00:00", - "2022-01-17 14:00:00", - "2022-01-17 15:00:00", - "2022-01-17 16:00:00", - "2022-01-17 17:00:00", - "2022-01-17 18:00:00", - "2022-01-17 19:00:00", - "2022-01-17 20:00:00", - "2022-01-17 21:00:00", - "2022-01-17 22:00:00", - "2022-01-17 23:00:00" - ] - - assert time_labels(%{ - dimensions: ["time:hour"], - date_range: Date.range(~D[2022-01-17], ~D[2022-01-18]) - }) == [ - "2022-01-17 00:00:00", - "2022-01-17 01:00:00", - "2022-01-17 02:00:00", - "2022-01-17 03:00:00", - "2022-01-17 04:00:00", - "2022-01-17 05:00:00", - "2022-01-17 06:00:00", - "2022-01-17 07:00:00", - "2022-01-17 08:00:00", - "2022-01-17 09:00:00", - "2022-01-17 10:00:00", - "2022-01-17 11:00:00", - "2022-01-17 12:00:00", - "2022-01-17 13:00:00", - "2022-01-17 14:00:00", - "2022-01-17 15:00:00", - "2022-01-17 16:00:00", - "2022-01-17 17:00:00", - "2022-01-17 18:00:00", - "2022-01-17 19:00:00", - "2022-01-17 
20:00:00", - "2022-01-17 21:00:00", - "2022-01-17 22:00:00", - "2022-01-17 23:00:00", - "2022-01-18 00:00:00", - "2022-01-18 01:00:00", - "2022-01-18 02:00:00", - "2022-01-18 03:00:00", - "2022-01-18 04:00:00", - "2022-01-18 05:00:00", - "2022-01-18 06:00:00", - "2022-01-18 07:00:00", - "2022-01-18 08:00:00", - "2022-01-18 09:00:00", - "2022-01-18 10:00:00", - "2022-01-18 11:00:00", - "2022-01-18 12:00:00", - "2022-01-18 13:00:00", - "2022-01-18 14:00:00", - "2022-01-18 15:00:00", - "2022-01-18 16:00:00", - "2022-01-18 17:00:00", - "2022-01-18 18:00:00", - "2022-01-18 19:00:00", - "2022-01-18 20:00:00", - "2022-01-18 21:00:00", - "2022-01-18 22:00:00", - "2022-01-18 23:00:00" - ] - end - end end diff --git a/test/plausible/stats/time_test.exs b/test/plausible/stats/time_test.exs new file mode 100644 index 000000000000..e368a0cbc56a --- /dev/null +++ b/test/plausible/stats/time_test.exs @@ -0,0 +1,157 @@ +defmodule Plausible.Stats.TimeTest do + use Plausible.DataCase, async: true + + import Plausible.Stats.Time + + describe "time_labels/1" do + test "with time:month dimension" do + assert time_labels(%{ + dimensions: ["visit:device", "time:month"], + date_range: Date.range(~D[2022-01-17], ~D[2022-02-01]) + }) == [ + "2022-01-01", + "2022-02-01" + ] + + assert time_labels(%{ + dimensions: ["visit:device", "time:month"], + date_range: Date.range(~D[2022-01-01], ~D[2022-03-07]) + }) == [ + "2022-01-01", + "2022-02-01", + "2022-03-01" + ] + end + + test "with time:week dimension" do + assert time_labels(%{ + dimensions: ["time:week"], + date_range: Date.range(~D[2020-12-20], ~D[2021-01-08]) + }) == [ + "2020-12-20", + "2020-12-21", + "2020-12-28", + "2021-01-04" + ] + + assert time_labels(%{ + dimensions: ["time:week"], + date_range: Date.range(~D[2020-12-21], ~D[2021-01-03]) + }) == [ + "2020-12-21", + "2020-12-28" + ] + end + + test "with time:day dimension" do + assert time_labels(%{ + dimensions: ["time:day"], + date_range: Date.range(~D[2022-01-17], ~D[2022-02-02]) 
+ }) == [ + "2022-01-17", + "2022-01-18", + "2022-01-19", + "2022-01-20", + "2022-01-21", + "2022-01-22", + "2022-01-23", + "2022-01-24", + "2022-01-25", + "2022-01-26", + "2022-01-27", + "2022-01-28", + "2022-01-29", + "2022-01-30", + "2022-01-31", + "2022-02-01", + "2022-02-02" + ] + end + + test "with time:hour dimension" do + assert time_labels(%{ + dimensions: ["time:hour"], + date_range: Date.range(~D[2022-01-17], ~D[2022-01-17]) + }) == [ + "2022-01-17 00:00:00", + "2022-01-17 01:00:00", + "2022-01-17 02:00:00", + "2022-01-17 03:00:00", + "2022-01-17 04:00:00", + "2022-01-17 05:00:00", + "2022-01-17 06:00:00", + "2022-01-17 07:00:00", + "2022-01-17 08:00:00", + "2022-01-17 09:00:00", + "2022-01-17 10:00:00", + "2022-01-17 11:00:00", + "2022-01-17 12:00:00", + "2022-01-17 13:00:00", + "2022-01-17 14:00:00", + "2022-01-17 15:00:00", + "2022-01-17 16:00:00", + "2022-01-17 17:00:00", + "2022-01-17 18:00:00", + "2022-01-17 19:00:00", + "2022-01-17 20:00:00", + "2022-01-17 21:00:00", + "2022-01-17 22:00:00", + "2022-01-17 23:00:00" + ] + + assert time_labels(%{ + dimensions: ["time:hour"], + date_range: Date.range(~D[2022-01-17], ~D[2022-01-18]) + }) == [ + "2022-01-17 00:00:00", + "2022-01-17 01:00:00", + "2022-01-17 02:00:00", + "2022-01-17 03:00:00", + "2022-01-17 04:00:00", + "2022-01-17 05:00:00", + "2022-01-17 06:00:00", + "2022-01-17 07:00:00", + "2022-01-17 08:00:00", + "2022-01-17 09:00:00", + "2022-01-17 10:00:00", + "2022-01-17 11:00:00", + "2022-01-17 12:00:00", + "2022-01-17 13:00:00", + "2022-01-17 14:00:00", + "2022-01-17 15:00:00", + "2022-01-17 16:00:00", + "2022-01-17 17:00:00", + "2022-01-17 18:00:00", + "2022-01-17 19:00:00", + "2022-01-17 20:00:00", + "2022-01-17 21:00:00", + "2022-01-17 22:00:00", + "2022-01-17 23:00:00", + "2022-01-18 00:00:00", + "2022-01-18 01:00:00", + "2022-01-18 02:00:00", + "2022-01-18 03:00:00", + "2022-01-18 04:00:00", + "2022-01-18 05:00:00", + "2022-01-18 06:00:00", + "2022-01-18 07:00:00", + "2022-01-18 08:00:00", 
+ "2022-01-18 09:00:00", + "2022-01-18 10:00:00", + "2022-01-18 11:00:00", + "2022-01-18 12:00:00", + "2022-01-18 13:00:00", + "2022-01-18 14:00:00", + "2022-01-18 15:00:00", + "2022-01-18 16:00:00", + "2022-01-18 17:00:00", + "2022-01-18 18:00:00", + "2022-01-18 19:00:00", + "2022-01-18 20:00:00", + "2022-01-18 21:00:00", + "2022-01-18 22:00:00", + "2022-01-18 23:00:00" + ] + end + end +end