Skip to content

Commit

Permalink
Snippet integration verification (#4106)
Browse files Browse the repository at this point in the history
* Allow running browserless.io locally

* Compile tailwind classes based on extra/ too

* Add browserless runtime configuration

* Ignore verification events on ingestion

* Improve extracting HTML text in tests

* Update dependencies

- Floki will be used on production to parse site contents
- Req will be used to handle redundant stuff like retrying etc.

* Add shuttle SVG to generic components

Later on we'll use it to indicate verification errors

* Connect live socket & allow skipping awaiting the first pageview

* Connect live socket in general settings

* Implement verification checks & diagnostics

* Stub remote services with Req for testing

* Change snippet screen copy

* Update tracker script, so that:

1. headless browsers aren't ignored if `window.__plausible` is defined
2. callback optionally supplies the event response HTTP status

This will be later used to check whether the server acknowledged
the verification event.

* Implement LiveView verification UI

* Embed the verification UIs into settings and onboarding

* Implement browserless puppeteer verification script

It:
 - tries to visit the site
 - defines window.__plausible, so the tracker doesn't ignore test events
 - sends a verification event and instruments the callback
 - awaits the callback to fire and returns the result

* Improve diagnostics for CSP

Only report CSP error if the snippet is already found

* Put verification behind a feature flag/env setting

* Contact Us hint only for Enterprise Edition

* For headless code, use JS context instead of EEx interpolation

* Update diagnostics test with WordPress scenarios

* Shorten exception/throw interception

* Rename test

* Tidy up

* Bust URL always on headless check

* Update moduledoc

* Detect official Plausible WordPress Plugin

and act accordingly on diagnostics interpretation

* Stop using 'rating' in favour of 'interpretation'

* Only report CSP error if no proxy is likely

* Update CHANGELOG

* Allow event-* attributes on snippet elements

* Improve naive GTM detection, not to confuse it with GA4

* Update lib/plausible/verification.ex

Co-authored-by: Adrian Gruntkowski <[email protected]>

* Update test/plausible/site/verification/checks_test.exs

Co-authored-by: Adrian Gruntkowski <[email protected]>

* s/perform_wrapped/perform_safe

* Update lib/plausible/verification/checks/installation.ex

Co-authored-by: Adrian Gruntkowski <[email protected]>

* Remove garbage

---------

Co-authored-by: Adrian Gruntkowski <[email protected]>
  • Loading branch information
aerosol and zoldar authored May 23, 2024
1 parent 5881f1c commit c81cb16
Show file tree
Hide file tree
Showing 44 changed files with 2,838 additions and 34 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ All notable changes to this project will be documented in this file.

### Added

- Snippet integration verification

### Removed

### Changed
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ postgres-prod: ## Start a container with the same version of postgres as the one
postgres-stop: ## Stop and remove the postgres container
docker stop plausible_db && docker rm plausible_db

browserless: ## Start a local browserless Chromium container (token: dummy_token, port 3000)
docker run -e "TOKEN=dummy_token" -p 3000:3000 --network host ghcr.io/browserless/chromium

minio: ## Start a transient container with a recent version of minio (s3)
docker run -d --rm -p 10000:10000 -p 10001:10001 --name plausible_minio minio/minio server /data --address ":10000" --console-address ":10001"
while ! docker exec plausible_minio mc alias set local http://localhost:10000 minioadmin minioadmin; do sleep 1; done
Expand Down
4 changes: 3 additions & 1 deletion assets/tailwind.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ module.exports = {
content: [
"./js/**/*.js",
"../lib/*_web.ex",
"../lib/*_web/**/*.*ex"
"../lib/*_web/**/*.*ex",
"../extra/*_web.ex",
"../extra/*_web/**/*.*ex"
],
safelist: [
// PlausibleWeb.StatsView.stats_container_class/1 uses this class
Expand Down
2 changes: 2 additions & 0 deletions config/.env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,5 @@ S3_REGION=us-east-1
S3_ENDPOINT=http://localhost:10000
S3_EXPORTS_BUCKET=dev-exports
S3_IMPORTS_BUCKET=dev-imports

VERIFICATION_ENABLED=true
9 changes: 9 additions & 0 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,15 @@ config :plausible, Plausible.PromEx,
grafana: :disabled,
metrics_server: :disabled

# Site verification toggle. VERIFICATION_ENABLED (default "false") is turned
# into an atom via String.to_existing_atom/1, so a value that does not name an
# already existing atom raises when this file is evaluated.
config :plausible, Plausible.Verification,
enabled?:
get_var_from_path_or_env(config_dir, "VERIFICATION_ENABLED", "false")
|> String.to_existing_atom()

# Browserless credentials/endpoint for the Installation check. The defaults
# line up with the `browserless` Makefile target (dummy_token, port 3000).
# NOTE(review): presumably read by Plausible.Verification.Checks.Installation
# at request time - confirm in that module.
config :plausible, Plausible.Verification.Checks.Installation,
token: get_var_from_path_or_env(config_dir, "BROWSERLESS_TOKEN", "dummy_token"),
endpoint: get_var_from_path_or_env(config_dir, "BROWSERLESS_ENDPOINT", "http://0.0.0.0:3000")

if not is_selfhost do
site_default_ingest_threshold =
case System.get_env("SITE_DEFAULT_INGEST_THRESHOLD") do
Expand Down
10 changes: 10 additions & 0 deletions config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,13 @@ config :ex_money, api_module: Plausible.ExchangeRateMock
config :plausible, Plausible.Ingestion.Counters, enabled: false

config :plausible, Oban, testing: :manual

# Extra Req options for the FetchBody check: the check merges `req_opts` into
# its request options, so in tests its requests go to a Req.Test stub
# registered under the check's module name.
config :plausible, Plausible.Verification.Checks.FetchBody,
req_opts: [
plug: {Req.Test, Plausible.Verification.Checks.FetchBody}
]

# Same Req.Test stubbing for the Installation check.
# NOTE(review): presumably consumed the same way as FetchBody's `req_opts` -
# confirm in the Installation module.
config :plausible, Plausible.Verification.Checks.Installation,
req_opts: [
plug: {Req.Test, Plausible.Verification.Checks.Installation}
]
3 changes: 2 additions & 1 deletion extra/lib/plausible/ingestion/event/revenue.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ defmodule Plausible.Ingestion.Event.Revenue do
}

matching_goal.currency != revenue_source.currency ->
converted = Money.to_currency!(revenue_source, matching_goal.currency)
converted =
Money.to_currency!(revenue_source, matching_goal.currency)

%{
revenue_source_amount: Money.to_decimal(revenue_source),
Expand Down
14 changes: 14 additions & 0 deletions lib/plausible/ingestion/event.ex
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ defmodule Plausible.Ingestion.Event do
salts: nil,
changeset: nil

@verification_user_agent Plausible.Verification.user_agent()

@type drop_reason() ::
:bot
| :spam_referrer
Expand All @@ -31,6 +33,7 @@ defmodule Plausible.Ingestion.Event do
| :site_country_blocklist
| :site_page_blocklist
| :site_hostname_allowlist
| :verification_agent

@type t() :: %__MODULE__{
domain: String.t() | nil,
Expand Down Expand Up @@ -104,6 +107,7 @@ defmodule Plausible.Ingestion.Event do

defp pipeline() do
[
drop_verification_agent: &drop_verification_agent/1,
drop_datacenter_ip: &drop_datacenter_ip/1,
drop_shield_rule_hostname: &drop_shield_rule_hostname/1,
drop_shield_rule_page: &drop_shield_rule_page/1,
Expand Down Expand Up @@ -167,6 +171,16 @@ defmodule Plausible.Ingestion.Event do
struct!(event, clickhouse_session_attrs: Map.merge(event.clickhouse_session_attrs, attrs))
end

# Drops the event with the `:verification_agent` reason when the request's
# user agent is exactly the verification user agent; any other event is
# passed through untouched.
defp drop_verification_agent(%__MODULE__{} = event) do
  if event.request.user_agent == @verification_user_agent do
    drop(event, :verification_agent)
  else
    event
  end
end

defp drop_datacenter_ip(%__MODULE__{} = event) do
case event.request.ip_classification do
"dc_ip" ->
Expand Down
26 changes: 26 additions & 0 deletions lib/plausible/verification.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
defmodule Plausible.Verification do
  @moduledoc """
  Site verification settings.

  Provides the switch that decides whether verification is available
  (`enabled?/1`) and the user-agent string used for site verification
  (`user_agent/0`).
  """
  use Plausible

  @feature_flag :verification

  @doc """
  Tells whether site verification is enabled.

  It is enabled when the `:enabled?` key of this module's application config
  is set, or when the `:verification` feature flag is enabled for `user`.
  A `nil` user never satisfies the feature flag condition.
  """
  def enabled?(user) do
    config = Application.fetch_env!(:plausible, __MODULE__)
    enabled_via_config? = Keyword.fetch!(config, :enabled?)

    enabled_for_user? =
      if is_nil(user), do: false, else: FunWithFlags.enabled?(@feature_flag, for: user)

    enabled_via_config? or enabled_for_user?
  end

  # The user-agent string differs between the two editions selected by `on_ee`.
  on_ee do
    def user_agent, do: "Plausible Verification Agent - if abused, contact [email protected]"
  else
    def user_agent, do: "Plausible Community Edition"
  end
end
37 changes: 37 additions & 0 deletions lib/plausible/verification/check.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
defmodule Plausible.Verification.Check do
  @moduledoc """
  Contract for a single site verification check.

  A check module implements two callbacks:

    * `friendly_name/0` - a user-facing message. It does not have to describe
      the check precisely; it groups checks under a common message so the text
      does not flash when checks rotate often.
    * `perform/1` - receives the verification `state()` and must return it,
      optionally modified.

  `use Plausible.Verification.Check` additionally defines `perform_safe/1`,
  which runs `perform/1` and turns anything raised, thrown or exited into a
  logged error plus a `service_error` diagnostic, so a faulty check does not
  interrupt LiveView.
  """
  @type state() :: Plausible.Verification.State.t()

  @callback friendly_name() :: String.t()
  @callback perform(state()) :: state()

  defmacro __using__(_opts) do
    quote do
      @behaviour Plausible.Verification.Check

      import Plausible.Verification.State

      alias Plausible.Verification.Checks
      alias Plausible.Verification.Diagnostics
      alias Plausible.Verification.State

      require Logger

      # Runs `perform/1`, catching every kind of failure (error, throw, exit).
      def perform_safe(state) do
        try do
          perform(state)
        catch
          _kind, reason ->
            Logger.error(
              "Error running check #{inspect(__MODULE__)} on #{state.url}: #{inspect(reason)}"
            )

            put_diagnostics(state, service_error: true)
        end
      end
    end
  end
end
75 changes: 75 additions & 0 deletions lib/plausible/verification/checks.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
defmodule Plausible.Verification.Checks do
  @moduledoc """
  Runs the sequence of site verification checks.

  Every module listed in `@checks` implements the
  `Plausible.Verification.Check` behaviour and is executed in order, each one
  receiving the state returned by the previous one.

  By default the sequence runs asynchronously in a linked task; pass
  `async?: false` to run it in the calling process (handy in tests). When
  `:report_to` is a pid, it receives a `{:verification_check_start, {check, state}}`
  message before each check and a `{:verification_end, state}` message at the
  end. Each message is preceded by an optional `:slowdown` sleep, since the
  user doesn't benefit from the checks running too quickly.
  """
  alias Plausible.Verification.Checks
  alias Plausible.Verification.State

  require Logger

  @checks [
    Checks.FetchBody,
    Checks.CSP,
    Checks.ScanBody,
    Checks.Snippet,
    Checks.SnippetCacheBust,
    Checks.Installation
  ]

  @doc """
  Runs verification checks for `url` and `data_domain`.

  Options:

    * `:checks` - list of check modules to run (defaults to the built-in list)
    * `:report_to` - process to notify (defaults to the caller)
    * `:async?` - run in a linked task when truthy (default `true`)
    * `:slowdown` - milliseconds to sleep before each notification (default `500`)

  Returns the result of `Task.start_link/1` when asynchronous, otherwise the
  final state.
  """
  def run(url, data_domain, opts \\ []) do
    checks = Keyword.get(opts, :checks, @checks)
    report_to = Keyword.get(opts, :report_to, self())
    slowdown = Keyword.get(opts, :slowdown, 500)

    runner = fn -> do_run(url, data_domain, checks, report_to, slowdown) end

    if Keyword.get(opts, :async?, true) do
      Task.start_link(runner)
    else
      runner.()
    end
  end

  @doc """
  Interprets the diagnostics collected in `state` for its URL.
  """
  def interpret_diagnostics(%State{} = state) do
    Plausible.Verification.Diagnostics.interpret(state.diagnostics, state.url)
  end

  # Threads a fresh state through every check, announcing each one first,
  # then announces the end of the whole run.
  defp do_run(url, data_domain, checks, report_to, slowdown) do
    checks
    |> Enum.reduce(
      %State{url: url, data_domain: data_domain, report_to: report_to},
      fn check, acc ->
        acc
        |> notify_start(check, slowdown)
        |> check.perform_safe()
      end
    )
    |> notify_verification_end(slowdown)
  end

  defp notify_start(state, check, slowdown),
    do: report(state, {:verification_check_start, {check, state}}, slowdown)

  defp notify_verification_end(state, slowdown),
    do: report(state, {:verification_end, state}, slowdown)

  # Sends `message` to the reporting process (only when it is a pid), sleeping
  # for `slowdown` milliseconds first when it is a positive integer.
  # Always returns `state`.
  defp report(state, message, slowdown) do
    if is_pid(state.report_to) do
      if is_integer(slowdown) and slowdown > 0, do: :timer.sleep(slowdown)
      send(state.report_to, message)
    end

    state
  end
end
34 changes: 34 additions & 0 deletions lib/plausible/verification/checks/csp.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
defmodule Plausible.Verification.Checks.CSP do
  @moduledoc """
  Inspects the Content Security Policy response header for the Plausible host.

  When the state carries response headers with exactly one
  `content-security-policy` value and none of its directives mentions
  `PlausibleWeb.Endpoint.host()`, the `disallowed_via_csp?` diagnostic is set.
  In every other case the state is returned unchanged.
  See `Plausible.Verification.Checks` for the execution sequence.
  """
  use Plausible.Verification.Check

  @impl true
  def friendly_name, do: "We're visiting your site to ensure that everything is working correctly"

  @impl true
  def perform(%State{assigns: %{headers: headers}} = state) do
    with [policy] <- headers["content-security-policy"],
         false <- mentions_plausible_host?(policy) do
      put_diagnostics(state, disallowed_via_csp?: true)
    else
      _ -> state
    end
  end

  def perform(state), do: state

  # True when at least one `;`-separated directive contains the Plausible host.
  defp mentions_plausible_host?(policy) do
    host = PlausibleWeb.Endpoint.host()

    policy
    |> String.split(";")
    |> Enum.any?(&String.contains?(&1, host))
  end
end
64 changes: 64 additions & 0 deletions lib/plausible/verification/checks/fetch_body.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
defmodule Plausible.Verification.Checks.FetchBody do
  @moduledoc """
  Downloads the site's body and, when possible, parses it as an HTML document
  for the checks that follow.

  The raw body, the parsed document and the response headers are assigned to
  the state, and the `body_fetched?` diagnostic is set, only when the response
  has a 2xx status, a binary body of at most 500_000 bytes, a `text/html`
  content type and parses successfully. Otherwise the state is returned as is.
  See `Plausible.Verification.Checks` for the execution sequence.
  """
  use Plausible.Verification.Check

  # Bodies larger than this many bytes are not parsed.
  @max_body_bytes 500_000

  @impl true
  def friendly_name, do: "We're visiting your site to ensure that everything is working correctly"

  @impl true
  def perform(%State{url: "https://" <> _ = url} = state) do
    defaults = [
      base_url: url,
      max_redirects: 2,
      connect_options: [timeout: 4_000],
      receive_timeout: 4_000,
      max_retries: 3,
      retry_log_level: :warning
    ]

    # Options from the application config take precedence over the defaults.
    overrides = Application.get_env(:plausible, __MODULE__)[:req_opts] || []

    defaults
    |> Keyword.merge(overrides)
    |> Req.new()
    |> Req.get()
    |> case do
      {:ok, %Req.Response{status: status, body: body} = response}
      when is_binary(body) and status in 200..299 ->
        extract_document(state, response)

      _ ->
        state
    end
  end

  # Oversized bodies are skipped entirely.
  defp extract_document(state, %Req.Response{body: body})
       when byte_size(body) > @max_body_bytes,
       do: state

  # Stores the body, parsed document and headers when the response is HTML
  # and Floki can parse it; leaves the state untouched otherwise.
  defp extract_document(state, %Req.Response{body: body} = response)
       when byte_size(body) <= @max_body_bytes do
    if html?(response) do
      case Floki.parse_document(body) do
        {:ok, document} ->
          state
          |> assign(raw_body: body, document: document, headers: response.headers)
          |> put_diagnostics(body_fetched?: true)

        _ ->
          state
      end
    else
      state
    end
  end

  # Looks at the first `content-type` header value (empty string when absent).
  defp html?(%Req.Response{headers: headers}) do
    content_type =
      headers
      |> Map.get("content-type", "")
      |> List.wrap()
      |> List.first()

    String.contains?(content_type, "text/html")
  end
end
Loading

0 comments on commit c81cb16

Please sign in to comment.