Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions config/global.ini.php
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,11 @@
; maximum number of rows for the Products reports
datatable_archiving_maximum_rows_products = 10000

; maximum number of AI Assistants listed in Bot Tracking reports
datatable_archiving_maximum_rows_bots = 250
; maximum number of page/document rows listed per AI Assistant in Bot Tracking reports
datatable_archiving_maximum_rows_subtable_bots = 250

; maximum number of rows for other tables (Providers, User settings configurations)
datatable_archiving_maximum_rows_standard = 500

Expand Down
11 changes: 7 additions & 4 deletions core/ArchiveProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,8 @@ public function getLogAggregator()
* @param array $columnsToRenameAfterAggregation Columns mapped to new names for columns that must change names
* when summed because they cannot be summed, eg,
* `array('nb_uniq_visitors' => 'sum_daily_nb_uniq_visitors')`.
* @param bool|array $countRowsRecursive if set to true, will calculate the recursive rows count for all record names
* which makes it slower. If you only need it for some records pass an array of
* recordNames that defines for which ones you need a recursive row count.
* @param string[]|bool $countRowsRecursive array of recordNames that defines for which ones you need a recursive row count, or true if it should be done for all
* @param string[] $countLeafRows array of recordNames that defines for which ones you need a leaf row count.
* @return array Returns the row counts of each aggregated report before truncation, eg,
*
* array(
Expand All @@ -213,7 +212,8 @@ public function aggregateDataTableRecords(
$defaultColumnToSortByBeforeTruncation = null,
&$columnsAggregationOperation = null,
$columnsToRenameAfterAggregation = null,
$countRowsRecursive = true
$countRowsRecursive = true,
array $countLeafRows = []
) {
/** @var LoggerInterface $logger */
$logger = StaticContainer::get(LoggerInterface::class);
Expand All @@ -239,6 +239,9 @@ public function aggregateDataTableRecords(
if ($countRowsRecursive === true || (is_array($countRowsRecursive) && in_array($recordName, $countRowsRecursive))) {
$nameToCount[$recordName]['recursive'] = $table->getRowsCountRecursive();
}
if (in_array($recordName, $countLeafRows)) {
$nameToCount[$recordName]['leafs'] = $table->getLeafRowsCount();
}

$columnToSortByBeforeTruncation = $defaultColumnToSortByBeforeTruncation;
if (empty($columnToSortByBeforeTruncation)) {
Expand Down
20 changes: 20 additions & 0 deletions core/ArchiveProcessor/Record.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ class Record
*/
private $countOfRecordNameIsRecursive = false;

/**
* @var bool
*/
private $countOfRecordNameIsForLeafs = false;

/**
* @var array|null
*/
Expand Down Expand Up @@ -200,6 +205,13 @@ public function setIsCountOfBlobRecordRows(string $dependentRecordName, bool $is
return $this;
}

/**
 * Marks this record as holding the leaf row count of another blob record.
 *
 * Stores the name of the record the count depends on and raises the
 * "count is for leafs" flag that `getCountOfRecordNameIsForLeafs()` reports.
 *
 * @param string $dependentRecordName name of the blob record whose leaf rows are counted
 * @return Record this instance, to allow chaining
 */
public function setIsCountOfBlobRecordLeafRows(string $dependentRecordName): Record
{
    $this->countOfRecordNameIsForLeafs = true;
    $this->countOfRecordName = $dependentRecordName;

    return $this;
}

/**
* @return string|null
*/
Expand All @@ -216,6 +228,14 @@ public function getCountOfRecordNameIsRecursive(): bool
return $this->countOfRecordNameIsRecursive;
}

/**
* Tells whether this record's value is the leaf row count of the record it depends on.
*
* The flag defaults to false and is switched on by `setIsCountOfBlobRecordLeafRows()`.
*
* @return bool true when the leaf row count should be used for this record
*/
public function getCountOfRecordNameIsForLeafs(): bool
{
return $this->countOfRecordNameIsForLeafs;
}

/**
* @param array|null $columnToRenameAfterAggregation
* @return Record
Expand Down
27 changes: 17 additions & 10 deletions core/ArchiveProcessor/RecordBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
abstract class RecordBuilder
{
/**
* @var int
* @var int|null
*/
protected $maxRowsInTable;

/**
* @var int
* @var int|null
*/
protected $maxRowsInSubtable;

Expand All @@ -40,7 +40,7 @@ abstract class RecordBuilder
protected $columnAggregationOps;

/**
* @var array|null
* @var array<string|int,string|int>|null
*/
protected $columnToRenameAfterAggregation;

Expand All @@ -49,6 +49,7 @@ abstract class RecordBuilder
* @param int|null $maxRowsInSubtable
* @param string|null $columnToSortByBeforeTruncation
* @param array|null $columnAggregationOps
* @param array<string|int,string|int>|null $columnToRenameAfterAggregation
*/
public function __construct(
?int $maxRowsInTable = null,
Expand Down Expand Up @@ -190,15 +191,18 @@ public function buildForNonDayPeriod(ArchiveProcessor $archiveProcessor): void
$columnToRenameAfterAggregation = $record->getColumnToRenameAfterAggregation() ?? $this->columnToRenameAfterAggregation;
$columnAggregationOps = $record->getBlobColumnAggregationOps() ?? $this->columnAggregationOps;

// only do recursive row count if there is a numeric record that depends on it
$countRecursiveRows = false;
// only do recursive row counts if there is a numeric record that depends on it
$countRecursiveRows = $countLeafRows = [];
foreach ($numericRecords as $numeric) {
if (
$numeric->getCountOfRecordName() == $record->getName()
&& $numeric->getCountOfRecordNameIsRecursive()
) {
$countRecursiveRows = true;
break;
if ($numeric->getCountOfRecordNameIsRecursive()) {
$countRecursiveRows[] = $numeric->getCountOfRecordName();
}
if ($numeric->getCountOfRecordNameIsForLeafs()) {
$countLeafRows[] = $numeric->getCountOfRecordName();
}
}
}

Expand All @@ -209,7 +213,8 @@ public function buildForNonDayPeriod(ArchiveProcessor $archiveProcessor): void
$columnToSortByBeforeTruncation,
$columnAggregationOps,
$columnToRenameAfterAggregation,
$countRecursiveRows
$countRecursiveRows,
$countLeafRows
);

$aggregatedCounts = array_merge($aggregatedCounts, $counts);
Expand Down Expand Up @@ -250,7 +255,9 @@ public function buildForNonDayPeriod(ArchiveProcessor $archiveProcessor): void

$count = $aggregatedCounts[$dependentRecordName];

if ($record->getCountOfRecordNameIsRecursive()) {
if ($record->getCountOfRecordNameIsForLeafs()) {
$recordCountMetricValues[$record->getName()] = $count['leafs'];
} elseif ($record->getCountOfRecordNameIsRecursive()) {
$recordCountMetricValues[$record->getName()] = $count['recursive'];
} else {
$recordCountMetricValues[$record->getName()] = $count['level0'];
Expand Down
28 changes: 24 additions & 4 deletions core/DataTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,26 @@ public function getRowsCountRecursive()
return $totalCount;
}

/**
 * Counts the leaf rows of the whole DataTable hierarchy.
 *
 * A row that has a subtable is not counted itself; instead the leaf rows of
 * that subtable are counted recursively. A row without a subtable counts as one.
 *
 * @return int
 */
public function getLeafRowsCount()
{
    $leafCount = 0;

    foreach ($this->rows as $currentRow) {
        $childTable = $currentRow->getSubtable();

        if (!$childTable) {
            // no subtable available for this row, so the row itself is a leaf
            ++$leafCount;
            continue;
        }

        $leafCount += $childTable->getLeafRowsCount();
    }

    return $leafCount;
}

/**
* Delete a column by name in every row. This change is NOT applied recursively to all
* subtables.
Expand Down Expand Up @@ -1839,10 +1859,10 @@ public function setMaximumAllowedRows($maximumAllowedRows)
* created for path labels that cannot be found.
* @param int $maxSubtableRows The maximum number of allowed rows in new subtables. New
* subtables are only created if `$missingRowColumns` is provided.
* @return array First element is the found row or `false`. Second element is
* the number of path segments walked. If a row is found, this
* will be == to `count($path)`. Otherwise, it will be the index
* of the path segment that we could not find.
* @return array{0: false|Row, 1: int} First element is the found row or `false`. Second element is
* the number of path segments walked. If a row is found, this
* will be == to `count($path)`. Otherwise, it will be the index
* of the path segment that we could not find.
*/
public function walkPath($path, $missingRowColumns = false, $maxSubtableRows = 0)
{
Expand Down
3 changes: 0 additions & 3 deletions plugins/AIAgents/lang/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
"AIAgentVisits": "AI Agent Visits",
"AIAgentVisitsDocumentation": "This is an overview of AI Agent visits. AI agent traffic only includes visits where an AI opens your site in a real browser and behaves like a human visitor. It does not include background requests from ChatGPT or other AI tools that fetch your pages without using a browser.",
"AIAgentsOverviewSubcategoryDescription": "Review how AI agents and human visitors engage with your site at a glance. This overview surfaces combined metrics and trends so you can quickly spot changes before exploring detailed reports.",
"AIAssistantsOverviewHelp1": "The AI Assistant Overview page provide insights into website traffic originating from AI Assistants such as ChatGPT and other large language model–based assistants. These reports track key metrics including the number of requests made by these bots, the pages and documents they access, and any errors encountered. They also offer detailed breakdowns showing which bots visit specific page URLs, helping you understand how AI assistants interact with your content and identify opportunities to improve visibility and accessibility for AI-driven users.",
"AIAssistantsOverviewHelp2": "It’s important to note that none of these pages were actually viewed by humans in the traditional way — all requests originate from AI assistants fetching content automatically.",
"AIAssistantsOverviewHelp3": "Currently, these reports exclusively include requests from AI bots that do not execute JavaScript. They do not include traffic from AI crawlers used for training AI models or from AI agents capable of executing JavaScript.",
"ColumnAIAgentActions": "Actions by AI Agent Visits",
"ColumnAIAgentAverageVisitDuration": "Avg. Duration of an AI Agent Visit (in sec)",
"ColumnAIAgentAvgActionsPerVisit": "Avg. Actions per AI Agent Visit",
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
119 changes: 119 additions & 0 deletions plugins/BotTracking/API.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
<?php

/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/

declare(strict_types=1);

namespace Piwik\Plugins\BotTracking;

use Piwik\Archive;
use Piwik\DataTable;
use Piwik\DataTable\DataTableInterface;
use Piwik\DataTable\Filter\ColumnDelete;
use Piwik\Piwik;

/**
 * Reporting API of the BotTracking plugin.
 *
 * Exposes the numeric bot tracking metrics as well as the AI assistant reports
 * (with page URL or document URL subtables) read from the archives.
 */
class API extends \Piwik\Plugin\API
{
    /**
     * Returns the numeric Bot Tracking metrics for the requested site(s), period and date.
     *
     * For every period other than `day` the unique page URL and unique document URL
     * metrics are left out of the archive query.
     * NOTE(review): presumably because unique counts cannot be aggregated across days - confirm.
     *
     * @param string|int|int[] $idSite
     * @param null|string|string[] $columns optional columns to keep in the result; when empty, all columns are returned
     */
    public function get($idSite, string $period, string $date, $columns = null): DataTableInterface
    {
        Piwik::checkUserHasViewAccess($idSite);

        $archive = Archive::build($idSite, $period, $date, '');

        $metrics = Metrics::getReportMetricColumns();

        if ($period !== 'day') {
            $dayOnlyMetrics = [
                Metrics::METRIC_AI_ASSISTANTS_UNIQUE_DOCUMENT_URLS,
                Metrics::METRIC_AI_ASSISTANTS_UNIQUE_PAGE_URLS,
            ];

            // strict comparison avoids PHP's loose type juggling when matching metric names
            $metrics = array_filter($metrics, static function ($metric) use ($dayOnlyMetrics): bool {
                return !in_array($metric, $dayOnlyMetrics, true);
            });
        }

        $dataTable = $archive->getDataTableFromNumeric($metrics);

        $this->filterColumns($dataTable, $columns);

        return $dataTable;
    }

    /**
     * Returns a report about AI assistants crawling your site and how many hits each one generates. Depending on the
     * provided secondary dimension the subtable will either contain all requested page urls or document urls.
     *
     * @param string|int|int[] $idSite
     * @param null|'pages'|'documents' $secondaryDimension can be either `pages` (default) or `documents`;
     *                                                     any value other than `documents` selects the pages record
     * @return DataTable|DataTable\Map
     */
    public function getAIAssistantRequests(
        $idSite,
        string $period,
        string $date,
        bool $expanded = false,
        bool $flat = false,
        ?string $secondaryDimension = null
    ): DataTableInterface {
        Piwik::checkUserHasViewAccess($idSite);

        $archiveName = Archiver::AI_ASSISTANTS_PAGES_RECORD;

        if ($secondaryDimension === 'documents') {
            $archiveName = Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD;
        }

        $dataTable = Archive::createDataTableFromArchive($archiveName, $idSite, $period, $date, '', $expanded, $flat);

        // When flattening a report, remove all main table rows, where no subtable exists
        if ($flat) {
            $dataTable->filter(static function (DataTable $table): void {
                // getRows() hands back an array, so deleting from the table while looping is safe
                foreach ($table->getRows() as $key => $row) {
                    if (!$row->getIdSubDataTable()) {
                        $table->deleteRow($key);
                    }
                }
            });
        }

        return $dataTable;
    }

    /**
     * Returns the page URL subtable of a single AI assistant row of the pages record.
     *
     * @param string|int|int[] $idSite
     * @param int $idSubtable id of the subtable to load
     * @return DataTable|DataTable\Map
     */
    public function getPageUrlsForAIAssistant($idSite, string $period, string $date, int $idSubtable): DataTableInterface
    {
        Piwik::checkUserHasViewAccess($idSite);

        return Archive::createDataTableFromArchive(
            Archiver::AI_ASSISTANTS_PAGES_RECORD,
            $idSite,
            $period,
            $date,
            '',
            false,
            false,
            $idSubtable
        );
    }

    /**
     * Returns the document URL subtable of a single AI assistant row of the documents record.
     *
     * @param string|int|int[] $idSite
     * @param int $idSubtable id of the subtable to load
     * @return DataTable|DataTable\Map
     */
    public function getDocumentUrlsForAIAssistant($idSite, string $period, string $date, int $idSubtable): DataTableInterface
    {
        Piwik::checkUserHasViewAccess($idSite);

        return Archive::createDataTableFromArchive(
            Archiver::AI_ASSISTANTS_DOCUMENTS_RECORD,
            $idSite,
            $period,
            $date,
            '',
            false,
            false,
            $idSubtable
        );
    }

    /**
     * Restricts the given table to the requested columns.
     *
     * Does nothing when no columns were requested, or when the requested value
     * resolves to an empty list. Otherwise the ColumnDelete filter is applied with
     * an empty "remove" list and the requested columns as the "keep" list.
     *
     * @param null|string|string[] $columns
     */
    private function filterColumns(DataTableInterface $table, $columns): void
    {
        if (empty($columns)) {
            return;
        }

        $columnsToKeep = Piwik::getArrayFromApiParameter($columns);
        if (empty($columnsToKeep)) {
            return;
        }

        $table->filter(ColumnDelete::class, [[], $columnsToKeep]);
    }
}
25 changes: 25 additions & 0 deletions plugins/BotTracking/Archiver.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?php

/**
* Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license https://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*/

declare(strict_types=1);

namespace Piwik\Plugins\BotTracking;

/**
* Archiver of the BotTracking plugin; declares the names of the plugin's archive records.
*/
class Archiver extends \Piwik\Plugin\Archiver
{
// Record read by the BotTracking API for the AI assistant report and its page URL subtables.
public const AI_ASSISTANTS_PAGES_RECORD = 'BotTracking_AIAssistantsPages';
// Record read by the BotTracking API for the AI assistant report and its document URL subtables.
public const AI_ASSISTANTS_DOCUMENTS_RECORD = 'BotTracking_AIAssistantsDocuments';
// NOTE(review): presumably records counting the requested pages / documents - not referenced
// in the code visible here, confirm against the record builders.
public const AI_ASSISTANTS_REQUESTED_PAGES_RECORD = 'BotTracking_AIAssistantsRequestedPages';
public const AI_ASSISTANTS_REQUESTED_DOCUMENTS_RECORD = 'BotTracking_AIAssistantsRequestedDocuments';

/**
* Always returns true.
*
* NOTE(review): presumably this lets archiving for this plugin run for periods without
* any visits - confirm against \Piwik\Plugin\Archiver.
*/
public static function shouldRunEvenWhenNoVisits(): bool
{
return true;
}
}
16 changes: 9 additions & 7 deletions plugins/BotTracking/BotDetector.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
*/
class BotDetector
{
public const BOT_TYPE_AI_ASSISTANT = 'ai_assistant';

/** @var null|array{bot_name: string, bot_type: string} */
private $detectionResult;

Expand All @@ -25,13 +27,13 @@ class BotDetector
* @var array<string, string>
*/
private $aiAssistantPatterns = [
'ChatGPT-User' => 'ai_assistant',
'MistralAI-User' => 'ai_assistant',
'Gemini-Deep-Research' => 'ai_assistant',
'Claude-User' => 'ai_assistant',
'Perplexity-User' => 'ai_assistant',
'Google-NotebookLM' => 'ai_assistant',
'Devin' => 'ai_assistant',
'ChatGPT-User' => self::BOT_TYPE_AI_ASSISTANT,
'MistralAI-User' => self::BOT_TYPE_AI_ASSISTANT,
'Gemini-Deep-Research' => self::BOT_TYPE_AI_ASSISTANT,
'Claude-User' => self::BOT_TYPE_AI_ASSISTANT,
'Perplexity-User' => self::BOT_TYPE_AI_ASSISTANT,
'Google-NotebookLM' => self::BOT_TYPE_AI_ASSISTANT,
'Devin' => self::BOT_TYPE_AI_ASSISTANT,
];

public function __construct(string $userAgent)
Expand Down
Loading
Loading