Skip to content

Commit

Permalink
Support querying quarry_p database
Browse files Browse the repository at this point in the history
quarry_p is a new database containing read-only views mirroring 4 tables
from Quarry's own database:
- query
- query_revision
- query_run
- star

The use case for these views is described in T367415: doing stats on query
execution times, and on which tables are being queried.

These views should not expose any information that is not already
visible from the public Quarry web interface. The "user" and
"user_group" tables do not have corresponding views, to preserve the
privacy of users that have logged in to Quarry but did not create any
queries.

Bug: T367415
  • Loading branch information
dhinus committed Aug 20, 2024
1 parent edfb685 commit 0008b70
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 6 deletions.
Binary file modified helm-quarry/prod-config.yaml
Binary file not shown.
4 changes: 2 additions & 2 deletions helm-quarry/values.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
web:
repository: 'quay.io/wikimedia-quarry/quarry'
tag: pr-62 # web tag managed by github actions
tag: pr-61 # web tag managed by github actions

worker:
repository: 'quay.io/wikimedia-quarry/quarry'
tag: pr-62 # worker tag managed by github actions
tag: pr-61 # worker tag managed by github actions
5 changes: 5 additions & 0 deletions quarry/default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ TOOLS_DB_PORT: 3306
TOOLS_DB_USER: ''
TOOLS_DB_PASSWORD: ''

# Connection settings for the quarry_p database.
QUARRY_P_HOST: 'db'
QUARRY_P_PORT: 3306
QUARRY_P_USER: ''
QUARRY_P_PASSWORD: ''

OUTPUT_PATH_TEMPLATE: '/results/%s/%s/%s.sqlite'
REDIS_HOST: 'redis'
REDIS_PORT: 6379
Expand Down
13 changes: 12 additions & 1 deletion quarry/web/replica.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,17 @@ def __init__(self, config):

def _db_name_mangler(self):
self.is_tools_db = False
self.is_quarry_p = False
if self.dbname == "":
raise ReplicaConnectionException(
"Attempting connection before a database is selected"
)
if "__" in self.dbname and self.dbname.endswith("_p"):
self.is_tools_db = True
self.database_p = self.dbname
elif self.dbname == "quarry" or self.dbname == "quarry_p":
self.is_quarry_p = True
self.database_p = "quarry_p"
elif self.dbname == "meta" or self.dbname == "meta_p":
self.database_name = "s7"
self.database_p = "meta_p"
Expand All @@ -41,6 +45,8 @@ def _db_name_mangler(self):
def get_host_name(self):
    """Return the host to connect to for the currently selected database.

    Relies on the flags set by ``_db_name_mangler``:

    * ToolsDB databases use ``TOOLS_DB_HOST``.
    * The quarry_p database uses ``QUARRY_P_HOST``, matching the
      ``QUARRY_P_*`` prefix used for the other connection settings.
    * Everything else uses ``database_name``, qualified with
      ``REPLICA_DOMAIN`` when that setting is non-empty.
    """
    if self.is_tools_db:
        return self.config["TOOLS_DB_HOST"]
    if self.is_quarry_p:
        # Fall back to DB_HOST so that a config which does not define
        # QUARRY_P_HOST (or leaves it empty) keeps working.
        try:
            quarry_p_host = self.config["QUARRY_P_HOST"]
        except KeyError:
            quarry_p_host = None
        return quarry_p_host or self.config["DB_HOST"]
    if self.config["REPLICA_DOMAIN"]:
        return f"{self.database_name}.{self.config['REPLICA_DOMAIN']}"
    return self.database_name
Expand All @@ -62,7 +68,12 @@ def connection(self, db):
self.dbname = db
self._db_name_mangler()
host = self.get_host_name()
conf_prefix = "TOOLS_DB" if self.is_tools_db else "REPLICA"
if self.is_tools_db:
conf_prefix = "TOOLS_DB"
elif self.is_quarry_p:
conf_prefix = "QUARRY_P"
else:
conf_prefix = "REPLICA"
port = self.config[f"{conf_prefix}_PORT"]
connect_opts = {
"db": self.database_p,
Expand Down
2 changes: 1 addition & 1 deletion quarry/web/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


# Database names a user may select:
#   - centralauth, meta, or any name containing "wik" followed by letters,
#     each with an optional "_p" suffix;
#   - "quarry" or "quarry_p";
#   - names of the form "s<digits>__<name>_p".
# Exactly one pattern literal must be passed here: adjacent string literals
# are concatenated, and two anchored patterns back to back match nothing.
VALID_DB_NAMES = re.compile(
    r"^(?:(?:(?:centralauth|meta|[0-9a-z_]*wik[a-z]+)(?:_p)?)|quarry(?:_p)?|s\d+__\w+_p)$"
)


Expand Down
9 changes: 7 additions & 2 deletions quarry/web/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,13 @@


def get_replag(cur):
    """Return the lag reported by ``heartbeat_p.heartbeat`` as an int.

    Not every database has a heartbeat table, so its existence is checked
    in ``information_schema`` first; when the table is absent the lag is
    reported as 0.

    :param cur: an open DB-API cursor on the target database
    :return: the ``lag`` value of the first heartbeat row, or 0 when there
        is no heartbeat table
    """
    cur.execute(
        "SELECT * FROM information_schema.tables "
        "WHERE table_schema='heartbeat_p' and table_name='heartbeat';"
    )
    # Check the fetched rows rather than cur.rowcount: DB-API drivers may
    # report -1 for rowcount, and fetching also consumes the result set
    # before the next statement is executed on this cursor.
    if not cur.fetchall():
        # there is not a heartbeat table on this database
        return 0
    cur.execute("SELECT lag FROM heartbeat_p.heartbeat;")
    return int(cur.fetchall()[0][0])


@worker_process_init.connect
Expand Down
6 changes: 6 additions & 0 deletions schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,9 @@ CREATE TABLE IF NOT EXISTS star(
CREATE INDEX IF NOT EXISTS star_user_id_index ON star(user_id);
CREATE INDEX IF NOT EXISTS star_query_id_index ON star(query_id);
CREATE UNIQUE INDEX IF NOT EXISTS star_user_query_index ON star(user_id, query_id);

-- quarry_p: a separate database holding views that mirror four tables of
-- the quarry database (query, query_revision, query_run, star), intended
-- for read-only querying (see T367415).
-- The "user" and "user_group" tables deliberately have no view here, to
-- preserve the privacy of users who logged in but never created a query.
-- NOTE(review): nothing in these statements makes the views read-only;
-- presumably that is enforced through the grants of the connecting
-- account - confirm.
CREATE DATABASE IF NOT EXISTS quarry_p CHARACTER SET utf8;
CREATE VIEW IF NOT EXISTS quarry_p.query AS SELECT * FROM quarry.query;
CREATE VIEW IF NOT EXISTS quarry_p.query_revision AS SELECT * FROM quarry.query_revision;
CREATE VIEW IF NOT EXISTS quarry_p.query_run AS SELECT * FROM quarry.query_run;
CREATE VIEW IF NOT EXISTS quarry_p.star AS SELECT * FROM quarry.star;

0 comments on commit 0008b70

Please sign in to comment.