Skip to content

Commit

Permalink
chore: add the tutorial of launching an HTAP cluster based on PostgreSQ…
Browse files Browse the repository at this point in the history
…L, MyDuck server and PGPool (#185)

* fix: intercept the queries on pg_catalog and return mock data (#182)

* chore: add HTAP tutorial for pg protocol on MyDuck

* fix: add mock parameter status message packages to be compatible with clients like pgpool (#183)

* chore: do not set password for PostgreSQL in HTAP tutorial

* doc: add the tutorial of launching an HTAP cluster based on PostgreSQL, MyDuck server and PGPool

* doc: add content

* adopt CR feedback
  • Loading branch information
VWagen1989 authored Nov 20, 2024
1 parent 2fe2999 commit b17d01f
Show file tree
Hide file tree
Showing 20 changed files with 507 additions and 29 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ MyDuck Server supports setting up replicas from common cloud-based MySQL & Postg

### HTAP Setup

With MyDuck's powerful analytics capabilities, you can create an hybrid transactional/analytical processing system where high-frequency data writes are directed to a standard MySQL or Postgres instance, while analytical queries are handled by a MyDuck Server instance. Follow our HTAP setup instructions based on [ProxySQL](docs/tutorial/htap-proxysql-setup.md) or [MariaDB MaxScale](docs/tutorial/htap-maxscale-setup.md) to easily set up an HTAP demonstration.
With MyDuck's powerful analytics capabilities, you can create a hybrid transactional/analytical processing (HTAP) system where high-frequency data writes are directed to a standard MySQL or Postgres instance, while analytical queries are handled by a MyDuck Server instance. Follow our HTAP setup instructions to easily set up an HTAP demonstration:
* For MySQL HTAP, you can build the cluster based on [ProxySQL](docs/tutorial/mysql-htap-proxysql-setup.md) or [MariaDB MaxScale](docs/tutorial/mysql-htap-maxscale-setup.md).
* For PostgreSQL HTAP, you can build the cluster based on [PGPool-II](docs/tutorial/pg-htap-pgpool-setup.md).

### Query & Load Parquet Files

Expand Down
123 changes: 123 additions & 0 deletions catalog/internal_tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,39 @@ func (it *InternalTable) SelectStmt() string {
return b.String()
}

// SelectAllStmt returns the statement "SELECT * FROM <Schema>.<Name>" for
// this internal table.
//
// Schema and Name are inserted verbatim; no identifier quoting or escaping
// is applied.
func (it *InternalTable) SelectAllStmt() string {
	qualified := strings.Join([]string{it.Schema, it.Name}, ".")
	return "SELECT * FROM " + qualified
}

// CountAllStmt returns the statement "SELECT COUNT(*) FROM <Schema>.<Name>"
// for this internal table.
//
// Schema and Name are inserted verbatim; no identifier quoting or escaping
// is applied.
func (it *InternalTable) CountAllStmt() string {
	qualified := strings.Join([]string{it.Schema, it.Name}, ".")
	return "SELECT COUNT(*)" + " FROM " + qualified
}

var InternalTables = struct {
PersistentVariable InternalTable
BinlogPosition InternalTable
PgReplicationLSN InternalTable
GlobalStatus InternalTable
// TODO(sean): This is a temporary work around for clients that query the 'pg_catalog.pg_stat_replication'.
// Once we add 'pg_catalog' and support views for PG, replace this by a view.
// https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-PG-STAT-REPLICATION-VIEW
PGStatReplication InternalTable
// This is a mock table of pg_catalog.current_setting(...)
// https://www.postgresql.org/docs/current/functions-admin.html#FUNCTIONS-ADMIN-SET
PGCurrentSetting InternalTable
}{
PersistentVariable: InternalTable{
Schema: "__sys__",
Expand Down Expand Up @@ -111,11 +139,106 @@ var InternalTables = struct {
{"Innodb_redo_log_enabled", "OFF"}, // Queried by MySQL Shell
},
},
// postgres=# \d+ pg_catalog.pg_stat_replication
// View "pg_catalog.pg_stat_replication"
// Column | Type | Collation | Nullable | Default | Storage | Description
//------------------+--------------------------+-----------+----------+---------+----------+-------------
// pid | integer | | | | plain |
// usesysid | oid | | | | plain |
// usename | name | | | | plain |
// application_name | text | | | | extended |
// client_addr | inet | | | | main |
// client_hostname | text | | | | extended |
// client_port | integer | | | | plain |
// backend_start | timestamp with time zone | | | | plain |
// backend_xmin | xid | | | | plain |
// state | text | | | | extended |
// sent_lsn | pg_lsn | | | | plain |
// write_lsn | pg_lsn | | | | plain |
// flush_lsn | pg_lsn | | | | plain |
// replay_lsn | pg_lsn | | | | plain |
// write_lag | interval | | | | plain |
// flush_lag | interval | | | | plain |
// replay_lag | interval | | | | plain |
// sync_priority | integer | | | | plain |
// sync_state | text | | | | extended |
// reply_time | timestamp with time zone | | | | plain |
//View definition:
// SELECT s.pid,
// s.usesysid,
// u.rolname AS usename,
// s.application_name,
// s.client_addr,
// s.client_hostname,
// s.client_port,
// s.backend_start,
// s.backend_xmin,
// w.state,
// w.sent_lsn,
// w.write_lsn,
// w.flush_lsn,
// w.replay_lsn,
// w.write_lag,
// w.flush_lag,
// w.replay_lag,
// w.sync_priority,
// w.sync_state,
// w.reply_time
// FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, gss_delegation, leader_pid, query_id)
// JOIN pg_stat_get_wal_senders() w(pid, state, sent_lsn, write_lsn, flush_lsn, replay_lsn, write_lag, flush_lag, replay_lag, sync_priority, sync_state, reply_time) ON s.pid = w.pid
// LEFT JOIN pg_authid u ON s.usesysid = u.oid;
PGStatReplication: InternalTable{
// Since the "pg_catalog" is the system catalog on DuckDB, we use "__sys__" as the schema name.
Schema: "__sys__",
Name: "pg_stat_replication",
KeyColumns: []string{
"pid",
},
ValueColumns: []string{
"usesysid",
"usename",
"application_name",
"client_addr",
"client_hostname",
"client_port",
"backend_start",
"backend_xmin",
"state",
"sent_lsn",
"write_lsn",
"flush_lsn",
"replay_lsn",
"write_lag",
"flush_lag",
"replay_lag",
"sync_priority",
"sync_state",
"reply_time",
},
DDL: "pid INTEGER PRIMARY KEY, usesysid TEXT, usename TEXT, application_name TEXT, client_addr TEXT, client_hostname TEXT, client_port INTEGER, backend_start TIMESTAMP, backend_xmin INTEGER, state TEXT, sent_lsn TEXT, write_lsn TEXT, flush_lsn TEXT, replay_lsn TEXT, write_lag INTERVAL, flush_lag INTERVAL, replay_lag INTERVAL, sync_priority INTEGER, sync_state TEXT, reply_time TIMESTAMP",
},
// pg_catalog.current_setting(...)
PGCurrentSetting: InternalTable{
Schema: "__sys__",
Name: "current_setting",
KeyColumns: []string{
"name",
},
ValueColumns: []string{
"setting",
},
DDL: "name TEXT PRIMARY KEY, setting TEXT",
InitialData: [][]any{
{"server_version_num", "170000"},
},
},
}

// internalTables gathers all six tables declared in InternalTables into one
// slice, in declaration order. The elements are InternalTable values, so each
// entry is a copy of the corresponding InternalTables field taken when this
// variable is initialized.
// NOTE(review): presumably iterated to create and seed the tables at startup —
// confirm against the caller. A table added to InternalTables must also be
// appended here, or code ranging over this slice will not see it.
var internalTables = []InternalTable{
InternalTables.PersistentVariable,
InternalTables.BinlogPosition,
InternalTables.PgReplicationLSN,
InternalTables.GlobalStatus,
InternalTables.PGStatReplication,
InternalTables.PGCurrentSetting,
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ services:
- --gtid_mode=ON
- --binlog_expire_logs_seconds=7200
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
interval: 5s
timeout: 5s
retries: 5
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ services:
- --gtid_mode=ON
- --binlog_expire_logs_seconds=7200
healthcheck:
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
interval: 5s
timeout: 5s
retries: 5
Expand Down
File renamed without changes.
78 changes: 78 additions & 0 deletions devtools/htap-setup-pg/pgpool2/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
services:
pgsql:
image: postgres:latest
container_name: htap-pg
environment:
POSTGRES_HOST_AUTH_METHOD: trust
command:
- -c
- wal_level=logical
- -c
- max_wal_senders=30
- -c
- max_replication_slots=10
healthcheck:
test: ["CMD", "pg_isready", "-h", "localhost", "-p", "5432", "-U", "postgres"]
interval: 5s
timeout: 5s
retries: 5

pgsql-setup:
image: postgres:latest
container_name: htap-pg-setup
command: >
sh -c "
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"CREATE PUBLICATION myduck_subscription FOR ALL TABLES;\" &&
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"SELECT PG_CREATE_LOGICAL_REPLICATION_SLOT('myduck_subscription', 'pgoutput');\" &&
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"CREATE TABLE test (id INT PRIMARY KEY, name VARCHAR(255));\" &&
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"INSERT INTO test (id, name) VALUES (1, 'test');\"
"
restart: "no"
depends_on:
pgsql:
condition: service_healthy

myduck:
image: apecloud/myduckserver:latest
pull_policy: always
container_name: htap-myduck
environment:
PGSQL_PRIMARY_DSN: postgres://postgres:postgres@pgsql:5432/postgres?sslmode=disable
PGSQL_SLOT_NAME: myduck_subscription
depends_on:
pgsql:
condition: service_healthy
pgsql-setup:
condition: service_completed_successfully
healthcheck:
test: ["CMD", "psql", "-h", "localhost", "-p", "5432", "-U", "postgres", "-d", "postgres", "-c", "SELECT 1;"]
interval: 5s
timeout: 5s
retries: 5

pgpool:
image: bitnami/pgpool:4.5.4
container_name: htap-pgpool
ports:
- "54321:9999"
environment:
# The PGPOOL_BACKEND_NODES is the list of PostgreSQL servers that pgpool will connect to.
# NodeID:Hostname:Port:Weight
- PGPOOL_BACKEND_NODES=0:pgsql:5432:0,1:myduck:5432:1
- PGPOOL_ENABLE_LOAD_BALANCING=yes
- PGPOOL_SR_CHECK_USER=postgres
- PGPOOL_SR_CHECK_PASSWORD=postgres
- PGPOOL_POSTGRES_USERNAME=postgres
- PGPOOL_POSTGRES_PASSWORD=postgres
- PGPOOL_ADMIN_USERNAME=admin
- PGPOOL_ADMIN_PASSWORD=adminpassword
depends_on:
pgsql:
condition: service_healthy
myduck:
condition: service_healthy
healthcheck:
test: ["CMD", "/opt/bitnami/scripts/pgpool/healthcheck.sh"]
interval: 10s
timeout: 5s
retries: 5
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
17 changes: 14 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ RUN --mount=type=cache,target=/go/pkg/mod \
# Copy the remaining source code
COPY . .

RUN apt-get update && apt-get install -y \
RUN apt-get update && \
apt-get install -y debian-archive-keyring && \
apt-get update && \
apt-get install -y \
gcc-aarch64-linux-gnu \
g++-aarch64-linux-gnu \
--no-install-recommends \
Expand Down Expand Up @@ -82,6 +85,13 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \
&& rm duckdb_cli-linux-$ARCH.zip \
&& duckdb -c 'SELECT extension_name, loaded, install_path FROM duckdb_extensions() where installed'

RUN apt-get update && \
apt-get install -y debian-archive-keyring && \
apt-get update && \
apt-get install -y libpq-dev postgresql-client \
--no-install-recommends \
&& rm -rf /var/lib/apt/lists/*

RUN duckdb -version

RUN useradd --create-home --user-group --shell /bin/bash admin \
Expand All @@ -95,13 +105,14 @@ WORKDIR /home/admin
# Copy the compiled Go binary from the builder stage
COPY --from=builder /myduckserver /usr/local/bin/myduckserver
COPY --chown=admin:admin --chmod=755 docker/*.sh .
COPY --chown=admin:admin --chmod=755 devtools/replica-setup ./replica-setup
COPY --chown=admin:admin --chmod=755 devtools/replica-setup-mysql ./replica-setup-mysql

# ENV LC_CTYPE="en_US.UTF-8"
# ENV LANG="en_US.UTF-8"

# Expose the port your server will run on (if applicable)
# Expose the ports your server will run on (if applicable)
EXPOSE 3306
EXPOSE 5432

# Set the default command to run the Go server
ENTRYPOINT /home/admin/entrypoint.sh
35 changes: 27 additions & 8 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,23 @@

export DATA_PATH="${HOME}/data"
export LOG_PATH="${HOME}/log"
export REPLICA_SETUP_PATH="${HOME}/replica-setup"
export REPLICA_SETUP_PATH="${HOME}/replica-setup-mysql"
export PID_FILE="${LOG_PATH}/myduck.pid"

if [ -n "$PGSQL_PRIMARY_DSN" ]; then
export PGSQL_PRIMARY_DSN_ARG="-pg-primary-dsn $PGSQL_PRIMARY_DSN"
fi

if [ -n "$PGSQL_SLOT_NAME" ]; then
export PGSQL_SLOT_NAME_ARG="-pg-slot-name $PGSQL_SLOT_NAME"
fi

if [ -n "$LOG_LEVEL" ]; then
export LOG_LEVEL="-loglevel $LOG_LEVEL"
fi

# Function to run replica setup
run_replica_setup() {
run_mysql_replica_setup() {
if [ -z "$MYSQL_HOST" ] || [ -z "$MYSQL_PORT" ] || [ -z "$MYSQL_USER" ]; then
echo "Error: Missing required MySQL connection variables for replica setup."
exit 1
Expand All @@ -25,13 +37,13 @@ run_replica_setup() {

run_server_in_background() {
cd "$DATA_PATH" || { echo "Error: Could not change directory to ${DATA_PATH}"; exit 1; }
nohup myduckserver >> "${LOG_PATH}"/server.log 2>&1 &
nohup myduckserver $PGSQL_PRIMARY_DSN_ARG $PGSQL_SLOT_NAME_ARG $LOG_LEVEL >> "${LOG_PATH}"/server.log 2>&1 &
echo "$!" > "${PID_FILE}"
}

run_server_in_foreground() {
cd "$DATA_PATH" || { echo "Error: Could not change directory to ${DATA_PATH}"; exit 1; }
myduckserver
myduckserver $PGSQL_PRIMARY_DSN_ARG $PGSQL_SLOT_NAME_ARG $LOG_LEVEL
}

wait_for_my_duck_server_ready() {
Expand Down Expand Up @@ -89,11 +101,18 @@ setup() {
run_server_in_foreground
;;

"REPLICA")
echo "Starting MyDuck Server and running replica setup in REPLICA mode..."
"MYSQL_REPLICA")
echo "Starting MyDuck Server and running replica setup in MySQL REPLICA mode..."
run_server_in_background
wait_for_my_duck_server_ready
run_mysql_replica_setup
;;

"PGSQL_REPLICA")
echo "Starting MyDuck Server and running replica setup in PGSQL REPLICA mode..."
run_server_in_background
wait_for_my_duck_server_ready
run_replica_setup
# TODO: run pgsql replica setup
;;

*)
Expand All @@ -105,7 +124,7 @@ setup() {

setup

while [[ "$SETUP_MODE" == "REPLICA" ]]; do
while [[ "$SETUP_MODE" == "MYSQL_REPLICA" ]]; do
# Check if the processes have started
check_process_alive "$PID_FILE" "MyDuck Server"
MY_DUCK_SERVER_STATUS=$?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ This a tutorial to build an HTAP service based on MySQL, MyDuck Server, and Mari
Go to the root path of this project and run the following commands:

```sh
cd devtools/htap-setup/maxscale
docker-compose up
cd devtools/htap-setup-mysql/maxscale
docker-compose up -d
```

Then you'll get an HTAP cluster. An account 'lol' with password 'lol' has been created for connecting. Have fun!
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ This is a tutorial to build an HTAP service based on MySQL, MyDuck Server, and P
Go to the root path of this project and run the following commands:

```
cd devtools/htap-setup/proxysql
docker-compose up
cd devtools/htap-setup-mysql/proxysql
docker-compose up -d
```

Then you'll get an HTAP cluster. An account 'lol' with password 'lol' has been created for connecting. Have fun!
Expand Down
Loading

0 comments on commit b17d01f

Please sign in to comment.