Skip to content

Commit ed6ef5a

Browse files
committed
Merge remote-tracking branch 'origin/copy-to-stdout' into copy-to-stdout
2 parents 1858ae2 + dd8b25c commit ed6ef5a

20 files changed

+506
-29
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,9 @@ MyDuck Server supports setting up replicas from common cloud-based MySQL & Postg
135135

136136
### HTAP Setup
137137

138-
With MyDuck's powerful analytics capabilities, you can create an hybrid transactional/analytical processing system where high-frequency data writes are directed to a standard MySQL or Postgres instance, while analytical queries are handled by a MyDuck Server instance. Follow our HTAP setup instructions based on [ProxySQL](docs/tutorial/htap-proxysql-setup.md) or [MariaDB MaxScale](docs/tutorial/htap-maxscale-setup.md) to easily set up an HTAP demonstration.
138+
With MyDuck's powerful analytics capabilities, you can create a hybrid transactional/analytical processing system where high-frequency data writes are directed to a standard MySQL or Postgres instance, while analytical queries are handled by a MyDuck Server instance. Follow our HTAP setup instructions to easily set up an HTAP demonstration:
139+
* For MySQL HTAP, you can build the cluster with either [ProxySQL](docs/tutorial/mysql-htap-proxysql-setup.md) or [MariaDB MaxScale](docs/tutorial/mysql-htap-maxscale-setup.md).
140+
* For PostgreSQL HTAP, you can build the cluster with [PGPool-II](docs/tutorial/pg-htap-pgpool-setup.md).
139141

140142
### Query & Load Parquet Files
141143

catalog/internal_tables.go

+123
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,39 @@ func (it *InternalTable) SelectStmt() string {
7474
return b.String()
7575
}
7676

77+
func (it *InternalTable) SelectAllStmt() string {
78+
var b strings.Builder
79+
b.Grow(128)
80+
b.WriteString("SELECT * FROM ")
81+
b.WriteString(it.Schema)
82+
b.WriteByte('.')
83+
b.WriteString(it.Name)
84+
return b.String()
85+
}
86+
87+
func (it *InternalTable) CountAllStmt() string {
88+
var b strings.Builder
89+
b.Grow(128)
90+
b.WriteString("SELECT COUNT(*)")
91+
b.WriteString(" FROM ")
92+
b.WriteString(it.Schema)
93+
b.WriteByte('.')
94+
b.WriteString(it.Name)
95+
return b.String()
96+
}
97+
7798
var InternalTables = struct {
7899
PersistentVariable InternalTable
79100
BinlogPosition InternalTable
80101
PgReplicationLSN InternalTable
81102
GlobalStatus InternalTable
103+
// TODO(sean): This is a temporary workaround for clients that query 'pg_catalog.pg_stat_replication'.
104+
// Once we add 'pg_catalog' and support views for PG, replace this by a view.
105+
// https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-PG-STAT-REPLICATION-VIEW
106+
PGStatReplication InternalTable
107+
// This is a mock table of pg_catalog.current_setting(...)
108+
// https://www.postgresql.org/docs/current/functions-admin.html#FUNCTIONS-ADMIN-SET
109+
PGCurrentSetting InternalTable
82110
}{
83111
PersistentVariable: InternalTable{
84112
Schema: "__sys__",
@@ -111,11 +139,106 @@ var InternalTables = struct {
111139
{"Innodb_redo_log_enabled", "OFF"}, // Queried by MySQL Shell
112140
},
113141
},
142+
// postgres=# \d+ pg_catalog.pg_stat_replication
143+
// View "pg_catalog.pg_stat_replication"
144+
// Column | Type | Collation | Nullable | Default | Storage | Description
145+
//------------------+--------------------------+-----------+----------+---------+----------+-------------
146+
// pid | integer | | | | plain |
147+
// usesysid | oid | | | | plain |
148+
// usename | name | | | | plain |
149+
// application_name | text | | | | extended |
150+
// client_addr | inet | | | | main |
151+
// client_hostname | text | | | | extended |
152+
// client_port | integer | | | | plain |
153+
// backend_start | timestamp with time zone | | | | plain |
154+
// backend_xmin | xid | | | | plain |
155+
// state | text | | | | extended |
156+
// sent_lsn | pg_lsn | | | | plain |
157+
// write_lsn | pg_lsn | | | | plain |
158+
// flush_lsn | pg_lsn | | | | plain |
159+
// replay_lsn | pg_lsn | | | | plain |
160+
// write_lag | interval | | | | plain |
161+
// flush_lag | interval | | | | plain |
162+
// replay_lag | interval | | | | plain |
163+
// sync_priority | integer | | | | plain |
164+
// sync_state | text | | | | extended |
165+
// reply_time | timestamp with time zone | | | | plain |
166+
//View definition:
167+
// SELECT s.pid,
168+
// s.usesysid,
169+
// u.rolname AS usename,
170+
// s.application_name,
171+
// s.client_addr,
172+
// s.client_hostname,
173+
// s.client_port,
174+
// s.backend_start,
175+
// s.backend_xmin,
176+
// w.state,
177+
// w.sent_lsn,
178+
// w.write_lsn,
179+
// w.flush_lsn,
180+
// w.replay_lsn,
181+
// w.write_lag,
182+
// w.flush_lag,
183+
// w.replay_lag,
184+
// w.sync_priority,
185+
// w.sync_state,
186+
// w.reply_time
187+
// FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, gss_delegation, leader_pid, query_id)
188+
// JOIN pg_stat_get_wal_senders() w(pid, state, sent_lsn, write_lsn, flush_lsn, replay_lsn, write_lag, flush_lag, replay_lag, sync_priority, sync_state, reply_time) ON s.pid = w.pid
189+
// LEFT JOIN pg_authid u ON s.usesysid = u.oid;
190+
PGStatReplication: InternalTable{
191+
// Since the "pg_catalog" is the system catalog on DuckDB, we use "__sys__" as the schema name.
192+
Schema: "__sys__",
193+
Name: "pg_stat_replication",
194+
KeyColumns: []string{
195+
"pid",
196+
},
197+
ValueColumns: []string{
198+
"usesysid",
199+
"usename",
200+
"application_name",
201+
"client_addr",
202+
"client_hostname",
203+
"client_port",
204+
"backend_start",
205+
"backend_xmin",
206+
"state",
207+
"sent_lsn",
208+
"write_lsn",
209+
"flush_lsn",
210+
"replay_lsn",
211+
"write_lag",
212+
"flush_lag",
213+
"replay_lag",
214+
"sync_priority",
215+
"sync_state",
216+
"reply_time",
217+
},
218+
DDL: "pid INTEGER PRIMARY KEY, usesysid TEXT, usename TEXT, application_name TEXT, client_addr TEXT, client_hostname TEXT, client_port INTEGER, backend_start TIMESTAMP, backend_xmin INTEGER, state TEXT, sent_lsn TEXT, write_lsn TEXT, flush_lsn TEXT, replay_lsn TEXT, write_lag INTERVAL, flush_lag INTERVAL, replay_lag INTERVAL, sync_priority INTEGER, sync_state TEXT, reply_time TIMESTAMP",
219+
},
220+
// pg_catalog.current_setting(...)
221+
PGCurrentSetting: InternalTable{
222+
Schema: "__sys__",
223+
Name: "current_setting",
224+
KeyColumns: []string{
225+
"name",
226+
},
227+
ValueColumns: []string{
228+
"setting",
229+
},
230+
DDL: "name TEXT PRIMARY KEY, setting TEXT",
231+
InitialData: [][]any{
232+
{"server_version_num", "170000"},
233+
},
234+
},
114235
}
115236

116237
// internalTables collects every entry of InternalTables into a slice, in the
// same order as the fields are declared on the struct.
// NOTE(review): presumably iterated elsewhere to create and seed these tables
// at startup; confirm against the callers.
var internalTables = []InternalTable{
117238
InternalTables.PersistentVariable,
118239
InternalTables.BinlogPosition,
119240
InternalTables.PgReplicationLSN,
120241
InternalTables.GlobalStatus,
242+
InternalTables.PGStatReplication,
243+
InternalTables.PGCurrentSetting,
121244
}

devtools/htap-setup/maxscale/docker-compose.yml renamed to devtools/htap-setup-mysql/maxscale/docker-compose.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ services:
1111
- --gtid_mode=ON
1212
- --binlog_expire_logs_seconds=7200
1313
healthcheck:
14-
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
14+
test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
1515
interval: 5s
1616
timeout: 5s
1717
retries: 5

devtools/htap-setup/proxysql/docker-compose.yml renamed to devtools/htap-setup-mysql/proxysql/docker-compose.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ services:
1212
- --gtid_mode=ON
1313
- --binlog_expire_logs_seconds=7200
1414
healthcheck:
15-
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
15+
test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
1616
interval: 5s
1717
timeout: 5s
1818
retries: 5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
services:
2+
pgsql:
3+
image: postgres:latest
4+
container_name: htap-pg
5+
environment:
6+
POSTGRES_HOST_AUTH_METHOD: trust
7+
command:
8+
- -c
9+
- wal_level=logical
10+
- -c
11+
- max_wal_senders=30
12+
- -c
13+
- max_replication_slots=10
14+
healthcheck:
15+
test: ["CMD", "pg_isready", "-h", "localhost", "-p", "5432", "-U", "postgres"]
16+
interval: 5s
17+
timeout: 5s
18+
retries: 5
19+
20+
pgsql-setup:
21+
image: postgres:latest
22+
container_name: htap-pg-setup
23+
command: >
24+
sh -c "
25+
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"CREATE PUBLICATION myduck_subscription FOR ALL TABLES;\" &&
26+
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"SELECT PG_CREATE_LOGICAL_REPLICATION_SLOT('myduck_subscription', 'pgoutput');\" &&
27+
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"CREATE TABLE test (id INT PRIMARY KEY, name VARCHAR(255));\" &&
28+
PGPASSWORD=postgres psql -h pgsql -p 5432 -U postgres -d postgres -c \"INSERT INTO test (id, name) VALUES (1, 'test');\"
29+
"
30+
restart: "no"
31+
depends_on:
32+
pgsql:
33+
condition: service_healthy
34+
35+
myduck:
36+
image: apecloud/myduckserver:latest
37+
pull_policy: always
38+
container_name: htap-myduck
39+
environment:
40+
PGSQL_PRIMARY_DSN: postgres://postgres:postgres@pgsql:5432/postgres?sslmode=disable
41+
PGSQL_SLOT_NAME: myduck_subscription
42+
depends_on:
43+
pgsql:
44+
condition: service_healthy
45+
pgsql-setup:
46+
condition: service_completed_successfully
47+
healthcheck:
48+
test: ["CMD", "psql", "-h", "localhost", "-p", "5432", "-U", "postgres", "-d", "postgres", "-c", "SELECT 1;"]
49+
interval: 5s
50+
timeout: 5s
51+
retries: 5
52+
53+
pgpool:
54+
image: bitnami/pgpool:4.5.4
55+
container_name: htap-pgpool
56+
ports:
57+
- "54321:9999"
58+
environment:
59+
# The PGPOOL_BACKEND_NODES is the list of PostgreSQL servers that pgpool will connect to.
60+
# NodeID:Hostname:Port:Weight
61+
- PGPOOL_BACKEND_NODES=0:pgsql:5432:0,1:myduck:5432:1
62+
- PGPOOL_ENABLE_LOAD_BALANCING=yes
63+
- PGPOOL_SR_CHECK_USER=postgres
64+
- PGPOOL_SR_CHECK_PASSWORD=postgres
65+
- PGPOOL_POSTGRES_USERNAME=postgres
66+
- PGPOOL_POSTGRES_PASSWORD=postgres
67+
- PGPOOL_ADMIN_USERNAME=admin
68+
- PGPOOL_ADMIN_PASSWORD=adminpassword
69+
depends_on:
70+
pgsql:
71+
condition: service_healthy
72+
myduck:
73+
condition: service_healthy
74+
healthcheck:
75+
test: ["CMD", "/opt/bitnami/scripts/pgpool/healthcheck.sh"]
76+
interval: 10s
77+
timeout: 5s
78+
retries: 5

docker/Dockerfile

+14-3
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ RUN --mount=type=cache,target=/go/pkg/mod \
1919
# Copy the remaining source code
2020
COPY . .
2121

22-
RUN apt-get update && apt-get install -y \
22+
RUN apt-get update && \
23+
apt-get install -y debian-archive-keyring && \
24+
apt-get update && \
25+
apt-get install -y \
2326
gcc-aarch64-linux-gnu \
2427
g++-aarch64-linux-gnu \
2528
--no-install-recommends \
@@ -82,6 +85,13 @@ RUN if [ "$TARGETARCH" = "arm64" ]; then \
8285
&& rm duckdb_cli-linux-$ARCH.zip \
8386
&& duckdb -c 'SELECT extension_name, loaded, install_path FROM duckdb_extensions() where installed'
8487

88+
RUN apt-get update && \
89+
apt-get install -y debian-archive-keyring && \
90+
apt-get update && \
91+
apt-get install -y libpq-dev postgresql-client \
92+
--no-install-recommends \
93+
&& rm -rf /var/lib/apt/lists/*
94+
8595
RUN duckdb -version
8696

8797
RUN useradd --create-home --user-group --shell /bin/bash admin \
@@ -95,13 +105,14 @@ WORKDIR /home/admin
95105
# Copy the compiled Go binary from the builder stage
96106
COPY --from=builder /myduckserver /usr/local/bin/myduckserver
97107
COPY --chown=admin:admin --chmod=755 docker/*.sh .
98-
COPY --chown=admin:admin --chmod=755 devtools/replica-setup ./replica-setup
108+
COPY --chown=admin:admin --chmod=755 devtools/replica-setup-mysql ./replica-setup-mysql
99109

100110
# ENV LC_CTYPE="en_US.UTF-8"
101111
# ENV LANG="en_US.UTF-8"
102112

103-
# Expose the port your server will run on (if applicable)
113+
# Expose the ports your server will run on (if applicable)
104114
EXPOSE 3306
115+
EXPOSE 5432
105116

106117
# Set the default command to run the Go server
107118
ENTRYPOINT /home/admin/entrypoint.sh

docker/entrypoint.sh

+27-8
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,23 @@
22

33
export DATA_PATH="${HOME}/data"
44
export LOG_PATH="${HOME}/log"
5-
export REPLICA_SETUP_PATH="${HOME}/replica-setup"
5+
export REPLICA_SETUP_PATH="${HOME}/replica-setup-mysql"
66
export PID_FILE="${LOG_PATH}/myduck.pid"
77

8+
if [ -n "$PGSQL_PRIMARY_DSN" ]; then
9+
export PGSQL_PRIMARY_DSN_ARG="-pg-primary-dsn $PGSQL_PRIMARY_DSN"
10+
fi
11+
12+
if [ -n "$PGSQL_SLOT_NAME" ]; then
13+
export PGSQL_SLOT_NAME_ARG="-pg-slot-name $PGSQL_SLOT_NAME"
14+
fi
15+
16+
if [ -n "$LOG_LEVEL" ]; then
17+
export LOG_LEVEL="-loglevel $LOG_LEVEL"
18+
fi
19+
820
# Function to run replica setup
9-
run_replica_setup() {
21+
run_mysql_replica_setup() {
1022
if [ -z "$MYSQL_HOST" ] || [ -z "$MYSQL_PORT" ] || [ -z "$MYSQL_USER" ]; then
1123
echo "Error: Missing required MySQL connection variables for replica setup."
1224
exit 1
@@ -25,13 +37,13 @@ run_replica_setup() {
2537

2638
run_server_in_background() {
2739
cd "$DATA_PATH" || { echo "Error: Could not change directory to ${DATA_PATH}"; exit 1; }
28-
nohup myduckserver >> "${LOG_PATH}"/server.log 2>&1 &
40+
nohup myduckserver $PGSQL_PRIMARY_DSN_ARG $PGSQL_SLOT_NAME_ARG $LOG_LEVEL >> "${LOG_PATH}"/server.log 2>&1 &
2941
echo "$!" > "${PID_FILE}"
3042
}
3143

3244
run_server_in_foreground() {
3345
cd "$DATA_PATH" || { echo "Error: Could not change directory to ${DATA_PATH}"; exit 1; }
34-
myduckserver
46+
myduckserver $PGSQL_PRIMARY_DSN_ARG $PGSQL_SLOT_NAME_ARG $LOG_LEVEL
3547
}
3648

3749
wait_for_my_duck_server_ready() {
@@ -89,11 +101,18 @@ setup() {
89101
run_server_in_foreground
90102
;;
91103

92-
"REPLICA")
93-
echo "Starting MyDuck Server and running replica setup in REPLICA mode..."
104+
"MYSQL_REPLICA")
105+
echo "Starting MyDuck Server and running replica setup in MySQL REPLICA mode..."
106+
run_server_in_background
107+
wait_for_my_duck_server_ready
108+
run_mysql_replica_setup
109+
;;
110+
111+
"PGSQL_REPLICA")
112+
echo "Starting MyDuck Server and running replica setup in PGSQL REPLICA mode..."
94113
run_server_in_background
95114
wait_for_my_duck_server_ready
96-
run_replica_setup
115+
# TODO: run pgsql replica setup
97116
;;
98117

99118
*)
@@ -105,7 +124,7 @@ setup() {
105124

106125
setup
107126

108-
while [[ "$SETUP_MODE" == "REPLICA" ]]; do
127+
while [[ "$SETUP_MODE" == "MYSQL_REPLICA" ]]; do
109128
# Check if the processes have started
110129
check_process_alive "$PID_FILE" "MyDuck Server"
111130
MY_DUCK_SERVER_STATUS=$?

docs/tutorial/htap-maxscale-setup.md renamed to docs/tutorial/mysql-htap-maxscale-setup.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ This a tutorial to build an HTAP service based on MySQL, MyDuck Server, and Mari
1313
Go to the root path of this project and run the following commands:
1414

1515
```sh
16-
cd devtools/htap-setup/maxscale
17-
docker-compose up
16+
cd devtools/htap-setup-mysql/maxscale
17+
docker-compose up -d
1818
```
1919

2020
Then you'll get an HTAP cluster. An account 'lol' with password 'lol' has been created for connecting. Have fun!

docs/tutorial/htap-proxysql-setup.md renamed to docs/tutorial/mysql-htap-proxysql-setup.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ This is a tutorial to build an HTAP service based on MySQL, MyDuck Server, and P
1414
Go to the root path of this project and run the following commands:
1515

1616
```
17-
cd devtools/htap-setup/proxysql
18-
docker-compose up
17+
cd devtools/htap-setup-mysql/proxysql
18+
docker-compose up -d
1919
```
2020

2121
Then you'll get an HTAP cluster. An account 'lol' with password 'lol' has been created for connecting. Have fun!

0 commit comments

Comments
 (0)