Skip to content

Commit 5ecd48e

Browse files
author
Lyubo Kamenov
authored
avro schema generator (#174)
* schema generator
* snapshot emits schema
* add logrepl schema
* add uuid type formatting
1 parent 125c029 commit 5ecd48e

File tree

19 files changed

+667
-68
lines changed

19 files changed

+667
-68
lines changed

go.mod

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module github.com/conduitio/conduit-connector-postgres
22

3-
go 1.22
3+
go 1.22.0
44

55
require (
66
github.com/Masterminds/sprig/v3 v3.2.3
@@ -11,6 +11,7 @@ require (
1111
github.com/golangci/golangci-lint v1.59.1
1212
github.com/google/go-cmp v0.6.0
1313
github.com/google/uuid v1.6.0
14+
github.com/hamba/avro/v2 v2.22.1
1415
github.com/jackc/pgerrcode v0.0.0-20240316143900-6e2875d9b438
1516
github.com/jackc/pglogrepl v0.0.0-20240307033717-828fbfe908e9
1617
github.com/jackc/pgx/v5 v5.6.0
@@ -110,6 +111,7 @@ require (
110111
github.com/jirfag/go-printf-func-name v0.0.0-20200119135958-7558a9eaa5af // indirect
111112
github.com/jjti/go-spancheck v0.6.1 // indirect
112113
github.com/jpillora/backoff v1.0.0 // indirect
114+
github.com/json-iterator/go v1.1.12 // indirect
113115
github.com/julz/importas v0.1.0 // indirect
114116
github.com/karamaru-alpha/copyloopvar v1.1.0 // indirect
115117
github.com/kisielk/errcheck v1.7.0 // indirect
@@ -139,6 +141,8 @@ require (
139141
github.com/mitchellh/go-testing-interface v1.14.1 // indirect
140142
github.com/mitchellh/mapstructure v1.5.0 // indirect
141143
github.com/mitchellh/reflectwalk v1.0.0 // indirect
144+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
145+
github.com/modern-go/reflect2 v1.0.2 // indirect
142146
github.com/moricho/tparallel v0.3.1 // indirect
143147
github.com/nakabonne/nestif v0.3.1 // indirect
144148
github.com/nishanths/exhaustive v0.12.0 // indirect

go.sum

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,8 @@ github.com/gostaticanalysis/nilerr v0.1.1/go.mod h1:wZYb6YI5YAxxq0i1+VJbY0s2YONW
305305
github.com/gostaticanalysis/testutil v0.3.1-0.20210208050101-bfb5c8eec0e4/go.mod h1:D+FIZ+7OahH3ePw/izIEeH5I06eKs1IKI4Xr64/Am3M=
306306
github.com/gostaticanalysis/testutil v0.4.0 h1:nhdCmubdmDF6VEatUNjgUZBJKWRqugoISdUv3PPQgHY=
307307
github.com/gostaticanalysis/testutil v0.4.0/go.mod h1:bLIoPefWXrRi/ssLFWX1dx7Repi5x3CuviD3dgAZaBU=
308+
github.com/hamba/avro/v2 v2.22.1 h1:q1rAbfJsrbMaZPDLQvwUQMfQzp6H+hGXvckmU/lXemk=
309+
github.com/hamba/avro/v2 v2.22.1/go.mod h1:HOeTrE3kvWnBAgsufqhAzDDV5gvS0QXs65Z6BHfGgbg=
308310
github.com/hashicorp/go-hclog v1.5.0 h1:bI2ocEMgcVlz55Oj1xZNBsVi900c7II+fWDyV9o+13c=
309311
github.com/hashicorp/go-hclog v1.5.0/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
310312
github.com/hashicorp/go-plugin v1.6.0 h1:wgd4KxHJTVGGqWBq4QPB1i5BZNEx9BR8+OFmHDmTk8A=
@@ -356,6 +358,7 @@ github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX
356358
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
357359
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
358360
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
361+
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
359362
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
360363
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
361364
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
@@ -440,9 +443,11 @@ github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RR
440443
github.com/mitchellh/reflectwalk v1.0.0 h1:9D+8oIskB4VJBN5SFlmc27fSlIBZaov1Wpk/IfikLNY=
441444
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
442445
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
446+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
443447
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
444448
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
445449
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
450+
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
446451
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
447452
github.com/moricho/tparallel v0.3.1 h1:fQKD4U1wRMAYNngDonW5XupoB/ZGJHdpzrWqgyg9krA=
448453
github.com/moricho/tparallel v0.3.1/go.mod h1:leENX2cUv7Sv2qDgdi0D0fCftN8fRC67Bcn8pqzeYNI=

source.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ func (s *Source) Open(ctx context.Context, pos sdk.Position) error {
104104
TableKeys: s.tableKeys,
105105
WithSnapshot: s.config.SnapshotMode == source.SnapshotModeInitial,
106106
SnapshotFetchSize: s.config.SnapshotFetchSize,
107+
WithAvroSchema: s.config.WithAvroSchema,
107108
})
108109
if err != nil {
109110
return fmt.Errorf("failed to create logical replication iterator: %w", err)

source/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ const (
4949
type Config struct {
5050
// URL is the connection string for the Postgres database.
5151
URL string `json:"url" validate:"required"`
52+
5253
// Tables is a list of table names to read from, separated by commas, e.g. "table1,table2".
5354
// Use "*" if you'd like to listen to all tables.
5455
Tables []string `json:"tables"`
@@ -74,6 +75,10 @@ type Config struct {
7475
// LogreplAutoCleanup determines if the replication slot and publication should be
7576
// removed when the connector is deleted.
7677
LogreplAutoCleanup bool `json:"logrepl.autoCleanup" default:"true"`
78+
79+
// WithAvroSchema determines whether the connector should attach an Avro schema to each
80+
// record.
81+
WithAvroSchema bool `json:"logrepl.withAvroSchema" default:"false"`
7782
}
7883

7984
// Validate validates the provided config values.

source/logrepl/cdc.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ type CDCConfig struct {
3333
PublicationName string
3434
Tables []string
3535
TableKeys map[string]string
36+
WithAvroSchema bool
3637
}
3738

3839
// CDCIterator asynchronously listens for events from the logical replication
@@ -63,6 +64,7 @@ func NewCDCIterator(ctx context.Context, pool *pgxpool.Pool, c CDCConfig) (*CDCI
6364
}
6465

6566
records := make(chan sdk.Record)
67+
handler := NewCDCHandler(internal.NewRelationSet(), c.TableKeys, c.WithAvroSchema, records)
6668

6769
sub, err := internal.CreateSubscription(
6870
ctx,
@@ -71,7 +73,7 @@ func NewCDCIterator(ctx context.Context, pool *pgxpool.Pool, c CDCConfig) (*CDCI
7173
c.PublicationName,
7274
c.Tables,
7375
c.LSN,
74-
NewCDCHandler(internal.NewRelationSet(), c.TableKeys, records).Handle,
76+
handler.Handle,
7577
)
7678
if err != nil {
7779
return nil, fmt.Errorf("failed to initialize subscription: %w", err)

source/logrepl/combined.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type Config struct {
4848
TableKeys map[string]string
4949
WithSnapshot bool
5050
SnapshotFetchSize int
51+
WithAvroSchema bool
5152
}
5253

5354
// Validate performs validation tasks on the config.
@@ -177,6 +178,7 @@ func (c *CombinedIterator) initCDCIterator(ctx context.Context, pos position.Pos
177178
PublicationName: c.conf.PublicationName,
178179
Tables: c.conf.Tables,
179180
TableKeys: c.conf.TableKeys,
181+
WithAvroSchema: c.conf.WithAvroSchema,
180182
})
181183
if err != nil {
182184
return fmt.Errorf("failed to create CDC iterator: %w", err)
@@ -198,11 +200,12 @@ func (c *CombinedIterator) initSnapshotIterator(ctx context.Context, pos positio
198200
}
199201

200202
snapshotIterator, err := snapshot.NewIterator(ctx, c.pool, snapshot.Config{
201-
Position: c.conf.Position,
202-
Tables: c.conf.Tables,
203-
TableKeys: c.conf.TableKeys,
204-
TXSnapshotID: c.cdcIterator.TXSnapshotID(),
205-
FetchSize: c.conf.SnapshotFetchSize,
203+
Position: c.conf.Position,
204+
Tables: c.conf.Tables,
205+
TableKeys: c.conf.TableKeys,
206+
TXSnapshotID: c.cdcIterator.TXSnapshotID(),
207+
FetchSize: c.conf.SnapshotFetchSize,
208+
WithAvroSchema: c.conf.WithAvroSchema,
206209
})
207210
if err != nil {
208211
return fmt.Errorf("failed to create snapshot iterator: %w", err)

source/logrepl/handler.go

Lines changed: 46 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ import (
2020

2121
"github.com/conduitio/conduit-connector-postgres/source/logrepl/internal"
2222
"github.com/conduitio/conduit-connector-postgres/source/position"
23+
"github.com/conduitio/conduit-connector-postgres/source/schema"
2324
sdk "github.com/conduitio/conduit-connector-sdk"
25+
"github.com/hamba/avro/v2"
2426
"github.com/jackc/pglogrepl"
2527
)
2628

@@ -31,17 +33,23 @@ type CDCHandler struct {
3133
relationSet *internal.RelationSet
3234
out chan<- sdk.Record
3335
lastTXLSN pglogrepl.LSN
36+
37+
relAvroSchema map[string]avro.Schema
38+
withAvroSchema bool
3439
}
3540

3641
func NewCDCHandler(
3742
rs *internal.RelationSet,
3843
tableKeys map[string]string,
44+
withAvroSchema bool,
3945
out chan<- sdk.Record,
4046
) *CDCHandler {
4147
return &CDCHandler{
42-
tableKeys: tableKeys,
43-
relationSet: rs,
44-
out: out,
48+
tableKeys: tableKeys,
49+
relationSet: rs,
50+
out: out,
51+
withAvroSchema: withAvroSchema,
52+
relAvroSchema: make(map[string]avro.Schema),
4553
}
4654
}
4755

@@ -100,6 +108,10 @@ func (h *CDCHandler) handleInsert(
100108
return fmt.Errorf("failed to decode new values: %w", err)
101109
}
102110

111+
if err := h.updateAvroSchema(rel, msg.Tuple); err != nil {
112+
return fmt.Errorf("failed to update avro schema: %w", err)
113+
}
114+
103115
rec := sdk.Util.Source.NewRecordCreate(
104116
h.buildPosition(lsn),
105117
h.buildRecordMetadata(rel),
@@ -127,6 +139,10 @@ func (h *CDCHandler) handleUpdate(
127139
return fmt.Errorf("failed to decode new values: %w", err)
128140
}
129141

142+
if err := h.updateAvroSchema(rel, msg.NewTuple); err != nil {
143+
return fmt.Errorf("failed to update avro schema: %w", err)
144+
}
145+
130146
oldValues, err := h.relationSet.Values(msg.RelationID, msg.OldTuple)
131147
if err != nil {
132148
// this is not a critical error, old values are optional, just log it
@@ -180,10 +196,16 @@ func (h *CDCHandler) send(ctx context.Context, rec sdk.Record) error {
180196
}
181197
}
182198

183-
func (h *CDCHandler) buildRecordMetadata(relation *pglogrepl.RelationMessage) map[string]string {
184-
return map[string]string{
185-
sdk.MetadataCollection: relation.RelationName,
199+
func (h *CDCHandler) buildRecordMetadata(rel *pglogrepl.RelationMessage) map[string]string {
200+
m := map[string]string{
201+
sdk.MetadataCollection: rel.RelationName,
186202
}
203+
204+
if h.withAvroSchema {
205+
m[schema.AvroMetadataKey] = h.relAvroSchema[rel.RelationName].String()
206+
}
207+
208+
return m
187209
}
188210

189211
// buildRecordKey takes the values from the message and extracts the key that
@@ -209,9 +231,27 @@ func (h *CDCHandler) buildRecordPayload(values map[string]any) sdk.Data {
209231
return sdk.StructuredData(values)
210232
}
211233

234+
// buildPosition wraps the LSN in a CDC position and converts it to an SDK position.
212235
func (*CDCHandler) buildPosition(lsn pglogrepl.LSN) sdk.Position {
213236
return position.Position{
214237
Type: position.TypeCDC,
215238
LastLSN: lsn.String(),
216239
}.ToSDKPosition()
217240
}
241+
242+
// updateAvroSchema generates and stores the Avro schema for the relation, based on the given row,
243+
// when attaching an Avro schema is enabled.
244+
func (h *CDCHandler) updateAvroSchema(rel *pglogrepl.RelationMessage, row *pglogrepl.TupleData) error {
245+
if !h.withAvroSchema {
246+
return nil
247+
}
248+
249+
sch, err := schema.Avro.ExtractLogrepl(rel, row)
250+
if err != nil {
251+
return err
252+
}
253+
254+
h.relAvroSchema[rel.RelationName] = sch
255+
256+
return nil
257+
}

source/logrepl/internal/relationset.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ func (rs *RelationSet) Values(id uint32, row *pglogrepl.TupleData) (map[string]a
7171
return nil, fmt.Errorf("failed to decode tuple %d: %w", i, err)
7272
}
7373

74-
v, err := types.Format(val)
74+
v, err := types.Format(col.DataType, val)
7575
if err != nil {
7676
return nil, fmt.Errorf("failed to format column %q type %T: %w", col.Name, val, err)
7777
}

source/logrepl/internal/relationset_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ func isValuesAllTypes(is *is.I, got map[string]any) {
344344
"col_timestamptz": time.Date(2022, 3, 14, 15+8, 16, 17, 0, time.UTC).UTC(),
345345
"col_tsquery": "'fat' & ( 'rat' | 'cat' )",
346346
"col_tsvector": "'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat'",
347-
"col_uuid": [16]uint8{0xbd, 0x94, 0xee, 0x0b, 0x56, 0x4f, 0x40, 0x88, 0xbf, 0x4e, 0x8d, 0x5e, 0x62, 0x6c, 0xaf, 0x66},
347+
"col_uuid": "bd94ee0b-564f-4088-bf4e-8d5e626caf66", // [16]uint8{0xbd, 0x94, 0xee, 0x0b, 0x56, 0x4f, 0x40, 0x88, 0xbf, 0x4e, 0x8d, 0x5e, 0x62, 0x6c, 0xaf, 0x66}
348348
"col_xml": "<foo>bar</foo>",
349349
}
350350
is.Equal("", cmp.Diff(want, got,
@@ -440,7 +440,7 @@ func isValuesAllTypesStandalone(is *is.I, got map[string]any) {
440440
"col_timestamptz": time.Date(2022, 3, 14, 15+8, 16, 17, 0, time.UTC).UTC().String(),
441441
"col_tsquery": "'fat' & ( 'rat' | 'cat' )",
442442
"col_tsvector": "'a' 'and' 'ate' 'cat' 'fat' 'mat' 'on' 'rat' 'sat'",
443-
"col_uuid": [16]uint8{0xbd, 0x94, 0xee, 0x0b, 0x56, 0x4f, 0x40, 0x88, 0xbf, 0x4e, 0x8d, 0x5e, 0x62, 0x6c, 0xaf, 0x66},
443+
"col_uuid": "bd94ee0b-564f-4088-bf4e-8d5e626caf66", // [16]uint8{0xbd, 0x94, 0xee, 0x0b, 0x56, 0x4f, 0x40, 0x88, 0xbf, 0x4e, 0x8d, 0x5e, 0x62, 0x6c, 0xaf, 0x66}
444444
"col_xml": "<foo>bar</foo>",
445445
}
446446
is.Equal("", cmp.Diff(want, got,

source/paramgen.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)