- ShadowTraffic license key (free). Write the
  license file as detailed in the instructions and store it under the shadowtraffic/
  folder.
- ngrok API key (free). Store this in a
  file in this folder, in the form: NGROK_AUTH_TOKEN=xxxxxxx
TIP: Read this article for information about running ngrok locally for serving Kafka to a remote client, and be aware of this possible issue with DNS and ngrok.
Data is written to Kafka from ShadowTraffic.
$ docker compose up
Get host/ip of broker
$ curl -s localhost:4040/api/tunnels | jq -r '.tunnels[0].public_url' | sed 's/tcp:\/\///g'
Create the connection (update the bootstrap.servers property
based on your Kafka broker, e.g. 0.tcp.eu.ngrok.io:17956
if using ngrok as above)
decodable connection create \
--name kafka-basket \
--type source \
--connector kafka \
--prop bootstrap.servers=0.tcp.eu.ngrok.io:17956 \
--prop value.format=json \
--prop key.fields=basketId \
--prop key.format=json \
--prop parse-error-policy=FAIL \
--prop properties.auto.offset.reset=none \
--prop scan.startup.mode=earliest-offset \
--prop topic=supermarketBaskets \
--prop value.fields-include=EXCEPT_KEY \
--field basketId="STRING" \
--field customerId="STRING" \
--field customerName="STRING" \
--field customerAddress="STRING" \
--field storeId="STRING" \
--field storeName="STRING" \
--field storeLocation="STRING" \
--field products="ARRAY<ROW( productName STRING, quantity INT, unitPrice FLOAT, category STRING )>" \
--field timestamp="STRING"
Created connection kafka-basket (4cc241e6)
Start the connection
decodable connection activate $(decodable query --name kafka-basket --keep-ids | yq '.metadata.id')
Check its status
decodable query --name kafka-basket --no-spec
kind: connection
name: kafka-basket
spec_version: v1
create_time: 2024-05-09T16:22:21.733+00:00
update_time: 2024-05-09T16:22:21.733+00:00
target_state: RUNNING
actual_state: STARTING
requested_tasks: 1
actual_tasks: 1
requested_task_size: M
actual_task_size: M
message: ""
raw_exception: ""
timestamp: null
last_activated_time: 2024-05-09T16:25:48.876+00:00
Check the data
decodable stream preview --count 1 $(decodable query --keep-ids --name $(decodable query --name kafka-basket | yq '.spec.stream_name') | yq '.metadata.id') | jq '.'
Records received: 1
{
  "basketId": "299fee47-e935-7979-dae0-f2614bc986ec",
  "customerId": "f2ce5720-6308-bedd-8e34-c1540fd0386b",
  "products": [
    {
      "productId": "f21297f4-f240-ba7a-f028-5dfe2d2b132a",
      "quantity": 3,
      "unitPrice": 6
    },
    {
      "productId": "86c27500-0731-afe7-e03b-c538cfa198e6",
      "quantity": 2,
      "unitPrice": 94
    },
    {
      "productId": "1415f12e-26c8-5805-b5da-09fc2cbf442e",
      "quantity": 4,
      "unitPrice": 98
    },
    {
      "productId": "3de8bc56-e2d2-b8a6-f6e5-9f3c57c2ee1e",
      "quantity": 2,
      "unitPrice": 95
    },
    {
      "productId": "5d19183b-1080-d234-f07d-0b4474ff4090",
      "quantity": 4,
      "unitPrice": 39
    },
    {
      "productId": "04e7416f-e27c-9aef-3c8a-3b819de0dfbc",
      "quantity": 5,
      "unitPrice": 80
    }
  ],
  "storeId": "280734ba-7a71-f250-114f-3602e058fe2a",
  "timestamp": 1715268993022
}
Create the Iceberg sink connection. You'll need to put your own database, region, role-arn, and warehouse location in here.
decodable connection create \
--name basket-iceberg \
--type sink \
--connector iceberg \
--prop catalog-database=my_db \
--prop catalog-table=basket \
--prop catalog-type=glue \
--prop format=parquet \
--prop region=us-west-2 \
--prop role-arn= \
--prop warehouse=s3://foo/iceberg-test/ \
--stream-id $(decodable query --keep-ids --name \
$(decodable query --name kafka-basket | \
yq '.spec.stream_name') | \
yq '.metadata.id') \
--field basketId="STRING" \
--field customerId="STRING" \
--field customerName="STRING" \
--field customerAddress="STRING" \
--field storeId="STRING" \
--field storeName="STRING" \
--field storeLocation="STRING" \
--field products="ARRAY<ROW( productName STRING, quantity INT, unitPrice FLOAT, category STRING )>" \
--field timestamp="STRING"
Start the connection
decodable connection activate $(decodable query --name basket-iceberg --keep-ids | yq '.metadata.id') --start-position earliest
Check its status
decodable query --name basket-iceberg --no-spec
$ aws s3 ls s3://foo/iceberg-test/foo.db/foo_basket02/
PRE data/
PRE metadata/
$ aws s3 ls s3://foo/iceberg-test/foo.db/foo_basket02/data/
2024-06-05 18:07:22 30440 00000-0-dd5fc5f4-9821-448a-8bf6-b3b0a4e3d267-00001.parquet
$ aws s3 ls s3://foo/iceberg-test/foo.db/foo_basket02/metadata/
2024-06-05 18:05:37 3021 00000-63ca0b75-1511-4d8f-b67e-97d8695a9ebe.metadata.json
2024-06-05 18:07:25 4244 00001-d14c9e6e-e9c1-4fcf-a521-c80fd5c3c2a5.metadata.json
2024-06-05 18:09:14 5308 00002-1978c64b-5031-42a9-97b7-6eac6e652a02.metadata.json