Skip to content

Commit

Permalink
🗃️ define a cdevents lake table to store incoming cdevents as json
Browse files Browse the repository at this point in the history
  • Loading branch information
davidB committed Jan 6, 2024
1 parent ba70ed1 commit 45cdb54
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 26 deletions.
174 changes: 168 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Created by https://www.toptal.com/developers/gitignore/api/git,bazel,vim,emacs,visualstudiocode,jetbrains+all,helm,rust
# Edit at https://www.toptal.com/developers/gitignore?templates=git,bazel,vim,emacs,visualstudiocode,jetbrains+all,helm,rust
# Created by https://www.toptal.com/developers/gitignore/api/git,vim,helm,rust,bazel,emacs,jetbrains+all,visualstudiocode,node,go
# Edit at https://www.toptal.com/developers/gitignore?templates=git,vim,helm,rust,bazel,emacs,jetbrains+all,visualstudiocode,node,go

### Bazel ###
# gitignore template for Bazel build system
Expand Down Expand Up @@ -66,6 +66,7 @@ flycheck_*.el
# network security
/network-security.data


### Git ###
# Created by git for backups. To disable backups in Git:
# $ git config --global mergetool.keepBackup false
Expand All @@ -81,6 +82,29 @@ flycheck_*.el
*_LOCAL_*.txt
*_REMOTE_*.txt

### Go ###
# If you prefer the allow list template instead of the deny list, see community template:
# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore
#
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/

# Go workspace file
go.work

### Helm ###
# Chart dependencies
**/charts/*.tgz
Expand Down Expand Up @@ -173,6 +197,146 @@ fabric.properties
!.idea/codeStyles
!.idea/runConfigurations

### Node ###
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*

# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage
*.lcov

# nyc test coverage
.nyc_output

# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# Snowpack dependency directory (https://snowpack.dev/)
web_modules/

# TypeScript cache
*.tsbuildinfo

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional stylelint cache
.stylelintcache

# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local

# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache

# Next.js build output
.next
out

# Nuxt.js build / generate output
.nuxt
dist

# Gatsby files
.cache/
# Uncomment the public line below if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public

# vuepress build output
.vuepress/dist

# vuepress v2.x temp and cache directory
.temp

# Docusaurus cache and generated files
.docusaurus

# Serverless directories
.serverless/

# FuseBox cache
.fusebox/

# DynamoDB Local files
.dynamodb/

# TernJS port file
.tern-port

# Stores VSCode versions used for testing VSCode extensions
.vscode-test

# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

### Node Patch ###
# Serverless Webpack directories
.webpack/

# Optional stylelint cache

# SvelteKit build / generate output
.svelte-kit

### Rust ###
# Generated by Cargo
# will have compiled files and executables
Expand All @@ -192,7 +356,7 @@ Cargo.lock
### Vim ###
# Swap
[._]*.s[a-v][a-z]
!*.svg # comment out if you don't need vector files
!*.svg # comment out if you don't need vector files
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
Expand Down Expand Up @@ -228,9 +392,7 @@ tags
.history
.ionide

# Support for Project snippet scope

# End of https://www.toptal.com/developers/gitignore/api/git,bazel,vim,emacs,visualstudiocode,jetbrains+all,helm,rust
# End of https://www.toptal.com/developers/gitignore/api/git,vim,helm,rust,bazel,emacs,jetbrains+all,visualstudiocode,node,go

# ignore downloaded charts
*.tgz
Expand Down

This file was deleted.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cdviz-collector/src/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pub(crate) async fn store_event(pg_pool: &PgPool, event: Event) -> Result<()> {

sqlx::query!(
r#"
INSERT INTO events (timestamp, raw)
INSERT INTO cdevents_lake (timestamp, payload)
VALUES ($1, $2)
"#,
event.timestamp,
Expand Down
24 changes: 20 additions & 4 deletions migrations/20240101182725_init.up.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
-- Add up migration script here
CREATE TABLE IF NOT EXISTS events (
id serial PRIMARY KEY,
timestamp timestamptz NOT NULL,
raw jsonb NOT NULL
CREATE TABLE IF NOT EXISTS cdevents_lake (
timestamp TIMESTAMP WITH TIME ZONE NOT NULL,
payload JSONB NOT NULL
);

-- TODO: switch to a BRIN index once the table holds more data (see [Avoiding the Pitfalls of BRIN Indexes in Postgres](https://www.crunchydata.com/blog/avoiding-the-pitfalls-of-brin-indexes-in-postgres))
CREATE INDEX IF NOT EXISTS cdevents_lake_timestamp_idx ON cdevents_lake (timestamp);

-- create a view based on fields in the json payload
-- source: [Postgresql json column to view - Database Administrators Stack Exchange](https://dba.stackexchange.com/questions/151838/postgresql-json-column-to-view)
-- DO $$
-- DECLARE l_keys text;
-- BEGIN
-- drop view if exists cdevents_flatten cascade;

-- select string_agg(distinct format('jerrayel ->> %L as %I',jkey, jkey), ', ')
-- into l_keys
-- from cdevents_lake, jsonb_array_elements(payload) as t(jerrayel), jsonb_object_keys(t.jerrayel) as a(jkey);

-- execute 'create view cdevents_flatten as select '||l_keys||' from cdevents_lake, jsonb_array_elements(payload) as t(jerrayel)';
-- END$$;

0 comments on commit 45cdb54

Please sign in to comment.