Skip to content

Commit

Permalink
feat: CLI command to autogenerate JSON Schema for PL, RQ and lineage (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
kgutwin committed Jul 5, 2024
1 parent 6fa675e commit 6545978
Show file tree
Hide file tree
Showing 29 changed files with 387 additions and 99 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@

**Features**:

- Added `prqlc debug json-schema` command to auto-generate JSON Schema
representations of commonly exposed IR types such as PL and RQ. (@kgutwin,
#4698)

**Fixes**:

- Using `in` with an empty array pattern (e.g. `expr | in []`) will now output a
Expand Down
64 changes: 64 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ insta = {version = "1.39.0", features = ["colors", "glob", "yaml"]}
insta-cmd = "0.6.0"
itertools = "0.13.0"
log = "0.4.22"
schemars = "1.0.0-alpha.2"
semver = {version = "1.0.23", features = ["serde"]}
serde = {version = "1.0.203", features = ["derive"]}
serde_json = "1.0.120"
Expand Down
1 change: 1 addition & 0 deletions prqlc/prqlc-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ doctest = false
enum-as-inner = {workspace = true}
itertools = {workspace = true}
log = {workspace = true}
schemars = {workspace = true}
semver = {version = "1.0.23", features = ["serde"]}
serde = {workspace = true}
serde_yaml = {workspace = true, optional = true}
Expand Down
7 changes: 4 additions & 3 deletions prqlc/prqlc-parser/src/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
// hasn't been used much since, and I'm not sure it carries its weight. So we
// could consider rolling back to only concrete implementations to delayer the
// code.
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

/// Inclusive-inclusive range.
/// Missing bound means unbounded range.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub struct Range<T> {
pub start: Option<T>,
pub end: Option<T>,
Expand Down Expand Up @@ -37,7 +38,7 @@ impl<T> Range<T> {
}
}

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)]
pub enum InterpolateItem<T> {
String(String),
Expr {
Expand Down Expand Up @@ -68,7 +69,7 @@ impl<T> InterpolateItem<T> {
}
}

#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SwitchCase<T> {
pub condition: T,
pub value: T,
Expand Down
14 changes: 9 additions & 5 deletions prqlc/prqlc-parser/src/lexer/lr/token.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
use enum_as_inner::EnumAsInner;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

#[derive(Clone, PartialEq, Serialize, Deserialize, Eq)]
#[derive(Clone, PartialEq, Serialize, Deserialize, Eq, JsonSchema)]
pub struct Token {
pub kind: TokenKind,
pub span: std::ops::Range<usize>,
}

#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
#[derive(Clone, PartialEq, Debug, Serialize, Deserialize, JsonSchema)]
pub enum TokenKind {
NewLine,

Ident(String),
Keyword(String),
#[cfg_attr(
feature = "serde_yaml",
serde(with = "serde_yaml::with::singleton_map")
serde(with = "serde_yaml::with::singleton_map"),
schemars(with = "Literal")
)]
Literal(Literal),
Param(String),
Expand Down Expand Up @@ -64,7 +66,9 @@ pub enum TokenKind {
LineWrap(Vec<TokenKind>),
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr)]
#[derive(
Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr, JsonSchema,
)]
pub enum Literal {
Null,
Integer(i64),
Expand All @@ -86,7 +90,7 @@ impl TokenKind {
}
}
// Compound units, such as "2 days 3 hours", can be represented as `2days + 3hours`
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, JsonSchema)]
pub struct ValueAndUnit {
pub n: i64, // Do any DBs use floats or decimals for this?
pub unit: String, // Could be an enum IntervalType,
Expand Down
26 changes: 15 additions & 11 deletions prqlc/prqlc-parser/src/parser/pr/expr.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::collections::HashMap;

use enum_as_inner::EnumAsInner;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

use crate::generic;
Expand All @@ -23,7 +24,7 @@ impl Expr {

/// Expr is anything that has a value and thus a type.
/// Most of these can contain other [Expr] themselves; literals should be [ExprKind::Literal].
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct Expr {
#[serde(flatten)]
pub kind: ExprKind,
Expand All @@ -35,7 +36,9 @@ pub struct Expr {
pub alias: Option<String>,
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr)]
#[derive(
Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, strum::AsRefStr, JsonSchema,
)]
pub enum ExprKind {
Ident(String),

Expand All @@ -47,7 +50,8 @@ pub enum ExprKind {
},
#[cfg_attr(
feature = "serde_yaml",
serde(with = "serde_yaml::with::singleton_map")
serde(with = "serde_yaml::with::singleton_map"),
schemars(with = "Literal")
)]
Literal(Literal),
Pipeline(Pipeline),
Expand Down Expand Up @@ -81,28 +85,28 @@ impl ExprKind {
}
}

#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, EnumAsInner, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub enum IndirectionKind {
Name(String),
Position(i64),
Star,
}

#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct BinaryExpr {
pub left: Box<Expr>,
pub op: BinOp,
pub right: Box<Expr>,
}

#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct UnaryExpr {
pub op: UnOp,
pub expr: Box<Expr>,
}

/// Function call.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct FuncCall {
pub name: Box<Expr>,
pub args: Vec<Expr>,
Expand All @@ -112,7 +116,7 @@ pub struct FuncCall {

/// Function called with possibly missing positional arguments.
/// May also contain environment that is needed to evaluate the body.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct Func {
/// Type requirement for the function body expression.
pub return_ty: Option<Ty>,
Expand All @@ -130,7 +134,7 @@ pub struct Func {
pub generic_type_params: Vec<GenericTypeParam>,
}

#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct FuncParam {
pub name: String,

Expand All @@ -140,7 +144,7 @@ pub struct FuncParam {
pub default_value: Option<Box<Expr>>,
}

#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct GenericTypeParam {
/// Assigned name of this generic type argument.
pub name: String,
Expand All @@ -149,7 +153,7 @@ pub struct GenericTypeParam {
}

/// A value and a series of functions that are to be applied to that value one after another.
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, JsonSchema)]
pub struct Pipeline {
pub exprs: Vec<Expr>,
}
Expand Down
3 changes: 2 additions & 1 deletion prqlc/prqlc-parser/src/parser/pr/ident.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::fmt::Write;

use schemars::JsonSchema;
use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer};

/// A name. Generally columns, tables, functions, variables.
/// This is a glorified way of writing a "vec with at least one element".
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Clone, JsonSchema)]
pub struct Ident {
pub path: Vec<String>,
pub name: String,
Expand Down
3 changes: 3 additions & 0 deletions prqlc/prqlc-parser/src/parser/pr/ops.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};

#[derive(
Expand All @@ -11,6 +12,7 @@ use serde::{Deserialize, Serialize};
Deserialize,
strum::Display,
strum::EnumString,
JsonSchema,
)]
pub enum UnOp {
#[strum(to_string = "-")]
Expand All @@ -34,6 +36,7 @@ pub enum UnOp {
Deserialize,
strum::Display,
strum::EnumString,
JsonSchema,
)]
pub enum BinOp {
#[strum(to_string = "*")]
Expand Down
Loading

0 comments on commit 6545978

Please sign in to comment.