Skip to content

Commit

Permalink
Use 'Numeric' instead of 'Float' as BQ column type when field type in…
Browse files Browse the repository at this point in the history
… the json schema is number (close #112)
  • Loading branch information
voropaevp committed Apr 18, 2023
1 parent bedb796 commit 0652ab4
Show file tree
Hide file tree
Showing 11 changed files with 300 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,21 @@ object Field {
case Some(types) if types.possiblyWithNull(CommonProperties.Type.Object) =>
val subfields = topSchema.properties.map(_.value).getOrElse(Map.empty)
if (subfields.isEmpty) {
Suggestion.finalSuggestion(topSchema, required && !types.nullable)(name)
Suggestion.finalSuggestion(topSchema, required || types.nullable)(name)
} else {
val requiredKeys = topSchema.required.toList.flatMap(_.value)
val fields = subfields.map { case (key, schema) =>
build(key, schema, requiredKeys.contains(key))
}
val subFields = fields.toList.sortBy(field => (Mode.sort(field.mode), field.name))
Field(name, Type.Record(subFields), Mode.required(required && !types.nullable))
Field(name, Type.Record(subFields), Mode.required(required || types.nullable))
}
case Some(types) if types.possiblyWithNull(CommonProperties.Type.Array) =>
topSchema.items match {
case Some(ArrayProperty.Items.ListItems(schema)) =>
build(name, schema, false).copy(mode = Mode.Repeated)
case _ =>
Suggestion.finalSuggestion(topSchema, required && !types.nullable)(name)
Suggestion.finalSuggestion(topSchema, required || types.nullable)(name)
}
case _ =>
Suggestion.suggestions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ object Row {
value.asNumber.flatMap(_.toLong).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Float =>
value.asNumber.flatMap(_.toBigDecimal.map(_.bigDecimal)).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Numeric =>
case Type.Numeric(_, _) =>
value.asNumber.flatMap(_.toBigDecimal.map(_.bigDecimal)).fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
case Type.Timestamp =>
value.asString.fold(WrongType(value, fieldType).invalidNel[Row])(Primitive(_).validNel)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
package com.snowplowanalytics.iglu.schemaddl.bigquery

import io.circe._

import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{CommonProperties, StringProperty}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.decimals
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{CommonProperties, StringProperty, NumberProperty}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.baseTypes.NullableWrapper

object Suggestion {

Expand All @@ -24,7 +25,7 @@ object Suggestion {
case Some(CommonProperties.Type.String) =>
Some(name => Field(name, Type.String, Mode.required(required)))
case Some(types) if types.nullable(CommonProperties.Type.String) =>
Some(name => Field(name, Type.String, Mode.Nullable) )
Some(name => Field(name, Type.String, Mode.Nullable))
case _ => None
}

Expand All @@ -40,19 +41,55 @@ object Suggestion {
val integerSuggestion: Suggestion = (schema, required) =>
schema.`type` match {
case Some(CommonProperties.Type.Integer) =>
Some(name => Field(name, Type.Integer, Mode.required(required)))
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Integer) =>
Some(name => Field(name, Type.Integer, Mode.Nullable))
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case _ => None
}

val numericSuggestion: Suggestion = (schema, required) => schema.`type` match {
case Some(CommonProperties.Type.Number) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.required(required)))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Number) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.Nullable))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, true) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.Nullable))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.Nullable))
case None => None
}
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
schema.multipleOf match {
case Some(NumberProperty.MultipleOf.IntegerMultipleOf(_)) =>
Some(name => Field(name, Type.fromGenericType(decimals.integerType(schema)), Mode.required(required)))
case Some(mult: NumberProperty.MultipleOf.NumberMultipleOf) =>
Some(name => Field(name, numericWithMultiple(mult, schema.maximum, schema.minimum), Mode.required(required)))
case None => None
}
case _ => None
}

val floatSuggestion: Suggestion = (schema, required) =>
schema.`type` match {
case Some(CommonProperties.Type.Number) =>
Some(name => Field(name, Type.Float, Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, true) =>
Some(name => Field(name, Type.Float, Mode.Nullable))
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
case Some(CommonProperties.Type.Union(types)) if onlyNumeric(types, false) =>
Some(name => Field(name, Type.Float, Mode.required(required)))
case Some(CommonProperties.Type.Union(types)) if withNull(types, CommonProperties.Type.Number) =>
Some(name => Field(name, Type.Float, Mode.Nullable))
Expand All @@ -66,6 +103,15 @@ object Suggestion {
case _ => None
}

private def numericWithMultiple(mult: NumberProperty.MultipleOf.NumberMultipleOf,
maximum: Option[NumberProperty.Maximum],
minimum: Option[NumberProperty.Minimum]): Type =
Type.fromGenericType(decimals.numericWithMultiple(mult, maximum, minimum))


// (Field.JsonNullability.fromNullableWrapper)


// `date-time` format usually means zoned format, which corresponds to BQ Timestamp
val timestampSuggestion: Suggestion = (schema, required) =>
(schema.`type`, schema.format) match {
Expand Down Expand Up @@ -95,22 +141,34 @@ object Suggestion {
booleanSuggestion,
stringSuggestion,
integerSuggestion,
numericSuggestion,
floatSuggestion,
complexEnumSuggestion
)

private[iglu] def fromEnum(enums: List[Json], required: Boolean): String => Field = {
def isString(json: Json) = json.isString || json.isNull

def isInteger(json: Json) = json.asNumber.exists(_.toBigInt.isDefined) || json.isNull

def isNumeric(json: Json) = json.isNumber || json.isNull

val noNull: Boolean = !enums.contains(Json.Null)

if (enums.forall(isString)) {
name => Field(name, Type.String, Mode.required(required && noNull))
} else if (enums.forall(isInteger)) {
name => Field(name, Type.Integer, Mode.required(required && noNull))
} else if (enums.forall(isNumeric)) {
name => Field(name, Type.Float, Mode.required(required && noNull))
name =>
decimals.numericEnum(enums).map {
case NullableWrapper.NullableValue(t) => Field(name, Type.fromGenericType(t), Mode.required(required && noNull))
case NullableWrapper.NotNullValue(t) => Field(name, Type.fromGenericType(t), Mode.required(required && noNull))
} match {
case Some(value) => value
// Unreachable as `None` here would mean that some `enums.forall(isNumeric)` did not work.
case None => Field(name, Type.Float, Mode.required(required && noNull))
}
} else {
name => Field(name, Type.String, Mode.required(required && noNull))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,34 @@
*/
package com.snowplowanalytics.iglu.schemaddl.bigquery

import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.baseTypes._
/** BigQuery field type; "array" and "null" are expressed via `Mode` */
sealed trait Type extends Product with Serializable

object Type {
case object String extends Type

case object Boolean extends Type

case object Integer extends Type

case object Float extends Type
case object Numeric extends Type

case class Numeric(precision: Int, scale: Int) extends Type

case object Date extends Type

case object DateTime extends Type

case object Timestamp extends Type

case class Record(fields: List[Field]) extends Type

def fromGenericType(`type`: BaseType) = `type` match {
case BaseType.Double => Float
case BaseType.Int32 => Integer
case BaseType.Int64 => Integer
case BaseType.Decimal(precision, scale) => Numeric(precision, scale)
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion

object baseTypes {
sealed trait BaseType extends Product with Serializable

object BaseType {
case object Double extends BaseType

case object Int32 extends BaseType

case object Int64 extends BaseType

case class Decimal(precision: Int, scale: Int) extends BaseType
}

sealed trait NullableWrapper

object NullableWrapper {

case class NullableValue(`type`: BaseType) extends NullableWrapper

case class NotNullValue(`type`: BaseType) extends NullableWrapper

def fromBool(`type`: BaseType, nullable: Boolean): NullableWrapper =
if (nullable) NullableWrapper.NullableValue(`type`)
else NullableWrapper.NotNullValue(`type`)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion

import baseTypes._
import io.circe.Json
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.NumberProperty
import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema

object decimals {

def integerType(schema: Schema): BaseType =
(schema.minimum, schema.maximum) match {
case (Some(min), Some(max)) =>
val minDecimal = min.getAsDecimal
val maxDecimal = max.getAsDecimal
if (maxDecimal <= Int.MaxValue && minDecimal >= Int.MinValue) BaseType.Int32
else if (maxDecimal <= Long.MaxValue && minDecimal >= Long.MinValue) BaseType.Int64
else BaseType.Decimal(
(maxDecimal.precision - maxDecimal.scale).max(minDecimal.precision - minDecimal.scale), 0
)
case _ => BaseType.Int64
}

def numericWithMultiple(mult: NumberProperty.MultipleOf.NumberMultipleOf,
maximum: Option[NumberProperty.Maximum],
minimum: Option[NumberProperty.Minimum]): BaseType =
(maximum, minimum) match {
case (Some(max), Some(min)) =>
val topPrecision = max match {
case NumberProperty.Maximum.IntegerMaximum(max) =>
BigDecimal(max).precision + mult.value.scale
case NumberProperty.Maximum.NumberMaximum(max) =>
max.precision - max.scale + mult.value.scale
}
val bottomPrecision = min match {
case NumberProperty.Minimum.IntegerMinimum(min) =>
BigDecimal(min).precision + mult.value.scale
case NumberProperty.Minimum.NumberMinimum(min) =>
min.precision - min.scale + mult.value.scale
}

BaseType.Decimal(topPrecision.max(bottomPrecision), mult.value.scale)

case _ =>
BaseType.Double
}


def numericEnum(enums: List[Json]): Option[NullableWrapper] = {
def go(scale: Int, max: BigDecimal, nullable: Boolean, enums: List[Json]): Option[NullableWrapper] =
enums match {
case Nil =>
val t = if ((scale == 0) && (max <= Int.MaxValue)) BaseType.Int32
else if ((scale == 0) && (max <= Long.MaxValue)) BaseType.Int64
else BaseType.Decimal(max.precision - max.scale + scale, scale)

Some(NullableWrapper.fromBool(t, nullable))
case Json.Null :: tail => go(scale, max, true, tail)
case h :: tail =>
h.asNumber.flatMap(_.toBigDecimal) match {
case Some(bigDecimal) =>
val nextScale = scale.max(bigDecimal.scale)
val nextMax = (if (bigDecimal > 0) bigDecimal else -bigDecimal).max(max)
go(nextScale, nextMax, nullable, tail)
case None => None
}
}

go(0, 0, false, enums)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
*/
package com.snowplowanalytics.iglu.schemaddl.parquet

import Type._
import com.snowplowanalytics.iglu.schemaddl.StringUtils
import com.snowplowanalytics.iglu.schemaddl.jsonschema.Schema
import com.snowplowanalytics.iglu.schemaddl.jsonschema.properties.{ArrayProperty, CommonProperties}
import com.snowplowanalytics.iglu.schemaddl.jsonschema.mutate.Mutate
import com.snowplowanalytics.iglu.schemaddl.jsonschema.suggestion.baseTypes.NullableWrapper

case class Field(name: String,
fieldType: Type,
case class Field(name: String,
fieldType: Type,
nullability: Type.Nullability)

object Field {
Expand Down Expand Up @@ -67,8 +69,20 @@ object Field {

private[parquet] object JsonNullability {
case object ExplicitlyNullable extends JsonNullability

case object NoExplicitNull extends JsonNullability

def fromNullableWrapper(wrapper: NullableWrapper): NullableType = wrapper match {
case NullableWrapper.NullableValue(t) => NullableType(
value = fromGenericType(t),
nullability = JsonNullability.ExplicitlyNullable
)
case NullableWrapper.NotNullValue(t) => NullableType(
value = fromGenericType(t),
nullability = JsonNullability.NoExplicitNull
)
}

def extractFrom(`type`: CommonProperties.Type): JsonNullability = {
if (`type`.nullable) {
JsonNullability.ExplicitlyNullable
Expand All @@ -81,13 +95,13 @@ object Field {
topSchema.`type` match {
case Some(types) if types.possiblyWithNull(CommonProperties.Type.Object) =>
NullableType(
value = buildObjectType(topSchema),
value = buildObjectType(topSchema),
nullability = JsonNullability.extractFrom(types)
)

case Some(types) if types.possiblyWithNull(CommonProperties.Type.Array) =>
NullableType(
value = buildArrayType(topSchema),
NullableType(
value = buildArrayType(topSchema),
nullability = JsonNullability.extractFrom(types)
)

Expand Down
Loading

0 comments on commit 0652ab4

Please sign in to comment.