Skip to content

Commit

Permalink
[issue#256]Implement SQL++ Query Generator (#286)
Browse files Browse the repository at this point in the history
* Add implementation of SQLPPGenerator and corresponding tests

* fix MetaSchema

* Address review comments

* postpone the implementation of datetime and round from AsterixImpl to SQLPPImpl
  • Loading branch information
luochen01 authored Apr 4, 2017
1 parent fea3892 commit eed1bd8
Show file tree
Hide file tree
Showing 5 changed files with 1,232 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
package edu.uci.ics.cloudberry.zion.model.impl

import edu.uci.ics.cloudberry.zion.model.datastore.{IQLGenerator, QueryParsingException}
import edu.uci.ics.cloudberry.zion.model.schema._


/**
* Defines constant strings for query languages supported by AsterixDB
*/
trait AsterixImpl {

val aggregateFuncMap: Map[AggregateFunc, String]

def getAggregateStr(aggregate: AggregateFunc): String = {
aggregateFuncMap.get(aggregate) match {
case Some(impl) => impl
case None => throw new QueryParsingException(s"No implementation is provided for aggregate function ${aggregate.name}")
}
}

val datetime: String
val round: String

val dayTimeDuration: String
val yearMonthDuration: String
val getIntervalStartDatetime: String
val intervalBin: String

val spatialIntersect: String
val createRectangle: String
val createPoint: String
val spatialCell: String
val getPoints: String


val similarityJaccard: String
val contains: String
val wordTokens: String

}

object AsterixImpl {


}


abstract class AsterixQueryGenerator extends IQLGenerator {

/**
* represent the expression for a [[Field]]
*
* @param refExpr the expression for referring this field by the subsequent statements
* @param defExpr the expression the defines the field
*
*/
case class FieldExpr(refExpr: String, defExpr: String)

/**
* Partial parse results after parsing each [[Statement]]
*
* @param strs a sequence of parsed query strings, which would be composed together later.
* @param exprMap a new field expression map
*/
case class ParsedResult(strs: Seq[String], exprMap: Map[String, FieldExpr])

protected def typeImpl: AsterixImpl

protected def quote: String

protected def sourceVar: String

protected def lookupVar: String

protected def unnestVar: String

protected def groupVar: String

protected def globalAggrVar: String

protected def outerSelectVar: String

/**
* The suffix (such as ";") appended to the query string
* @return
*/
protected def suffix: String


/**
* Returns a query string query after parsing the query object.
*
* @param query [[IQuery]] object containing query details
* @param schemaMap a map of Dataset name to it's [[Schema]]
* @return query string
**/
def generate(query: IQuery, schemaMap: Map[String, Schema]): String = {
val result = query match {
case q: Query =>
parseQuery(q, schemaMap)
case q: CreateView => parseCreate(q, schemaMap)
case q: AppendView => parseAppend(q, schemaMap)
case q: UpsertRecord => parseUpsert(q, schemaMap)
case q: DropView => ???
case _ => ???
}
s"$result$suffix"
}

protected def parseQuery(query: Query, schemaMap: Map[String, Schema]): String

protected def parseCreate(query: CreateView, schemaMap: Map[String, Schema]): String

protected def parseAppend(query: AppendView, schemaMap: Map[String, Schema]): String

protected def parseUpsert(query: UpsertRecord, schemaMap: Map[String, Schema]): String


def calcResultSchema(query: Query, schema: Schema): Schema = {
if (query.lookup.isEmpty && query.groups.isEmpty && query.select.isEmpty) {
schema.copy()
} else {
???
}
}

protected def initExprMap(query: Query, schemaMap: Map[String, Schema]): Map[String, FieldExpr] = {
val schema = schemaMap(query.dataset)
schema.fieldMap.mapValues { f =>
f.dataType match {
case DataType.Record => FieldExpr(sourceVar, sourceVar)
case DataType.Hierarchy => FieldExpr(sourceVar, sourceVar) // TODO rethink this type: a type or just a relation between types?
case _ => {
//Add the quote to wrap the name in order to not touch the SQL reserved keyword
val quoted = f.name.split('.').map(name => s"$quote$name$quote").mkString(".")
FieldExpr(s"$sourceVar.$quoted", s"$sourceVar.$quoted")
}
}
}
}


//TODO possibly using /*+ skip-index */ hint if the relation selectivity is not high enough
protected def parseFilterRelation(filter: FilterStatement, fieldExpr: String): String = {
filter.field.dataType match {
case DataType.Number =>
parseNumberRelation(filter, fieldExpr)
case DataType.Time =>
parseTimeRelation(filter, fieldExpr)
case DataType.Point =>
parsePointRelation(filter, fieldExpr)
case DataType.Boolean => ???
case DataType.String => ???
case DataType.Text =>
parseTextRelation(filter, fieldExpr)
case DataType.Bag => ???
case DataType.Hierarchy =>
throw new QueryParsingException("the Hierarchy type doesn't support any relations.")
case _ => throw new QueryParsingException(s"unknown datatype: ${filter.field.dataType}")
}
}


protected def parseNumberRelation(filter: FilterStatement,
fieldExpr: String): String

protected def parseTimeRelation(filter: FilterStatement,
fieldExpr: String): String = {
filter.relation match {
case Relation.inRange => {
s"$fieldExpr >= ${typeImpl.datetime}('${filter.values(0)}') and $fieldExpr < ${typeImpl.datetime}('${filter.values(1)}')"
}
case _ => {
s"$fieldExpr ${filter.relation} ${typeImpl.datetime}('${filter.values(0)}')"
}
}
}


protected def parsePointRelation(filter: FilterStatement,
fieldExpr: String): String = {
val values = filter.values.map(_.asInstanceOf[Seq[Double]])
filter.relation match {
case Relation.inRange => {
s"""${typeImpl.spatialIntersect}($fieldExpr,
| ${typeImpl.createRectangle}(${typeImpl.createPoint}(${values(0)(0)},${values(0)(1)}),
| ${typeImpl.createPoint}(${values(1)(0)},${values(1)(1)})))
|""".stripMargin
}
}
}

protected def parseTextRelation(filter: FilterStatement,
fieldExpr: String): String = {
val first = s"${typeImpl.similarityJaccard}(${typeImpl.wordTokens}($fieldExpr), ${typeImpl.wordTokens}('${filter.values.head}')) > 0.0"
val rest = filter.values.tail.map(keyword => s"""and ${typeImpl.contains}($fieldExpr, "$keyword")""")
(first +: rest).mkString("\n")
}


protected def parseGeoCell(scale: Double, fieldExpr: String, dataType: DataType.Value): String = {
val origin = s"${typeImpl.createPoint}(0.0,0.0)"
s"${typeImpl.getPoints}(${typeImpl.spatialCell}(${fieldExpr}, $origin, ${1 / scale}, ${1 / scale}))[0]"
}

protected def parseAggregateFunc(aggregate: AggregateStatement,
fieldExpr: String): String


protected def parseIntervalDuration(interval: Interval): String = {
import TimeUnit._
//PnYnMnDTnHnMn.mmmS
interval.unit match {
case Second => s""" ${typeImpl.dayTimeDuration}("PT${interval.x}S") """
case Minute => s""" ${typeImpl.dayTimeDuration}("PT${interval.x}M") """
case Hour => s""" ${typeImpl.dayTimeDuration}("PT${interval.x}H") """
case Day => s""" ${typeImpl.dayTimeDuration}("P${interval.x}D") """
case Week => s""" ${typeImpl.dayTimeDuration}("P${interval.x * 7}D") """
case Month => s""" ${typeImpl.yearMonthDuration}("P${interval.x}M") """
case Year => s""" ${typeImpl.yearMonthDuration}("P${interval.x}Y") """
}
}

protected def parseGroupByFunc(groupBy: ByStatement, fieldExpr: String): String = {
groupBy.funcOpt match {
case Some(func) =>
func match {
case bin: Bin => s"${typeImpl.round}($fieldExpr/${bin.scale})*${bin.scale}"
case interval: Interval =>
val duration = parseIntervalDuration(interval)
s"${typeImpl.getIntervalStartDatetime}(${typeImpl.intervalBin}($fieldExpr, ${typeImpl.datetime}('1990-01-01T00:00:00.000Z'), $duration))"
case level: Level =>
//TODO remove this data type
val hierarchyField = groupBy.field.asInstanceOf[HierarchyField]
val field = hierarchyField.levels.find(_._1 == level.levelTag).get
s"$fieldExpr.${field._2}"
case GeoCellTenth => parseGeoCell(10, fieldExpr, groupBy.field.dataType)
case GeoCellHundredth => parseGeoCell(100, fieldExpr, groupBy.field.dataType)
case GeoCellThousandth => parseGeoCell(1000, fieldExpr, groupBy.field.dataType)
case _ => throw new QueryParsingException(s"unknown function: ${func.name}")
}
case None => fieldExpr
}
}


protected def genDDL(schema: Schema): String = {
//FIXME this function is wrong for nested types if it contains multiple sub-fields
def mkNestDDL(names: List[String], typeStr: String): String = {
names match {
case List(e) => s" $e : $typeStr"
case e :: tail => s" $e : { ${mkNestDDL(tail, typeStr)} }"
}
}

val fields = schema.fieldMap.values.filter(f => f.dataType != DataType.Hierarchy && f != AllField).map {
f => mkNestDDL(f.name.split("\\.").toList, fieldType2ADMType(f) + (if (f.isOptional) "?" else ""))
}
s"""
|create type ${schema.typeName} if not exists as open {
|${fields.mkString(",\n")}
|}
""".stripMargin
}

protected def fieldType2ADMType(field: Field): String = {
field.dataType match {
case DataType.Number => "double"
case DataType.Time => "datetime"
case DataType.Point => "point"
case DataType.Boolean => "boolean"
case DataType.String => "string"
case DataType.Text => "string"
case DataType.Bag => s"{{${fieldType2ADMType(Field("", field.asInstanceOf[BagField].innerType))}}}"
case DataType.Hierarchy => ??? // should be skipped
case DataType.Record => ???
}
}
}
Loading

0 comments on commit eed1bd8

Please sign in to comment.