Skip to content

Commit

Permalink
WIP: Embedding Data
Browse files Browse the repository at this point in the history
  • Loading branch information
davesmith00000 committed Sep 17, 2023
1 parent d7d4291 commit d6d334f
Show file tree
Hide file tree
Showing 2 changed files with 273 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
package indigoplugin.generators

import scala.util.matching.Regex

// import scala.annotation.tailrec

/** Generator that reads a delimited text file (CSV, markdown table rows, ...) and writes a Scala
  * module for it, plus the row-parsing helpers used to interpret each line.
  *
  * NOTE(review): work in progress. `generate` currently parses the rows but does not embed them;
  * the emitted module only contains an empty `text` value.
  */
object EmbedData {

  // Has a standard format, first row is headers, first column is keys.
  // Strings delimited with single or double quotes preserve the delimited
  // value, the quotes are dropped, but the other kind of quote within that
  // string is kept.

  /** Reads `filePath`, keeps the lines accepted by `rowFilter`, and writes
    * `<outDir>/<Generators.OutputDirName>/<moduleName>.scala`, creating the directory if needed and
    * overwriting any existing file.
    *
    * @param outDir
    *   base output directory.
    * @param moduleName
    *   name of the generated object; also used as the file name.
    * @param fullyQualifiedPackage
    *   package declaration written at the top of the generated file.
    * @param filePath
    *   data file to read, one row per line.
    * @param delimiter
    *   column delimiter, used as a regular expression fragment (see [[parse]]).
    * @param rowFilter
    *   predicate deciding which raw lines are kept.
    * @return
    *   the paths of the files that were written (currently exactly one).
    * @throws java.lang.Exception
    *   if `filePath` does not exist.
    */
  def generate(
      outDir: os.Path,
      moduleName: String,
      fullyQualifiedPackage: String,
      filePath: os.Path,
      delimiter: String,
      rowFilter: String => Boolean
  ): Seq[os.Path] = {

    val lines =
      if (!os.exists(filePath)) throw new Exception("Path to data file not found: " + filePath.toString())
      else {
        os.read.lines(filePath).filter(rowFilter)
      }

    // NOTE(review): despite the name this holds every parsed row, not just the header row.
    val headers =
      lines.map(row => extractRowData(row, delimiter))

    // NOTE(review): debug output left over from development; the parsed rows are not used in the
    // generated file yet.
    println(headers)

    val wd = outDir / Generators.OutputDirName

    os.makeDir.all(wd)

    val file = wd / s"$moduleName.scala"

    val contents =
      s"""package $fullyQualifiedPackage
      |
      |// DO NOT EDIT: Generated by Indigo.
      |object $moduleName:
      |
      | val text: String =
      | ${Generators.TripleQuotes}""${Generators.TripleQuotes}
      |""".stripMargin

    os.write.over(file, contents)

    Seq(file)
  }

  /** Parses every row with [[extractRowData]], preserving row order. */
  def extractRows(rows: List[String], delimiter: String): List[List[DataType]] =
    rows.map(r => extractRowData(r, delimiter))

  /** Splits a single row into typed cells, discarding the "remaining input" half of each pair
    * produced by [[parse]].
    */
  def extractRowData(row: String, delimiter: String): List[DataType] =
    parse(delimiter)(row).map(_._1)

  // A parser of things,
  // is a function from strings,
  // to a list of pairs
  // of things and strings.

  /** Builds a row parser for the given delimiter.
    *
    * The returned function yields one `(cell, rest)` pair per cell, where `rest` is the untrimmed
    * input that followed the cell's delimiter (`""` for the last cell). Cell text is trimmed and
    * typed with `DataType.decideType`. A cell wrapped in single or double quotes has its quotes
    * dropped and may contain the delimiter; this also applies to a quoted cell at the very end of
    * the row, as long as it does not contain its own quote character.
    *
    * @param delimiter
    *   spliced into the regular expressions as-is, so regex metacharacters must be escaped by the
    *   caller (e.g. `"\\|"` for a pipe).
    */
  def parse(delimiter: String): String => List[(DataType, String)] = {
    // Compiled once per parser rather than once per cell.
    val takeUpToDelimiter        = s"^(.*?)${delimiter}(.*)".r
    val takeMatchingSingleQuotes = s"^'(.*?)'${delimiter}(.*)".r
    val takeMatchingDoubleQuotes = s"""^\"(.*?)\"${delimiter}(.*)""".r
    // A quoted cell with nothing after it. The content may not contain the quote character itself,
    // which keeps these patterns from swallowing malformed rows that the other cases handle.
    val takeFinalSingleQuotes = "^'([^']*)'$".r
    val takeFinalDoubleQuotes = "^\"([^\"]*)\"$".r

    def cell(text: String, rest: String): (DataType, String) =
      DataType.decideType(text.trim) -> rest

    // Cells are accumulated in reverse and flipped once at the end of the row.
    @scala.annotation.tailrec
    def loop(remaining: String, acc: List[(DataType, String)]): List[(DataType, String)] =
      remaining match {
        case takeMatchingDoubleQuotes(take, left) =>
          loop(left.trim, cell(take, left) :: acc)

        case takeMatchingSingleQuotes(take, left) =>
          loop(left.trim, cell(take, left) :: acc)

        case takeFinalDoubleQuotes(take) =>
          (cell(take, "") :: acc).reverse

        case takeFinalSingleQuotes(take) =>
          (cell(take, "") :: acc).reverse

        case takeUpToDelimiter(take, left) =>
          loop(left.trim, cell(take, left) :: acc)

        case take =>
          (cell(take, "") :: acc).reverse
      }

    (in: String) => loop(in, Nil)
  }
}

/** A single typed cell value: a boolean, an int, a double or a string. */
sealed trait DataType {

  /** True only for [[DataType.StringData]]. */
  def isString: Boolean =
    this match {
      case _: DataType.StringData => true
      case _                      => false
    }

  /** True only for [[DataType.DoubleData]]. */
  def isDouble: Boolean =
    this match {
      case _: DataType.DoubleData => true
      case _                      => false
    }

  /** True only for [[DataType.IntData]]. */
  def isInt: Boolean =
    this match {
      case _: DataType.IntData => true
      case _                   => false
    }

  /** True only for [[DataType.BooleanData]]. */
  def isBoolean: Boolean =
    this match {
      case _: DataType.BooleanData => true
      case _                       => false
    }

  /** Converts any value to [[DataType.StringData]] using the wrapped value's `toString`; string
    * values are returned unchanged.
    */
  def toStringData: DataType =
    this match {
      case s: DataType.StringData      => s
      case DataType.BooleanData(value) => DataType.StringData(value.toString)
      case DataType.DoubleData(value)  => DataType.StringData(value.toString)
      case DataType.IntData(value)     => DataType.StringData(value.toString)
    }

}
object DataType {

  // Most to least specific: Boolean, Int, Double, String
  final case class BooleanData(value: Boolean) extends DataType
  final case class IntData(value: Int) extends DataType {
    def toDoubleData: DoubleData = DoubleData(value.toDouble)
  }
  final case class DoubleData(value: Double) extends DataType
  final case class StringData(value: String) extends DataType

  private val isBoolean: Regex = """^(true|false)$""".r
  // Whole numbers with an optional leading minus sign.
  private val isInt: Regex = """^(-?[0-9]+)$""".r
  // Decimal numbers: a literal dot with at least one digit on either side, optional leading minus.
  private val isDouble: Regex = """^(-?(?:[0-9]+\.[0-9]*|\.[0-9]+))$""".r

  /** Picks the most specific type for a piece of cell text, trying boolean, int, double and
    * finally string.
    *
    * Text that looks like a whole number but does not fit in an `Int` is kept as
    * [[StringData]] so that no digits are lost. Anything that is not exactly `true`, `false`, a
    * whole number or a decimal number is a string.
    */
  def decideType: String => DataType = {
    case isBoolean(v) => BooleanData(v.toBoolean)
    case isInt(v) =>
      // The regex guarantees BigInt can parse the text; the range check avoids the
      // NumberFormatException that `toInt` throws for out-of-range values.
      val n = BigInt(v)
      if (n.isValidInt) IntData(n.toInt) else StringData(v)
    case isDouble(v) => DoubleData(v.toDouble)
    case v           => StringData(v)
  }

  /** True when both values are the same kind of [[DataType]], regardless of the wrapped value. */
  def sameType(a: DataType, b: DataType): Boolean =
    (a, b) match {
      case (_: DataType.StringData, _: DataType.StringData)   => true
      case (_: DataType.BooleanData, _: DataType.BooleanData) => true
      case (_: DataType.DoubleData, _: DataType.DoubleData)   => true
      case (_: DataType.IntData, _: DataType.IntData)         => true
      case _                                                  => false
    }

  /** True when every element has the same kind as the first one; an empty list counts as true. */
  def allSameType(l: List[DataType]): Boolean =
    l match {
      case Nil    => true
      case h :: t => t.forall(d => sameType(h, d))
    }

  /** True when every element is an [[IntData]] or a [[DoubleData]]. */
  def allNumericTypes(l: List[DataType]): Boolean =
    l.forall(d => d.isDouble || d.isInt)

  /** Unifies a list of values to a single kind, choosing the most specific one that fits them all.
    *
    * @return
    *   the list unchanged if it is already uniform; all doubles if it mixes ints and doubles;
    *   otherwise every value converted to a string.
    */
  def convertToBestType(l: List[DataType]): List[DataType] =
    // Cases we can manage:
    // - They're all the same!
    // - Doubles and Ints, convert Ints to Doubles
    // - Fallback is that everything is a string.
    if (allSameType(l)) {
      // All the same! Great!
      l
    } else if (allNumericTypes(l)) {
      l.map {
        case v @ DataType.DoubleData(_) => v
        case v @ DataType.IntData(_)    => v.toDoubleData
        case s => throw new Exception(s"Unexpected non-numeric type '$s'") // Shouldn't get here.
      }
    } else {
      // Nothing else to do, but make everything a string
      l.map(_.toStringData)
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package indigoplugin.generators

/** Checks that `EmbedData.extractRowData` splits rows into correctly typed cells for plain and
  * quoted input.
  */
class EmbedDataTests extends munit.FunSuite {

  import DataType._

  test("Extract row data - csv - simple") {
    // Surrounding whitespace on cells is expected to be trimmed away.
    val input = " abc,123, def,456.5 ,ghi789,true "

    val wanted: List[DataType] =
      List(StringData("abc"), IntData(123), StringData("def"), DoubleData(456.5), StringData("ghi789"), BooleanData(true))

    assertEquals(EmbedData.extractRowData(input, ","), wanted)
  }

  test("Extract row data - md - simple") {
    // The delimiter is a regex fragment, so the pipe is escaped.
    val input = "abc | 123| def|456.5 |ghi789|true"

    val wanted: List[DataType] =
      List(StringData("abc"), IntData(123), StringData("def"), DoubleData(456.5), StringData("ghi789"), BooleanData(true))

    assertEquals(EmbedData.extractRowData(input, "\\|"), wanted)
  }

  test("Extract row data - csv - with quotes") {
    // A delimiter inside a quoted cell must not split the cell.
    val input = """abc,"123,def",456,ghi789"""

    val wanted: List[DataType] =
      List(StringData("abc"), StringData("123,def"), IntData(456), StringData("ghi789"))

    assertEquals(EmbedData.extractRowData(input, ","), wanted)
  }

  test("Extract row data - csv - with double quotes and single quotes") {
    // Single quotes nested inside a double-quoted cell are kept.
    val input = """abc,"123,'def'",456,ghi789"""

    val wanted: List[DataType] =
      List(StringData("abc"), StringData("123,'def'"), IntData(456), StringData("ghi789"))

    assertEquals(EmbedData.extractRowData(input, ","), wanted)
  }

  test("Extract row data - csv - with single quotes and double quotes") {
    // Double quotes nested inside a single-quoted cell are kept.
    val input = """abc,'123,"def"',456,ghi789"""

    val wanted: List[DataType] =
      List(StringData("abc"), StringData("123,\"def\""), IntData(456), StringData("ghi789"))

    assertEquals(EmbedData.extractRowData(input, ","), wanted)
  }

}

0 comments on commit d6d334f

Please sign in to comment.