Skip to content

Commit

Permalink
feat: Add std.parseCsvWithHeader and std.manifestCsv
Browse files Browse the repository at this point in the history
  • Loading branch information
rohitjangid committed Jun 7, 2023
1 parent 868d9c6 commit 5cf731c
Show file tree
Hide file tree
Showing 17 changed files with 201 additions and 7 deletions.
167 changes: 167 additions & 0 deletions builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bytes"
"crypto/md5"
"encoding/base64"
"encoding/csv"
"encoding/hex"
"encoding/json"
"fmt"
Expand Down Expand Up @@ -1425,6 +1426,170 @@ func builtinParseYAML(i *interpreter, str value) (value, error) {
return jsonToValue(i, elems[0])
}

// builtinParseCSVWithHeader implements std.parseCsvWithHeader(str, delimiter).
//
// arguments[0] is the CSV text and arguments[1] is the field delimiter, or
// null to use a comma. The first record is treated as the header row; every
// following record becomes a map from header name to the field's string
// value. The collected rows are converted with jsonToValue.
//
// Repeated header names are made unique by appending "__N" (N = 1, 2, ...),
// skipping any suffix that would collide with a name already in use, so no
// column is silently overwritten.
//
// An error is returned when either argument is not a string, when the
// delimiter is not exactly one character, or when the CSV reader reports a
// parse error.
func builtinParseCSVWithHeader(i *interpreter, arguments []value) (value, error) {
	strv := arguments[0]
	dv := arguments[1]

	sval, err := i.getString(strv)
	if err != nil {
		return nil, err
	}
	s := sval.getGoString()

	d := ',' // default delimiter
	if dv.getType() != nullType {
		dval, err := i.getString(dv)
		if err != nil {
			return nil, err
		}
		ds := dval.getGoString()
		// Count runes, not bytes: a single non-ASCII character is several
		// bytes long but is still one delimiter.
		dr := []rune(ds)
		if len(dr) != 1 {
			return nil, i.Error(fmt.Sprintf("Delimiter %s is invalid", ds))
		}
		d = dr[0]
	}

	rows := make([]interface{}, 0)
	var keys []string

	reader := csv.NewReader(strings.NewReader(s))
	reader.Comma = d

	for row := 0; ; row++ {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, i.Error(fmt.Sprintf("failed to parse CSV: %s", err.Error()))
		}

		if row == 0 { // consider first row as header
			// suffix holds the last "__N" suffix tried for each raw header;
			// taken holds every key already emitted.
			suffix := make(map[string]int, len(record))
			taken := make(map[string]bool, len(record))
			keys = make([]string, 0, len(record))
			for _, k := range record {
				name := k
				for taken[name] {
					suffix[k]++
					name = fmt.Sprintf("%s__%d", k, suffix[k])
				}
				taken[name] = true
				keys = append(keys, name)
			}
			continue
		}

		// With the reader's default FieldsPerRecord, Read returns an error
		// for any record whose field count differs from the first record,
		// so record has exactly len(keys) fields here.
		obj := make(map[string]interface{}, len(keys))
		for col, k := range keys {
			obj[k] = record[col]
		}
		rows = append(rows, obj)
	}
	return jsonToValue(i, rows)
}

// builtinManifestCsv implements std.manifestCsv(json, headers).
//
// arguments[0] must be an array whose elements evaluate to objects, and
// arguments[1] is either an array of header strings or null. With null
// headers the field names of the first element are used, in sorted order; an
// empty array with null headers yields the empty string.
//
// The result is a header line followed by one line per element. A cell is
// left empty when the element has no value for that header, and other cells
// are rendered with stringFromValue.
//
// An error is returned when an argument has the wrong type, when headers
// cannot be derived from the first element, when a cell cannot be converted
// to a string, or when the CSV writer reports a failure.
func builtinManifestCsv(i *interpreter, arguments []value) (value, error) {
	arrv := arguments[0]
	hv := arguments[1]

	arr, err := i.getArray(arrv)
	if err != nil {
		return nil, err
	}

	var headers []string
	if hv.getType() == nullType {
		if len(arr.elements) == 0 { // no elements to select headers
			return makeValueString(""), nil
		}

		// default to all headers
		obj, err := i.evaluateObject(arr.elements[0])
		if err != nil {
			return nil, err
		}

		// Use the checked form of the assertion so an object with a
		// different underlying representation produces an error instead of
		// a panic.
		simpleObj, ok := obj.uncached.(*simpleObject)
		if !ok {
			return nil, i.Error("cannot derive CSV headers from the first element, headers must be provided")
		}

		// Map iteration order is random, so each name is inserted at its
		// sorted position to keep the column order stable between runs.
		headers = make([]string, 0, len(simpleObj.fields))
		for fieldName := range simpleObj.fields {
			pos := len(headers)
			for pos > 0 && headers[pos-1] > fieldName {
				pos--
			}
			headers = append(headers, "")
			copy(headers[pos+1:], headers[pos:])
			headers[pos] = fieldName
		}
	} else {
		// headers are provided
		ha, err := i.getArray(hv)
		if err != nil {
			return nil, err
		}

		headers = make([]string, 0, len(ha.elements))
		for _, elem := range ha.elements {
			header, err := i.evaluateString(elem)
			if err != nil {
				return nil, err
			}
			headers = append(headers, header.getGoString())
		}
	}

	var buf bytes.Buffer
	w := csv.NewWriter(&buf)

	// Write headers
	if err := w.Write(headers); err != nil {
		return nil, i.Error(fmt.Sprintf("failed to write CSV: %s", err.Error()))
	}

	// Write rest of the rows
	for _, elem := range arr.elements {
		obj, err := i.evaluateObject(elem)
		if err != nil {
			return nil, err
		}

		record := make([]string, len(headers))
		for c, h := range headers {
			val, err := obj.index(i, h)
			if err != nil {
				// Treated as "no corresponding column": the cell stays empty.
				// NOTE(review): this also hides any other error index may
				// return - confirm that is intended.
				continue
			}

			s, err := stringFromValue(i, val)
			if err != nil {
				return nil, err
			}
			record[c] = s
		}
		if err := w.Write(record); err != nil {
			return nil, i.Error(fmt.Sprintf("failed to write CSV: %s", err.Error()))
		}
	}

	// Flush reports nothing itself; a failure during buffered writes is only
	// visible through Error.
	w.Flush()
	if err := w.Error(); err != nil {
		return nil, i.Error(fmt.Sprintf("failed to write CSV: %s", err.Error()))
	}

	return makeValueString(buf.String()), nil
}

// stringFromValue renders a scalar value as the text of a single CSV cell.
//
// Strings are returned unchanged, booleans become "true" or "false", and null
// becomes the empty string. Numbers are rendered in plain decimal notation.
// Any other type (function, object, array) is rejected with an "invalid
// string conversion" error.
func stringFromValue(i *interpreter, v value) (string, error) {
	switch v.getType() {
	case stringType:
		s, err := i.getString(v)
		if err != nil {
			return "", err
		}
		return s.getGoString(), nil
	case numberType:
		n, err := i.getNumber(v)
		if err != nil {
			return "", err
		}
		// fmt.Sprint formats a float with %g, which switches to exponent
		// form from 1e6 upwards (1000000 becomes "1e+06"). The JSON encoder
		// keeps plain decimal notation up to 1e21, which is what a CSV cell
		// should contain.
		// NOTE(review): assumes n.value is a float64 - confirm.
		text, err := json.Marshal(n.value)
		if err != nil {
			return "", i.Error(fmt.Sprintf("invalid number: %s", err.Error()))
		}
		return string(text), nil
	case booleanType:
		b, err := i.getBoolean(v)
		if err != nil {
			return "", err
		}
		return fmt.Sprint(b.value), nil
	case nullType:
		return "", nil
	default:
		// for functionType, objectType and arrayType
		return "", i.Error("invalid string conversion")
	}
}

func jsonEncode(v interface{}) (string, error) {
buf := new(bytes.Buffer)
enc := json.NewEncoder(buf)
Expand Down Expand Up @@ -2290,6 +2455,8 @@ var funcBuiltins = buildBuiltinMap([]builtin{
&unaryBuiltin{name: "parseInt", function: builtinParseInt, params: ast.Identifiers{"str"}},
&unaryBuiltin{name: "parseJson", function: builtinParseJSON, params: ast.Identifiers{"str"}},
&unaryBuiltin{name: "parseYaml", function: builtinParseYAML, params: ast.Identifiers{"str"}},
&generalBuiltin{name: "parseCsvWithHeader", function: builtinParseCSVWithHeader, params: []generalBuiltinParameter{{name: "str"}, {name: "delimiter", defaultValue: &nullValue}}},
&generalBuiltin{name: "manifestCsv", function: builtinManifestCsv, params: []generalBuiltinParameter{{name: "json"}, {name: "headers", defaultValue: &nullValue}}},
&generalBuiltin{name: "manifestJsonEx", function: builtinManifestJSONEx, params: []generalBuiltinParameter{{name: "value"}, {name: "indent"},
{name: "newline", defaultValue: &valueFlatString{value: []rune("\n")}},
{name: "key_val_sep", defaultValue: &valueFlatString{value: []rune(": ")}}}},
Expand Down
16 changes: 9 additions & 7 deletions linter/internal/types/stdlib.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,14 @@ func prepareStdlib(g *typeGraph) {

// Parsing

"parseInt": g.newSimpleFuncType(numberType, "str"),
"parseOctal": g.newSimpleFuncType(numberType, "str"),
"parseHex": g.newSimpleFuncType(numberType, "str"),
"parseJson": g.newSimpleFuncType(jsonType, "str"),
"parseYaml": g.newSimpleFuncType(jsonType, "str"),
"encodeUTF8": g.newSimpleFuncType(numberArrayType, "str"),
"decodeUTF8": g.newSimpleFuncType(stringType, "arr"),
"parseInt": g.newSimpleFuncType(numberType, "str"),
"parseOctal": g.newSimpleFuncType(numberType, "str"),
"parseHex": g.newSimpleFuncType(numberType, "str"),
"parseJson": g.newSimpleFuncType(jsonType, "str"),
"parseYaml": g.newSimpleFuncType(jsonType, "str"),
"parseCsvWithHeader": g.newFuncType(jsonType, []ast.Parameter{required("str"), optional("delimiter")}),
"encodeUTF8": g.newSimpleFuncType(numberArrayType, "str"),
"decodeUTF8": g.newSimpleFuncType(stringType, "arr"),

// Manifestation

Expand All @@ -116,6 +117,7 @@ func prepareStdlib(g *typeGraph) {
"manifestJsonMinified": g.newSimpleFuncType(stringType, "value"),
"manifestYamlDoc": g.newSimpleFuncType(stringType, "value"),
"manifestYamlStream": g.newSimpleFuncType(stringType, "value"),
"manifestCsv": g.newFuncType(stringType, []ast.Parameter{required("json"), optional("headers")}),
"manifestXmlJsonml": g.newSimpleFuncType(stringType, "value"),

// Arrays
Expand Down
1 change: 1 addition & 0 deletions testdata/builtinManifestCsv.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"head1,head2\nval1,val2\n,1\nval3,\n"
1 change: 1 addition & 0 deletions testdata/builtinManifestCsv.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
std.manifestCsv([{ "head1": "val1", "head2": "val2", "head3": "foo" }, { "head2": 1, "head3": "bar" }, { "head1": "val3" }], ["head1", "head2"])
Empty file.
1 change: 1 addition & 0 deletions testdata/builtinManifestCsv2.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"head1\nval1\nval2\n"
1 change: 1 addition & 0 deletions testdata/builtinManifestCsv2.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
std.manifestCsv([{ "head1": "val1" }, { "head1": "val2" }])
Empty file.
6 changes: 6 additions & 0 deletions testdata/builtinParseCsvWithHeader.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[
{
"head1": "val1",
"head2": "val2"
}
]
1 change: 1 addition & 0 deletions testdata/builtinParseCsvWithHeader.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
std.parseCsvWithHeader("head1,head2\nval1,val2")
Empty file.
6 changes: 6 additions & 0 deletions testdata/builtinParseCsvWithHeader2.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[
{
"head1": "val1",
"head1__1": "val2"
}
]
1 change: 1 addition & 0 deletions testdata/builtinParseCsvWithHeader2.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
std.parseCsvWithHeader("head1,head1\nval1,val2")
Empty file.
6 changes: 6 additions & 0 deletions testdata/builtinParseCsvWithHeader3.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[
{
"head1": "val1",
"head2": "val2"
}
]
1 change: 1 addition & 0 deletions testdata/builtinParseCsvWithHeader3.jsonnet
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
std.parseCsvWithHeader("head1;head2\nval1;val2", ";")
Empty file.

0 comments on commit 5cf731c

Please sign in to comment.