diff --git a/builtins.go b/builtins.go
index 875fd5f2..936486c5 100644
--- a/builtins.go
+++ b/builtins.go
@@ -20,6 +20,7 @@ import (
 	"bytes"
 	"crypto/md5"
 	"encoding/base64"
+	"encoding/csv"
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
@@ -1425,6 +1426,220 @@ func builtinParseYAML(i *interpreter, str value) (value, error) {
 	return jsonToValue(i, elems[0])
 }
 
+// builtinParseCSVWithHeader implements std.parseCsvWithHeader(str, delimiter=null).
+//
+// The first CSV record is the header row; every following record becomes an
+// object that maps each header name to the string in the same column.
+// Repeated header names are disambiguated with a "__<n>" suffix. A null
+// delimiter means a comma; otherwise the delimiter must be a string holding
+// exactly one character.
+func builtinParseCSVWithHeader(i *interpreter, arguments []value) (value, error) {
+	strv := arguments[0]
+	dv := arguments[1]
+
+	sval, err := i.getString(strv)
+	if err != nil {
+		return nil, err
+	}
+	s := sval.getGoString()
+
+	d := ',' // default delimiter
+	if dv.getType() != nullType {
+		dval, err := i.getString(dv)
+		if err != nil {
+			return nil, err
+		}
+		ds := dval.getGoString()
+		// Count runes, not bytes, so that one non-ASCII character is accepted
+		// and is not truncated to its first byte.
+		dr := []rune(ds)
+		if len(dr) != 1 {
+			return nil, i.Error(fmt.Sprintf("Delimiter %s is invalid", ds))
+		}
+		d = dr[0]
+	}
+
+	// Not called "json": that name would shadow the encoding/json package.
+	rows := make([]interface{}, 0)
+	var keys []string
+
+	reader := csv.NewReader(strings.NewReader(s))
+	reader.Comma = d
+
+	for row := 0; ; row++ {
+		record, err := reader.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, i.Error(fmt.Sprintf("failed to parse CSV: %s", err.Error()))
+		}
+
+		if row == 0 { // consider first row as header
+			// detect and handle duplicate headers
+			keyCount := map[string]int{}
+			for _, k := range record {
+				keyCount[k]++
+				if c := keyCount[k]; c > 1 {
+					keys = append(keys, fmt.Sprintf("%s__%d", k, c-1))
+				} else {
+					keys = append(keys, k)
+				}
+			}
+			continue
+		}
+
+		// With the default FieldsPerRecord, csv.Reader rejects a record whose
+		// field count differs from the first one, so record[col] is in range.
+		obj := make(map[string]interface{}, len(keys))
+		for col, k := range keys {
+			obj[k] = record[col]
+		}
+		rows = append(rows, obj)
+	}
+	return jsonToValue(i, rows)
+}
+
+// builtinManifestCsv implements std.manifestCsv(json, headers=null).
+//
+// json must be an array of objects: a header record is written first and
+// then one record per object. headers selects and orders the columns; when
+// it is null the visible fields of the first object are used, sorted by
+// name so that the output is deterministic. A field that a row does not
+// have produces an empty cell.
+func builtinManifestCsv(i *interpreter, arguments []value) (value, error) {
+	arrv := arguments[0]
+	hv := arguments[1]
+
+	arr, err := i.getArray(arrv)
+	if err != nil {
+		return nil, err
+	}
+
+	var headers []string
+	if hv.getType() == nullType {
+		if len(arr.elements) == 0 { // no elements to select headers
+			return makeValueString(""), nil
+		}
+
+		// default to all headers
+		obj, err := i.evaluateObject(arr.elements[0])
+		if err != nil {
+			return nil, err
+		}
+
+		// A single-value assertion to *simpleObject panics for any other
+		// object representation, and ranging over a map yields a different
+		// column order on every run, so list the fields and sort them.
+		// NOTE(review): objectFields, withoutHidden/withHidden and the sort
+		// import are assumed to already exist in this package - confirm.
+		headers = objectFields(obj, withoutHidden)
+		sort.Strings(headers)
+	} else {
+		// headers are provided
+		ha, err := i.getArray(hv)
+		if err != nil {
+			return nil, err
+		}
+
+		for _, elem := range ha.elements {
+			header, err := i.evaluateString(elem)
+			if err != nil {
+				return nil, err
+			}
+			headers = append(headers, header.getGoString())
+		}
+	}
+
+	var buf bytes.Buffer
+	w := csv.NewWriter(&buf)
+
+	// Write headers
+	if err := w.Write(headers); err != nil {
+		return nil, i.Error(fmt.Sprintf("failed to write CSV: %s", err.Error()))
+	}
+
+	// Write rest of the rows
+	for _, elem := range arr.elements {
+		obj, err := i.evaluateObject(elem)
+		if err != nil {
+			return nil, err
+		}
+
+		// Look fields up by name first so that an error raised while
+		// evaluating an existing field is reported rather than being
+		// mistaken for a missing column.
+		present := make(map[string]bool)
+		for _, f := range objectFields(obj, withHidden) {
+			present[f] = true
+		}
+
+		record := make([]string, len(headers))
+		for c, h := range headers {
+			if !present[h] { // no corresponding column
+				continue
+			}
+
+			val, err := obj.index(i, h)
+			if err != nil {
+				return nil, err
+			}
+
+			s, err := stringFromValue(i, val)
+			if err != nil {
+				return nil, err
+			}
+			record[c] = s
+		}
+		if err := w.Write(record); err != nil {
+			return nil, i.Error(fmt.Sprintf("failed to write CSV: %s", err.Error()))
+		}
+	}
+
+	// Flush reports nothing itself; a write failure surfaces via Error.
+	w.Flush()
+	if err := w.Error(); err != nil {
+		return nil, i.Error(fmt.Sprintf("failed to write CSV: %s", err.Error()))
+	}
+
+	return makeValueString(buf.String()), nil
+}
+
+// stringFromValue converts a primitive jsonnet value to the text placed in a
+// CSV cell: strings are used as they are, numbers and booleans are formatted
+// with fmt.Sprint, and null becomes the empty string. Any other type is
+// reported as an error.
+func stringFromValue(i *interpreter, v value) (string, error) {
+	switch v.getType() {
+	case stringType:
+		s, err := i.getString(v)
+		if err != nil {
+			return "", err
+		}
+		return s.getGoString(), nil
+	case numberType:
+		n, err := i.getNumber(v)
+		if err != nil {
+			return "", err
+		}
+		// NOTE(review): assuming n.value is a float64, fmt.Sprint renders it
+		// with %v, so large values come out in exponent form
+		// (1000000 -> "1e+06"); confirm this is the intended CSV form.
+		return fmt.Sprint(n.value), nil
+	case booleanType:
+		b, err := i.getBoolean(v)
+		if err != nil {
+			return "", err
+		}
+		return fmt.Sprint(b.value), nil
+	case nullType:
+		return "", nil
+	default:
+		// for functionType, objectType and arrayType
+		return "", i.Error("invalid string conversion")
+	}
+}
+
 func jsonEncode(v interface{}) (string, error) {
 	buf := new(bytes.Buffer)
 	enc := json.NewEncoder(buf)
@@ -2290,6 +2505,8 @@ var funcBuiltins = buildBuiltinMap([]builtin{
 	&unaryBuiltin{name: "parseInt", function: builtinParseInt, params: ast.Identifiers{"str"}},
 	&unaryBuiltin{name: "parseJson", function: builtinParseJSON, params: ast.Identifiers{"str"}},
 	&unaryBuiltin{name: "parseYaml", function: builtinParseYAML, params: ast.Identifiers{"str"}},
+	&generalBuiltin{name: "parseCsvWithHeader", function: builtinParseCSVWithHeader, params: []generalBuiltinParameter{{name: "str"}, {name: "delimiter", defaultValue: &nullValue}}},
+	&generalBuiltin{name: "manifestCsv", function: builtinManifestCsv, params: []generalBuiltinParameter{{name: "json"}, {name: "headers", defaultValue: &nullValue}}},
 	&generalBuiltin{name: "manifestJsonEx", function: builtinManifestJSONEx, params: []generalBuiltinParameter{{name: "value"}, {name: "indent"},
 		{name: "newline", defaultValue: &valueFlatString{value: []rune("\n")}},
 		{name: "key_val_sep", defaultValue: &valueFlatString{value: []rune(": ")}}}},
diff --git a/linter/internal/types/stdlib.go b/linter/internal/types/stdlib.go
index dee12b27..533b364d 100644
--- a/linter/internal/types/stdlib.go
+++ b/linter/internal/types/stdlib.go
@@ -98,13 +98,14 @@ func prepareStdlib(g *typeGraph) {
 
 		// Parsing
 
-		"parseInt":   g.newSimpleFuncType(numberType, "str"),
-		"parseOctal": g.newSimpleFuncType(numberType, "str"),
-		"parseHex":   g.newSimpleFuncType(numberType, "str"),
-		"parseJson":  g.newSimpleFuncType(jsonType, "str"),
-		"parseYaml":  g.newSimpleFuncType(jsonType, "str"),
-		"encodeUTF8": g.newSimpleFuncType(numberArrayType, "str"),
-		"decodeUTF8": g.newSimpleFuncType(stringType, "arr"),
+		"parseInt":           g.newSimpleFuncType(numberType, "str"),
+		"parseOctal":         g.newSimpleFuncType(numberType, "str"),
+		"parseHex":           g.newSimpleFuncType(numberType, "str"),
+		"parseJson":          g.newSimpleFuncType(jsonType, "str"),
+		"parseYaml":          g.newSimpleFuncType(jsonType, "str"),
+		"parseCsvWithHeader": g.newFuncType(jsonType, []ast.Parameter{required("str"), optional("delimiter")}),
+		"encodeUTF8":         g.newSimpleFuncType(numberArrayType, "str"),
+		"decodeUTF8":         g.newSimpleFuncType(stringType, "arr"),
 
 		// Manifestation
 
@@ -116,6 +117,7 @@ func prepareStdlib(g *typeGraph) {
 		"manifestJsonMinified": g.newSimpleFuncType(stringType, "value"),
 		"manifestYamlDoc":      g.newSimpleFuncType(stringType, "value"),
 		"manifestYamlStream":   g.newSimpleFuncType(stringType, "value"),
+		"manifestCsv":          g.newFuncType(stringType, []ast.Parameter{required("json"), optional("headers")}),
 		"manifestXmlJsonml":    g.newSimpleFuncType(stringType, "value"),
 
 		// Arrays
diff --git a/testdata/builtinManifestCsv.golden b/testdata/builtinManifestCsv.golden
new file mode 100644
index 00000000..b87af8a5
--- /dev/null
+++ b/testdata/builtinManifestCsv.golden
@@ -0,0 +1 @@
+"head1,head2\nval1,val2\n,1\nval3,\n"
diff --git a/testdata/builtinManifestCsv.jsonnet b/testdata/builtinManifestCsv.jsonnet
new file mode 100644
index 00000000..24a8c075
--- /dev/null
+++ b/testdata/builtinManifestCsv.jsonnet
@@ -0,0 +1 @@
+std.manifestCsv([{ "head1": "val1", "head2": "val2", "head3": "foo" }, { "head2": 1, "head3": "bar" }, { "head1": "val3" }], ["head1", "head2"])
\ No newline at end of file
diff --git a/testdata/builtinManifestCsv.linter.golden b/testdata/builtinManifestCsv.linter.golden
new file mode 100644
index 00000000..e69de29b
diff --git a/testdata/builtinManifestCsv2.golden b/testdata/builtinManifestCsv2.golden
new file mode 100644
index 00000000..17dbafb1
--- /dev/null
+++ b/testdata/builtinManifestCsv2.golden
@@ -0,0 +1 @@
+"head1\nval1\nval2\n"
diff --git a/testdata/builtinManifestCsv2.jsonnet b/testdata/builtinManifestCsv2.jsonnet
new file mode 100644
index 00000000..724e6b60
--- /dev/null
+++ b/testdata/builtinManifestCsv2.jsonnet
@@ -0,0 +1 @@
+std.manifestCsv([{ "head1": "val1" }, { "head1": "val2" }])
\ No newline at end of file
diff --git a/testdata/builtinManifestCsv2.linter.golden b/testdata/builtinManifestCsv2.linter.golden
new file mode 100644
index 00000000..e69de29b
diff --git a/testdata/builtinParseCsvWithHeader.golden b/testdata/builtinParseCsvWithHeader.golden
new file mode 100644
index 00000000..468a887a
--- /dev/null
+++ b/testdata/builtinParseCsvWithHeader.golden
@@ -0,0 +1,6 @@
+[
+   {
+      "head1": "val1",
+      "head2": "val2"
+   }
+]
diff --git a/testdata/builtinParseCsvWithHeader.jsonnet b/testdata/builtinParseCsvWithHeader.jsonnet
new file mode 100644
index 00000000..8d50422e
--- /dev/null
+++ b/testdata/builtinParseCsvWithHeader.jsonnet
@@ -0,0 +1 @@
+std.parseCsvWithHeader("head1,head2\nval1,val2")
\ No newline at end of file
diff --git a/testdata/builtinParseCsvWithHeader.linter.golden b/testdata/builtinParseCsvWithHeader.linter.golden
new file mode 100644
index 00000000..e69de29b
diff --git a/testdata/builtinParseCsvWithHeader2.golden b/testdata/builtinParseCsvWithHeader2.golden
new file mode 100644
index 00000000..9bf9bc85
--- /dev/null
+++ b/testdata/builtinParseCsvWithHeader2.golden
@@ -0,0 +1,6 @@
+[
+   {
+      "head1": "val1",
+      "head1__1": "val2"
+   }
+]
diff --git a/testdata/builtinParseCsvWithHeader2.jsonnet b/testdata/builtinParseCsvWithHeader2.jsonnet
new file mode 100644
index 00000000..8bdb2d6a
--- /dev/null
+++ b/testdata/builtinParseCsvWithHeader2.jsonnet
@@ -0,0 +1 @@
+std.parseCsvWithHeader("head1,head1\nval1,val2")
\ No newline at end of file
diff --git a/testdata/builtinParseCsvWithHeader2.linter.golden b/testdata/builtinParseCsvWithHeader2.linter.golden
new file mode 100644
index 00000000..e69de29b
diff --git a/testdata/builtinParseCsvWithHeader3.golden b/testdata/builtinParseCsvWithHeader3.golden
new file mode 100644
index 00000000..468a887a
--- /dev/null
+++ b/testdata/builtinParseCsvWithHeader3.golden
@@ -0,0 +1,6 @@
+[
+   {
+      "head1": "val1",
+      "head2": "val2"
+   }
+]
diff --git a/testdata/builtinParseCsvWithHeader3.jsonnet b/testdata/builtinParseCsvWithHeader3.jsonnet
new file mode 100644
index 00000000..8826aed7
--- /dev/null
+++ b/testdata/builtinParseCsvWithHeader3.jsonnet
@@ -0,0 +1 @@
+std.parseCsvWithHeader("head1;head2\nval1;val2", ";")
\ No newline at end of file
diff --git a/testdata/builtinParseCsvWithHeader3.linter.golden b/testdata/builtinParseCsvWithHeader3.linter.golden
new file mode 100644
index 00000000..e69de29b