Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add xml support as xq (from python yq) #215

Open
wants to merge 28 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
8417b4e
add xml support using mxj
Jun 4, 2023
efed1c6
fix compact mode for xml output
Jun 4, 2023
2b4ad32
updates from mxj
Jun 4, 2023
da965e1
add paths to force-list
Jun 4, 2023
31bbd2a
update mxj version
Jun 5, 2023
205ad7a
fix missing tags in xml compact mode
Jun 5, 2023
81145e7
replace mxj with my own xqml which is much faster
Jun 6, 2023
b359294
fix issue with elems/attrs
Jun 6, 2023
347b8f6
add xml attributes/html options
Jun 6, 2023
3a58c73
change too long options names
Jun 6, 2023
d6ed42c
update xqml version
Jun 7, 2023
ae0a59e
fix missing changes in methods
Jun 7, 2023
3184d9d
update github.com/momiji/xqml version to v0.0.6
Jun 16, 2023
88834bd
update github.com/momiji/xqml version to v0.0.7
Jun 17, 2023
140499b
update xqml to 0.0.8 with new Encoder/Decoder methods
Jun 19, 2023
a278590
update xqml to 0.0.9
Jun 19, 2023
043e9ab
add -V to get version
Jun 19, 2023
8ea86d6
fix: handle xml error in parser
Jun 19, 2023
a4ff272
feat: auto-detect format JSON/YAML/XML
Jun 23, 2023
9c11250
fix: add missing "---" yaml detection
Jun 26, 2023
407db29
fix: add missing "- " yaml detection
Jul 4, 2023
912da02
Merge branch 'itchyny:main' into main
momiji Aug 18, 2023
77e0319
Merge branch 'itchyny:main' into main
momiji Mar 9, 2024
d258e7a
Merge remote-tracking branch 'upstream/main'
Mar 24, 2024
a389322
Merge branch 'itchyny:main' into main
Apr 7, 2024
1facbbc
fix: restore -v and remove -V
Apr 7, 2024
ccb52a4
Merge remote-tracking branch 'upstream/main'
Oct 13, 2024
f2db4f4
fix: remove unused outputJSON
Oct 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions builtin.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 48 additions & 9 deletions cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ package cli
import (
"errors"
"fmt"
"github.com/itchyny/gojq"
"github.com/mattn/go-isatty"
"io"
"os"
"runtime"
"strings"

"github.com/mattn/go-isatty"

"github.com/itchyny/gojq"
)

const name = "gojq"
Expand Down Expand Up @@ -40,11 +38,20 @@ type cli struct {
outputCompact bool
outputIndent *int
outputTab bool
outputXML bool
outputYAML bool
inputRaw bool
inputStream bool
inputJSON bool
inputXML bool
inputYAML bool
inputSlurp bool
stripSpaceXML bool
stripAttrsXML bool
forceListXML []string
htmlXML bool
rootXML string
elementXML string

argnames []string
argvalues []any
Expand All @@ -60,13 +67,22 @@ type flagopts struct {
OutputCompact bool `short:"c" long:"compact-output" description:"output without pretty-printing"`
OutputIndent *int `long:"indent" description:"number of spaces for indentation"`
OutputTab bool `long:"tab" description:"use tabs for indentation"`
OutputYAML bool `long:"yaml-output" description:"output in YAML format"`
OutputYAML bool `short:"y" long:"yaml-output" description:"output in YAML format"`
OutputXML bool `short:"x" long:"xml-output" description:"output in XML format"`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Note: I'm not the author of gojq, just an interested third party.)

I think it makes sense to add even more formats in the future (toml, msgpack, bson, etc), but this list of boolean flags will grow unmanageable.

What do you think about --input-format=xml and --output-format=xml instead of these format-specific parameters?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, looks to be a very good idea :)

OutputColor bool `short:"C" long:"color-output" description:"output with colors even if piped"`
OutputMono bool `short:"M" long:"monochrome-output" description:"output without colors"`
InputNull bool `short:"n" long:"null-input" description:"use null as input value"`
InputRaw bool `short:"R" long:"raw-input" description:"read input as raw strings"`
InputStream bool `long:"stream" description:"parse input in stream fashion"`
InputYAML bool `long:"yaml-input" description:"read input as YAML format"`
InputJSON bool `short:"J" long:"json-input" description:"read input as JSON format"`
InputXML bool `short:"X" long:"xml-input" description:"read input as XML format"`
StripAttrsXML bool `long:"xml-no-attributes" description:"remove attributes from XML elements"`
StripSpaceXML bool `long:"xml-no-namespaces" description:"remove namespace from XML elements and attributes"`
ForceListXML []string `long:"xml-force-list" description:"force XML elements as array"`
RootXML string `long:"xml-root" description:"root XML element name"`
ElementXML string `long:"xml-element" description:"element XML element name"`
HtmlXML bool `short:"H" long:"xml-html" description:"read input as XML with HTML compatibility mode"`
InputYAML bool `short:"Y" long:"yaml-input" description:"read input as YAML format"`
InputSlurp bool `short:"s" long:"slurp" description:"read all inputs into an array"`
FromFile bool `short:"f" long:"from-file" description:"load query from file"`
ModulePaths []string `short:"L" description:"directory to search modules from"`
Expand Down Expand Up @@ -123,9 +139,9 @@ Usage:
return nil
}
cli.outputRaw, cli.outputRaw0, cli.outputJoin,
cli.outputCompact, cli.outputIndent, cli.outputTab, cli.outputYAML =
cli.outputCompact, cli.outputIndent, cli.outputTab, cli.outputXML, cli.outputYAML =
opts.OutputRaw, opts.OutputRaw0, opts.OutputJoin,
opts.OutputCompact, opts.OutputIndent, opts.OutputTab, opts.OutputYAML
opts.OutputCompact, opts.OutputIndent, opts.OutputTab, opts.OutputXML, opts.OutputYAML
defer func(x bool) { noColor = x }(noColor)
if opts.OutputColor || opts.OutputMono {
noColor = opts.OutputMono
Expand Down Expand Up @@ -154,6 +170,8 @@ Usage:
}
cli.inputRaw, cli.inputStream, cli.inputYAML, cli.inputSlurp =
opts.InputRaw, opts.InputStream, opts.InputYAML, opts.InputSlurp
cli.inputJSON, cli.inputXML, cli.stripAttrsXML, cli.stripSpaceXML, cli.forceListXML, cli.rootXML, cli.elementXML, cli.htmlXML =
opts.InputJSON, opts.InputXML, opts.StripAttrsXML, opts.StripSpaceXML, opts.ForceListXML, opts.RootXML, opts.ElementXML, opts.HtmlXML
for k, v := range opts.Arg {
cli.argnames = append(cli.argnames, "$"+k)
cli.argvalues = append(cli.argvalues, v)
Expand Down Expand Up @@ -300,10 +318,28 @@ func (cli *cli) createInputIter(args []string) (iter inputIter) {
}
case cli.inputStream:
newIter = newStreamInputIter
case cli.inputJSON:
newIter = newJSONInputIter
case cli.inputXML || cli.htmlXML:
newIter = func(r io.Reader, fname string) inputIter {
return newXMLInputIter(r, fname, !cli.stripAttrsXML, !cli.stripSpaceXML, cli.forceListXML, cli.htmlXML)
}
case cli.inputYAML:
newIter = newYAMLInputIter
default:
newIter = newJSONInputIter
// automatically detect between JSON / YAML / XML format
newIter = func(r io.Reader, fname string) inputIter {
rd, f := detectInputType(r, 100)
switch f {
case JsonFormat:
return newJSONInputIter(rd, fname)
case YamlFormat:
return newYAMLInputIter(rd, fname)
case XmlFormat:
return newXMLInputIter(rd, fname, !cli.stripAttrsXML, !cli.stripSpaceXML, cli.forceListXML, cli.htmlXML)
}
return newJSONInputIter(rd, fname)
}
}
if cli.inputSlurp {
defer func() {
Expand Down Expand Up @@ -404,6 +440,9 @@ func (cli *cli) createMarshaler() marshaler {
} else if i := cli.outputIndent; i != nil {
indent = *i
}
if cli.outputXML {
return xmlFormatter(&indent, cli.rootXML, cli.elementXML)
}
f := newEncoder(cli.outputTab, indent)
if cli.outputRaw || cli.outputRaw0 || cli.outputJoin {
return &rawMarshaler{f, cli.outputRaw0}
Expand Down
184 changes: 184 additions & 0 deletions cli/detect.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package cli

import (
"bytes"
"io"
)

// DetectedFormat identifies the input format reported by detectInputType.
type DetectedFormat int

// Known input formats. JsonFormat is the zero value.
const (
	JsonFormat DetectedFormat = iota
	YamlFormat
	XmlFormat
)

// String returns the lowercase name of the format ("json", "yaml" or "xml"),
// or the empty string for a value that is not one of the declared constants.
func (d DetectedFormat) String() string {
	labels := [...]string{
		JsonFormat: "json",
		YamlFormat: "yaml",
		XmlFormat:  "xml",
	}
	if d < 0 || int(d) >= len(labels) {
		return ""
	}
	return labels[d]
}

// detectInputType reads just enough of r to guess whether it holds JSON, YAML
// or XML, and returns that guess together with a reader that yields the
// complete original stream (the bytes consumed while sniffing followed by the
// rest of r).
//
// bufSize is the size of each chunk read from r; a non-positive value falls
// back to a small default.
//
// The guess is a heuristic based on the first token after leading whitespace:
//   - '{', '[', '/', a digit or '.' means JSON;
//   - '#' means YAML;
//   - '<' means XML;
//   - "- " or "---" means YAML, any other '-' means JSON;
//   - a double-quoted string or true/false/null means YAML when followed on
//     the same line by ':' (a mapping key), JSON otherwise;
//   - anything else means YAML.
//
// Empty input and read errors yield JsonFormat; the error itself is left for
// the consumer of the returned reader to encounter.
func detectInputType(r io.Reader, bufSize int) (io.Reader, DetectedFormat) {
	const (
		// fallbackBufSize replaces a non-positive bufSize, which would
		// otherwise make every Read return no data.
		fallbackBufSize = 64
		// maxEmptyReads bounds how often a (0, nil) read is retried.
		maxEmptyReads = 100
	)
	if bufSize <= 0 {
		bufSize = fallbackBufSize
	}

	var (
		consumed []io.Reader // one reader per chunk already taken from r
		buf      []byte
		index    int
		length   int
		// readErr is the sticky error of the last Read. It is kept apart
		// from the per-call error so that callers cannot overwrite it.
		readErr error
	)

	// result builds the replay reader: consumed chunks first, then r itself.
	result := func(t DetectedFormat) (io.Reader, DetectedFormat) {
		return io.MultiReader(append(consumed, r)...), t
	}

	// readByte returns the next byte of r, refilling the buffer as needed.
	readByte := func() (byte, error) {
		for emptyReads := 0; index == length; {
			if readErr != nil {
				return 0, readErr
			}
			// A fresh slice is required: the previous one is still
			// referenced by the replay readers.
			buf = make([]byte, bufSize)
			length, readErr = r.Read(buf)
			index = 0
			if length > 0 {
				consumed = append(consumed, bytes.NewReader(buf[:length]))
				continue
			}
			// io.Reader may legally return (0, nil); retry instead of
			// handing out a byte that was never read.
			if readErr == nil {
				if emptyReads++; emptyReads >= maxEmptyReads {
					readErr = io.ErrNoProgress
				}
			}
		}
		b := buf[index]
		index++
		return b, nil
	}

	// afterScalar inspects what follows a complete JSON-looking scalar to
	// tell a YAML mapping key from a plain JSON value.
	afterScalar := func() DetectedFormat {
		for {
			b, err := readByte()
			if err != nil {
				return JsonFormat
			}
			switch b {
			case ' ', '\t':
				// keep looking on the same line
			case ':':
				// it is a yaml key
				return YamlFormat
			default:
				// a new line (not allowed before the ':' of a yaml key)
				// or any other character: not a yaml key
				return JsonFormat
			}
		}
	}

	// quoted consumes a string whose opening quote has already been read.
	quoted := func(quote byte) DetectedFormat {
		escaped := false
		for {
			b, err := readByte()
			if err != nil {
				return JsonFormat
			}
			if escaped {
				// The escape covers exactly one character.
				escaped = false
				continue
			}
			switch b {
			case '\r', '\n':
				// new line not allowed in yaml keys
				return JsonFormat
			case '\\':
				escaped = true
			case quote:
				return afterScalar()
			}
		}
	}

	// keyword checks the remaining letters of true/false/null, whose first
	// letter has already been read. A mismatch means a plain YAML scalar.
	keyword := func(rest string) DetectedFormat {
		for i := 0; i < len(rest); i++ {
			b, err := readByte()
			if err != nil || b != rest[i] {
				return YamlFormat
			}
		}
		return afterScalar()
	}

	// dash handles input starting with '-': yaml if "- " or "---",
	// otherwise json (for example a negative number).
	dash := func() DetectedFormat {
		c, err := readByte()
		if err != nil {
			return JsonFormat
		}
		if c == ' ' {
			return YamlFormat
		}
		if c != '-' {
			return JsonFormat
		}
		c, err = readByte()
		if err != nil || c != '-' {
			return JsonFormat
		}
		return YamlFormat
	}

	for {
		b, err := readByte()
		if err != nil {
			return result(JsonFormat)
		}
		switch b {
		case ' ', '\t', '\r', '\n':
			// skip leading whitespace
		case '{', '[', '/':
			return result(JsonFormat)
		case '#':
			return result(YamlFormat)
		case '<':
			return result(XmlFormat)
		case '-':
			return result(dash())
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
			return result(JsonFormat)
		case '"':
			// string can be either a json text or a yaml key
			return result(quoted(b))
		case 't':
			// json if true
			return result(keyword("rue"))
		case 'f':
			// json if false
			return result(keyword("alse"))
		case 'n':
			// json if null
			return result(keyword("ull"))
		default:
			// neither a number nor a string with "
			return result(YamlFormat)
		}
	}
}
56 changes: 56 additions & 0 deletions cli/detect_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package cli

import (
"bytes"
"io"
"strings"
"testing"
)

func TestDetectInputType(t *testing.T) {
for _, s := range []string{"", "\t", "\r", "\n", " ", " \t", " \r", " \n", " \t ", " \r ", " \n "} {
testDetectInputType(t, s+"", JsonFormat)
testDetectInputType(t, s+"{", JsonFormat)
testDetectInputType(t, s+"#", YamlFormat)
testDetectInputType(t, s+"<", XmlFormat)
testDetectInputType(t, s+"a", YamlFormat)
testDetectInputType(t, s+"a:", YamlFormat)
testDetectInputType(t, s+"a: 1", YamlFormat)
testDetectInputType(t, s+"true", JsonFormat)
testDetectInputType(t, s+"true true", JsonFormat)
testDetectInputType(t, s+"true:", YamlFormat)
testDetectInputType(t, s+"null", JsonFormat)
testDetectInputType(t, s+"null null", JsonFormat)
testDetectInputType(t, s+"null:", YamlFormat)
testDetectInputType(t, s+"false", JsonFormat)
testDetectInputType(t, s+"false false", JsonFormat)
testDetectInputType(t, s+"false:", YamlFormat)
testDetectInputType(t, s+"1", JsonFormat)
testDetectInputType(t, s+"-1", JsonFormat)
testDetectInputType(t, s+"-1e3", JsonFormat)
testDetectInputType(t, s+"- ", YamlFormat)
testDetectInputType(t, s+"--", JsonFormat)
testDetectInputType(t, s+"---", YamlFormat)
testDetectInputType(t, s+`"hello"`, JsonFormat)
testDetectInputType(t, s+`"hello":1`, YamlFormat)
testDetectInputType(t, s+`"hello": 1`, YamlFormat)
testDetectInputType(t, s+`'hello'`, YamlFormat)
testDetectInputType(t, s+`'hello':1`, YamlFormat)
testDetectInputType(t, s+`'hello': 1`, YamlFormat)
}
}

// testDetectInputType runs detectInputType on s with a one-byte buffer and
// fails the test unless the detected format equals format and the returned
// reader reproduces s exactly.
func testDetectInputType(t *testing.T, s string, format DetectedFormat) {
	// Report failures at the caller's line, which identifies the case.
	t.Helper()
	r, f := detectInputType(strings.NewReader(s), 1)
	if f != format {
		// %q keeps whitespace-only inputs readable in the failure output.
		t.Fatalf("failed: invalid format %q expected %q for string %q", f, format, s)
	}
	var buf bytes.Buffer
	if _, err := io.Copy(&buf, r); err != nil {
		t.Fatalf("failed: copy error for string %q: %v", s, err)
	}
	if got := buf.String(); got != s {
		t.Fatalf("failed: invalid reader content %q for string %q", got, s)
	}
}
Loading