Skip to content

Commit ca928f2

Browse files
committed
Support parsing application/json-seq
Support parsing JSON text sequences, in which each value is preceded by an RS (record separator, 0x1E) character, as per RFC 7464.
1 parent 3580e9d commit ca928f2

File tree

2 files changed

+99
-25
lines changed

2 files changed

+99
-25
lines changed

jq.pyx

Lines changed: 48 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ cdef extern from "jv.h":
1919
JV_KIND_ARRAY,
2020
JV_KIND_OBJECT
2121

22+
ctypedef enum:
23+
JV_PARSE_SEQ,
24+
JV_PARSE_STREAMING,
25+
JV_PARSE_STREAM_ERRORS
26+
2227
ctypedef struct jv:
2328
pass
2429

@@ -49,6 +54,7 @@ cdef extern from "jv.h":
4954
jv_parser* jv_parser_new(int)
5055
void jv_parser_free(jv_parser*)
5156
void jv_parser_set_buf(jv_parser*, const char*, int, int)
57+
int jv_parser_remaining(jv_parser*)
5258
jv jv_parser_next(jv_parser*)
5359

5460
jv jv_parse(const char*)
@@ -247,27 +253,34 @@ cdef class _Program(object):
247253
self._program_bytes = program_bytes
248254
self._jq_state_pool = _JqStatePool(program_bytes, args=args)
249255

250-
def input(self, value=_NO_VALUE, text=_NO_VALUE, *, slurp=False):
256+
def input(self, value=_NO_VALUE, text=_NO_VALUE, *,
257+
slurp=False, seq=False):
251258
if (value is _NO_VALUE) == (text is _NO_VALUE):
252259
raise ValueError("Either the value or text argument should be set")
253260

254261
if text is not _NO_VALUE:
255-
return self.input_text(text, slurp=slurp)
262+
return self.input_text(text, slurp=slurp, seq=seq)
256263
else:
257-
return self.input_value(value, slurp=slurp)
264+
return self.input_value(value, slurp=slurp, seq=seq)
258265

259-
def input_value(self, value, *, slurp=False, seq=False):
    """Serialise ``value`` as JSON and supply it as the program's input.

    Args:
        value: Any object accepted by ``json.dumps``.
        slurp: Forwarded unchanged to ``input_text``.
        seq: When true, frame the serialised value as a one-element JSON
            text sequence (RS + JSON text + LF) and ask ``input_text`` to
            parse it in sequence mode.

    Returns:
        Whatever ``input_text`` returns for the generated text.
    """
    text = json.dumps(value)
    if seq:
        # RFC 7464 requires each text to be preceded by RS *and* followed by
        # LF.  Without the trailing LF a top-level number/literal is
        # indistinguishable from a truncated one, and sequence parsers are
        # expected to reject it.  input_values already terminates each
        # record with "\n"; do the same here.
        text = "\x1e" + text + "\n"
    return self.input_text(text, slurp=slurp, seq=seq)
261271

262-
def input_values(self, values, *, slurp=False):
272+
def input_values(self, values, *, slurp=False, seq=False):
263273
fileobj = io.StringIO()
264274
for value in values:
275+
if seq:
276+
fileobj.write("\x1e")
265277
json.dump(value, fileobj)
266278
fileobj.write("\n")
267-
return self.input_text(fileobj.getvalue(), slurp=slurp)
279+
return self.input_text(fileobj.getvalue(), slurp=slurp, seq=seq)
268280

269-
def input_text(self, text, *, slurp=False):
270-
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"), slurp=slurp)
281+
def input_text(self, text, *, slurp=False, seq=False):
282+
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"),
283+
slurp=slurp, seq=seq)
271284

272285
@property
273286
def program_string(self):
@@ -291,24 +304,30 @@ cdef class _ProgramWithInput(object):
291304
cdef _JqStatePool _jq_state_pool
292305
cdef object _bytes_input
293306
cdef bint _slurp
307+
cdef bint _seq
294308

295-
def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp):
309+
def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp, bint seq):
296310
self._jq_state_pool = jq_state_pool
297311
self._bytes_input = bytes_input
298312
self._slurp = slurp
313+
self._seq = seq
299314

300315
def __iter__(self):
301316
return self._make_iterator()
302317

303318
cdef _ResultIterator _make_iterator(self):
304-
return _ResultIterator(self._jq_state_pool, self._bytes_input, slurp=self._slurp)
319+
return _ResultIterator(self._jq_state_pool, self._bytes_input,
320+
slurp=self._slurp, seq=self._seq)
305321

306322
def text(self):
    """Return every result of the program serialised as JSON text.

    In the default mode the results are joined with newlines (no trailing
    newline).  When the input was supplied with ``seq=True`` the results
    are emitted as a JSON text sequence: each result is preceded by RS
    (``"\\x1e"``) and followed by LF, so an empty result set yields an
    empty string.
    """
    # Performance testing suggests that using _jv_to_python (within the
    # result iterator) followed by json.dumps is faster than using
    # jv_dump_string to generate the string directly from the jv values.
    # See: https://github.com/mwilliamson/jq.py/pull/50
    if self._seq:
        # RFC 7464 framing is RS + JSON text + LF for *every* element; the
        # final LF is what lets a reader tell a complete top-level number
        # or literal from a truncated one.
        return "".join("\x1e" + json.dumps(v) + "\n" for v in self)
    else:
        return "\n".join(json.dumps(v) for v in self)
312331

313332
def all(self):
314333
return list(self)
@@ -329,13 +348,14 @@ cdef class _ResultIterator(object):
329348
self._jq_state_pool.release(self._jq)
330349
jv_parser_free(self._parser)
331350

332-
def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *, bint slurp):
351+
def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *,
352+
bint slurp, bint seq):
333353
self._jq_state_pool = jq_state_pool
334354
self._jq = jq_state_pool.acquire()
335355
self._bytes_input = bytes_input
336356
self._slurp = slurp
337357
self._ready = False
338-
cdef jv_parser* parser = jv_parser_new(0)
358+
cdef jv_parser* parser = jv_parser_new(JV_PARSE_SEQ if seq else 0)
339359
cdef char* cbytes_input
340360
cdef ssize_t clen_input
341361
PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
@@ -384,17 +404,20 @@ cdef class _ResultIterator(object):
384404
return 0
385405

386406
cdef inline jv _parse_next_input(self) except *:
387-
cdef jv value = jv_parser_next(self._parser)
388-
if jv_is_valid(value):
389-
return value
390-
elif jv_invalid_has_msg(jv_copy(value)):
391-
error_message = jv_invalid_get_msg(value)
392-
message = jv_string_to_py_string(error_message)
393-
jv_free(error_message)
394-
raise ValueError(u"parse error: " + message)
395-
else:
396-
jv_free(value)
397-
raise StopIteration()
407+
cdef jv value
408+
while True:
409+
value = jv_parser_next(self._parser)
410+
if jv_is_valid(value):
411+
return value
412+
elif jv_invalid_has_msg(jv_copy(value)):
413+
error_message = jv_invalid_get_msg(value)
414+
message = jv_string_to_py_string(error_message)
415+
jv_free(error_message)
416+
raise ValueError(u"parse error: " + message)
417+
else:
418+
if not jv_parser_remaining(self._parser):
419+
jv_free(value)
420+
raise StopIteration()
398421

399422

400423
def all(program, value=_NO_VALUE, text=_NO_VALUE):

tests/jq_tests.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,57 @@ def test_unicode_strings_can_be_used_as_input():
241241
)
242242

243243

244+
def test_record_separator_character_accepted_in_input():
245+
assert_equal(
246+
[],
247+
list(jq.compile(".").input(text='\x1e', seq=True))
248+
)
249+
assert_equal(
250+
[],
251+
list(jq.compile(".").input(text='\x1e\x1e', seq=True))
252+
)
253+
assert_equal(
254+
[{}],
255+
list(jq.compile(".").input(text='\x1e{}', seq=True))
256+
)
257+
assert_equal(
258+
[{}],
259+
list(jq.compile(".").input(text='\x1e\x1e{}', seq=True))
260+
)
261+
assert_equal(
262+
[],
263+
list(jq.compile(".").input(text='{}\x1e', seq=True))
264+
)
265+
assert_equal(
266+
[],
267+
list(jq.compile(".").input(text='{}\x1e\x1e', seq=True))
268+
)
269+
assert_equal(
270+
[{}],
271+
list(jq.compile(".").input(text='\x1e{}\x1e', seq=True))
272+
)
273+
assert_equal(
274+
[[]],
275+
list(jq.compile(".").input(text='{}\x1e[]', seq=True))
276+
)
277+
assert_equal(
278+
[[]],
279+
list(jq.compile(".").input(text='{}\x1e\x1e[]', seq=True))
280+
)
281+
assert_equal(
282+
[{},[]],
283+
list(jq.compile(".").input(text='\x1e{}\x1e[]', seq=True))
284+
)
285+
assert_equal(
286+
[[]],
287+
list(jq.compile(".").input(text='{}\x1e[]\x1e', seq=True))
288+
)
289+
assert_equal(
290+
[{},[]],
291+
list(jq.compile(".").input(text='\x1e{}\x1e[]\x1e', seq=True))
292+
)
293+
294+
244295
def test_unicode_strings_can_be_used_as_programs():
245296
assert_equal(
246297
"Dragon‽",

0 commit comments

Comments
 (0)