Skip to content

Commit f91be41

Browse files
committed
Support parsing application/json-seq
Support parsing RS-separated streams, as per RFC 7464.
1 parent 3580e9d commit f91be41

File tree

2 files changed

+88
-16
lines changed

2 files changed

+88
-16
lines changed

jq.pyx

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ cdef extern from "jv.h":
1919
JV_KIND_ARRAY,
2020
JV_KIND_OBJECT
2121

22+
ctypedef enum:
23+
JV_PARSE_SEQ,
24+
JV_PARSE_STREAMING,
25+
JV_PARSE_STREAM_ERRORS
26+
2227
ctypedef struct jv:
2328
pass
2429

@@ -49,6 +54,7 @@ cdef extern from "jv.h":
4954
jv_parser* jv_parser_new(int)
5055
void jv_parser_free(jv_parser*)
5156
void jv_parser_set_buf(jv_parser*, const char*, int, int)
57+
int jv_parser_remaining(jv_parser*)
5258
jv jv_parser_next(jv_parser*)
5359

5460
jv jv_parse(const char*)
@@ -267,7 +273,12 @@ cdef class _Program(object):
267273
return self.input_text(fileobj.getvalue(), slurp=slurp)
268274

269275
def input_text(self, text, *, slurp=False):
270-
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"), slurp=slurp)
276+
return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"),
277+
slurp=slurp, seq=False)
278+
279+
def input_text_sequence(self, text, *, slurp=False):
    """Bind *text* as input, to be parsed as an RS-separated JSON sequence.

    The text is UTF-8 encoded and handed to ``_ProgramWithInput`` with
    ``seq=True`` (the RFC 7464 ``application/json-seq`` mode); ``slurp``
    is forwarded unchanged.
    """
    encoded_input = text.encode("utf8")
    return _ProgramWithInput(
        self._jq_state_pool,
        encoded_input,
        slurp=slurp,
        seq=True,
    )
271282

272283
@property
273284
def program_string(self):
@@ -291,17 +302,20 @@ cdef class _ProgramWithInput(object):
291302
cdef _JqStatePool _jq_state_pool
292303
cdef object _bytes_input
293304
cdef bint _slurp
305+
cdef bint _seq
294306

295-
def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp):
307+
def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp, bint seq):
296308
self._jq_state_pool = jq_state_pool
297309
self._bytes_input = bytes_input
298310
self._slurp = slurp
311+
self._seq = seq
299312

300313
def __iter__(self):
301314
return self._make_iterator()
302315

303316
cdef _ResultIterator _make_iterator(self):
304-
return _ResultIterator(self._jq_state_pool, self._bytes_input, slurp=self._slurp)
317+
return _ResultIterator(self._jq_state_pool, self._bytes_input,
318+
slurp=self._slurp, seq=self._seq)
305319

306320
def text(self):
307321
# Performance testing suggests that using _jv_to_python (within the
@@ -310,6 +324,9 @@ cdef class _ProgramWithInput(object):
310324
# See: https://github.com/mwilliamson/jq.py/pull/50
311325
return "\n".join(json.dumps(v) for v in self)
312326

327+
def text_sequence(self):
    """Serialise every result as an RFC 7464 JSON text sequence.

    Each result is emitted as a record separator (RS, 0x1E), followed by
    its JSON encoding, followed by a line feed. RFC 7464 defines a
    sequence as zero or more such RS/JSON-text/LF records, so:

    * the final record is LF-terminated as well (the trailing LF is what
      lets a reader tell a complete top-level number from a truncated
      one), and
    * a program that yields no results produces the empty string rather
      than a lone RS.

    Returns:
        str: the concatenated records, or "" when there are no results.
    """
    return "".join("\x1e" + json.dumps(v) + "\n" for v in self)
329+
313330
def all(self):
314331
return list(self)
315332

@@ -329,13 +346,14 @@ cdef class _ResultIterator(object):
329346
self._jq_state_pool.release(self._jq)
330347
jv_parser_free(self._parser)
331348

332-
def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *, bint slurp):
349+
def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *,
350+
bint slurp, bint seq):
333351
self._jq_state_pool = jq_state_pool
334352
self._jq = jq_state_pool.acquire()
335353
self._bytes_input = bytes_input
336354
self._slurp = slurp
337355
self._ready = False
338-
cdef jv_parser* parser = jv_parser_new(0)
356+
cdef jv_parser* parser = jv_parser_new(JV_PARSE_SEQ if seq else 0)
339357
cdef char* cbytes_input
340358
cdef ssize_t clen_input
341359
PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
@@ -384,17 +402,20 @@ cdef class _ResultIterator(object):
384402
return 0
385403

386404
cdef inline jv _parse_next_input(self) except *:
387-
cdef jv value = jv_parser_next(self._parser)
388-
if jv_is_valid(value):
389-
return value
390-
elif jv_invalid_has_msg(jv_copy(value)):
391-
error_message = jv_invalid_get_msg(value)
392-
message = jv_string_to_py_string(error_message)
393-
jv_free(error_message)
394-
raise ValueError(u"parse error: " + message)
395-
else:
396-
jv_free(value)
397-
raise StopIteration()
405+
cdef jv value
406+
while True:
407+
value = jv_parser_next(self._parser)
408+
if jv_is_valid(value):
409+
return value
410+
elif jv_invalid_has_msg(jv_copy(value)):
411+
error_message = jv_invalid_get_msg(value)
412+
message = jv_string_to_py_string(error_message)
413+
jv_free(error_message)
414+
raise ValueError(u"parse error: " + message)
415+
else:
416+
if not jv_parser_remaining(self._parser):
417+
jv_free(value)
418+
raise StopIteration()
398419

399420

400421
def all(program, value=_NO_VALUE, text=_NO_VALUE):

tests/jq_tests.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,57 @@ def test_unicode_strings_can_be_used_as_input():
241241
)
242242

243243

244+
def test_record_separator_character_accepted_in_input():
    # Each case pairs a raw RS-delimited input with the values the identity
    # program is expected to yield for it. In these expectations, text that
    # appears before the first RS (0x1E) contributes no output value, and
    # repeated or trailing RS characters do not produce extra values.
    cases = [
        ('\x1e', []),
        ('\x1e\x1e', []),
        ('\x1e{}', [{}]),
        ('\x1e\x1e{}', [{}]),
        ('{}\x1e', []),
        ('{}\x1e\x1e', []),
        ('\x1e{}\x1e', [{}]),
        ('{}\x1e[]', [[]]),
        ('{}\x1e\x1e[]', [[]]),
        ('\x1e{}\x1e[]', [{}, []]),
        ('{}\x1e[]\x1e', [[]]),
        ('\x1e{}\x1e[]\x1e', [{}, []]),
    ]
    for sequence_text, expected in cases:
        actual = list(jq.compile(".").input_text_sequence(sequence_text))
        assert_equal(expected, actual)
293+
294+
244295
def test_unicode_strings_can_be_used_as_programs():
245296
assert_equal(
246297
"Dragon‽",

0 commit comments

Comments
 (0)