Skip to content

Commit

Permalink
Avoid parsing field name twice when matching named instruction in `ge…
Browse files Browse the repository at this point in the history
…t_json_object` kernel (#2471)

* Temporarily add back benchmark code

Signed-off-by: Nghia Truong <[email protected]>

* Optimize stack data

Signed-off-by: Nghia Truong <[email protected]>

* Revert "Optimize stack data"

This reverts commit 59ac5bf.

* Reorganize code

Signed-off-by: Nghia Truong <[email protected]>

* Perform name matching when parsing name field

Signed-off-by: Nghia Truong <[email protected]>

* Optimize write

Signed-off-by: Nghia Truong <[email protected]>

* Cleanup

Signed-off-by: Nghia Truong <[email protected]>

* Simplify `char_range_reader`

Signed-off-by: Nghia Truong <[email protected]>

* Try to reduce stack data size

Signed-off-by: Nghia Truong <[email protected]>

* Revert "Try to reduce stack data size"

This reverts commit a8d563c.

* Cleanup

Signed-off-by: Nghia Truong <[email protected]>

* Revert benchmark

Signed-off-by: Nghia Truong <[email protected]>

---------

Signed-off-by: Nghia Truong <[email protected]>
  • Loading branch information
ttnghia authored Oct 11, 2024
1 parent 71736bd commit 024b9c6
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 165 deletions.
24 changes: 7 additions & 17 deletions src/main/cpp/src/get_json_object.cu
Original file line number Diff line number Diff line change
Expand Up @@ -304,17 +304,6 @@ __device__ inline thrust::tuple<bool, int> path_match_index(
}
}

__device__ inline thrust::tuple<bool, cudf::string_view> path_match_named(
cudf::device_span<path_instruction const> path)
{
auto match = path_match_element(path, path_instruction_type::NAMED);
if (match) {
return thrust::make_tuple(true, path.data()[0].name);
} else {
return thrust::make_tuple(false, cudf::string_view());
}
}

__device__ inline thrust::tuple<bool, int> path_match_index_wildcard(
cudf::device_span<path_instruction const> path)
{
Expand Down Expand Up @@ -464,7 +453,7 @@ __device__ thrust::pair<bool, cudf::size_type> evaluate_path(
// case (START_OBJECT, Named :: xs)
// case path 4
else if (json_token::START_OBJECT == ctx.token &&
thrust::get<0>(path_match_named(ctx.path))) {
ctx.path.front().type == path_instruction_type::NAMED) {
if (!ctx.is_first_enter) {
// 2st enter
// skip the following children after the expect
Expand Down Expand Up @@ -492,15 +481,16 @@ __device__ thrust::pair<bool, cudf::size_type> evaluate_path(
ctx.is_first_enter = false;
// match first mached children with expected name
bool found_expected_child = false;
while (json_token::END_OBJECT != p.next_token()) {
auto const to_match_name = ctx.path.front().name;
while (true) {
auto const is_name_matched = p.parse_next_token_with_matching(to_match_name);
if (json_token::END_OBJECT == p.get_current_token()) { break; }

// JSON validation check
if (json_token::ERROR == p.get_current_token()) { return {false, 0}; }

// need to try more children
auto match_named = path_match_named(ctx.path);
auto named = thrust::get<1>(match_named);
// current token is FIELD_NAME
if (p.match_current_field_name(named)) {
if (is_name_matched) {
// skip FIELD_NAME token
p.next_token();
// JSON validation check
Expand Down
Loading

0 comments on commit 024b9c6

Please sign in to comment.