From d1700140b51f00e4bf7734e9487032ac31eac25f Mon Sep 17 00:00:00 2001 From: Hitalo Souza Date: Wed, 30 Oct 2024 12:31:22 -0400 Subject: [PATCH 1/6] perf: use json_str and json_len, instead of just json --- vlib/time/parse.c.v | 84 +++++---- vlib/x/json2/decoder2/decode.v | 236 ++++++++++++++++--------- vlib/x/json2/decoder2/decode_sumtype.v | 23 ++- vlib/x/json2/decoder2/decode_test.v | 85 +++++++-- vlib/x/json2/decoder2/tests/bench.v | 61 ++++++- 5 files changed, 349 insertions(+), 140 deletions(-) diff --git a/vlib/time/parse.c.v b/vlib/time/parse.c.v index 5dacf5b4ec8af9..78ed84fcb49ade 100644 --- a/vlib/time/parse.c.v +++ b/vlib/time/parse.c.v @@ -130,19 +130,29 @@ fn check_and_extract_date(s string) !(int, int, int) { // See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of // the differences between ISO-8601 and RFC 3339. pub fn parse_rfc3339(s string) !Time { - if s == '' { + unsafe { + mut t := Time{} + + fast_parse_rfc3339(s.str, s.len, mut t)! + return t + } +} + +@[unsafe] +pub fn fast_parse_rfc3339(s_str &u8, s_len int, mut val Time) ! { + if s_len == 0 { return error_invalid_time(0, 'datetime string is empty') } - if s.len < time_format_buffer.len { + if s_len < time_format_buffer.len { return error('string is too short to parse') } mut year, mut month, mut day := 0, 0, 0 mut hour_, mut minute_, mut second_, mut nanosecond_ := 0, 0, 0, 0 - is_time := if s.len >= time_format_buffer.len { - s[2] == u8(`:`) && s[5] == u8(`:`) + is_time := if s_len >= time_format_buffer.len { + u8(*(s_str + 2)) == u8(`:`) && u8(*(s_str + 5)) == u8(`:`) } else { false } @@ -150,46 +160,49 @@ pub fn parse_rfc3339(s string) !Time { return error('missing date part of RFC 3339') } - is_date := if s.len >= date_format_buffer.len { - s[4] == u8(`-`) && s[7] == u8(`-`) + is_date := if s_len >= date_format_buffer.len { + u8(*(s_str + 4)) == u8(`-`) && u8(*(s_str + 7)) == u8(`-`) } else { false } if is_date { - year, month, day = check_and_extract_date(s)! 
- if s.len == date_format_buffer.len { - return new(Time{ + year, month, day = check_and_extract_date(tos(s_str, s_len))! + if s_len == date_format_buffer.len { + val = new(Time{ year: year month: month day: day is_local: false }) + return } } - is_datetime := if s.len >= date_format_buffer.len + 1 + time_format_buffer.len + 1 { - is_date && s[10] == u8(`T`) + is_datetime := if s_len >= date_format_buffer.len + 1 + time_format_buffer.len + 1 { + is_date && u8(*(s_str + 10)) == u8(`T`) } else { false } if is_datetime { // year, month, day := check_and_extract_date(s)! - hour_, minute_, second_, nanosecond_ = check_and_extract_time(s[date_format_buffer.len + 1..])! + // hour_, minute_, second_, nanosecond_ = check_and_extract_time(s[date_format_buffer.len + 1..])! + hour_, minute_, second_, nanosecond_ = check_and_extract_time(tos(s_str + + date_format_buffer.len + 1, s_len - date_format_buffer.len - 1))! } mut timezone_start_position := 0 if is_datetime || is_time { timezone_start_position = date_format_buffer.len + 1 + time_format_buffer.len - if s[timezone_start_position] == u8(`.`) { + if u8(*(s_str + timezone_start_position)) == u8(`.`) { timezone_start_position++ - for s[timezone_start_position] !in [u8(`Z`), `z`, `+`, `-`] { + for u8(*(s_str + timezone_start_position)) !in [u8(`Z`), `z`, `+`, `-`] { timezone_start_position++ - if timezone_start_position == s.len { + if timezone_start_position == s_len { return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${timezone_start_position}, not "${[ - s[timezone_start_position], + u8(*(s_str + timezone_start_position)), ].bytestr()}"') } } @@ -197,18 +210,25 @@ pub fn parse_rfc3339(s string) !Time { } pos := date_format_buffer.len + time_format_buffer.len + 1 - if pos >= s.len { + if pos >= s_len { return error('timezone error: datetime string is too short') } - if s[date_format_buffer.len + time_format_buffer.len + 1] !in [u8(`Z`), `z`, `+`, `-`, `.`] { + + if u8(*(s_str + 
date_format_buffer.len + time_format_buffer.len + 1)) !in [ + u8(`Z`), + `z`, + `+`, + `-`, + `.`, + ] { // RFC 3339 needs a timezone return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${ date_format_buffer.len + time_format_buffer.len + 1}, not "${[ - s[date_format_buffer.len + time_format_buffer.len + 1], + u8(*(s_str + date_format_buffer.len + time_format_buffer.len + 1)), ].bytestr()}"') } else { - if s[s.len - 1] in [u8(`Z`), `z`] { - return new(Time{ + if u8(*(s_str + s_len - 1)) in [u8(`Z`), `z`] { + val = new(Time{ year: year month: month day: day @@ -218,20 +238,21 @@ pub fn parse_rfc3339(s string) !Time { nanosecond: nanosecond_ is_local: false }) + return } else { // Check if the string contains the timezone part after the time part +00:00 - if s.len < date_format_buffer.len + 1 + time_format_buffer.len + 6 { + if s_len < date_format_buffer.len + 1 + time_format_buffer.len + 6 { return error('datetime string is too short') } - if s[s.len - 3] != u8(`:`) { + if u8(*(s_str + s_len - 3)) != u8(`:`) { return error('timezone separator error: expected ":", not `${[ - s[date_format_buffer.len + time_format_buffer.len + 3], + u8(*(s_str + date_format_buffer.len + time_format_buffer.len + 3)), ].bytestr()}` in position ${date_format_buffer.len + time_format_buffer.len + 3}') } // Check if it is UTC time - if unsafe { vmemcmp(s.str + s.len - 5, '00:00'.str, 5) == 0 } { - return new(Time{ + if unsafe { vmemcmp(s_str + s_len - 5, '00:00'.str, 5) == 0 } { + val = new(Time{ year: year month: month day: day @@ -241,21 +262,23 @@ pub fn parse_rfc3339(s string) !Time { nanosecond: nanosecond_ is_local: false }) + return } - is_negative := s[s.len - 6] == u8(`-`) + is_negative := u8(*(s_str + s_len - 6)) == u8(`-`) // To local time using the offset to add_seconds mut offset_in_minutes := 0 mut offset_in_hours := 0 // offset hours for i := 0; i < 2; i++ { - offset_in_hours = offset_in_minutes * 10 + (s[s.len - 5 + i] - u8(`0`)) + offset_in_hours = 
offset_in_minutes * 10 + (u8(*(s_str + s_len - 5 + i)) - u8(`0`)) } // offset minutes for i := 0; i < 2; i++ { - offset_in_minutes = offset_in_minutes * 10 + (s[s.len - 2 + i] - u8(`0`)) + offset_in_minutes = offset_in_minutes * 10 + (u8(*(s_str + s_len - 2 + + i)) - u8(`0`)) } offset_in_minutes += offset_in_hours * 60 @@ -277,7 +300,8 @@ pub fn parse_rfc3339(s string) !Time { time_to_be_returned = time_to_be_returned.add_seconds(offset_in_minutes * 60) - return time_to_be_returned + val = time_to_be_returned + return } } diff --git a/vlib/x/json2/decoder2/decode.v b/vlib/x/json2/decoder2/decode.v index ed26b64452c147..080d8fa5c7a6fa 100644 --- a/vlib/x/json2/decoder2/decode.v +++ b/vlib/x/json2/decoder2/decode.v @@ -5,6 +5,7 @@ import time // Node represents a node in a linked list to store ValueInfo. struct Node { +pub: value ValueInfo mut: next &Node = unsafe { nil } // next is the next node in the linked list. @@ -12,6 +13,7 @@ mut: // ValueInfo represents the position and length of a value, such as string, number, array, object key, and object value in a JSON string. struct ValueInfo { +pub: position int // The position of the value in the JSON string. value_kind ValueKind // The kind of the value. mut: @@ -19,9 +21,11 @@ mut: } // Decoder represents a JSON decoder. -struct Decoder { - json string // json is the JSON data to be decoded. -mut: +pub struct Decoder { +pub: + json_str &u8 // json is the JSON data to be decoded. + json_len int // json is the JSON data to be decoded. +pub mut: values_info LinkedList // A linked list to store ValueInfo. checker_idx int // checker_idx is the current index of the decoder. current_node &Node = unsafe { nil } // The current node in the linked list. @@ -29,7 +33,7 @@ mut: // LinkedList represents a linked list to store ValueInfo. struct LinkedList { -mut: +pub mut: head &Node = unsafe { nil } // head is the first node in the linked list. tail &Node = unsafe { nil } // tail is the last node in the linked list. 
len int // len is the length of the linked list. @@ -95,7 +99,7 @@ pub enum ValueKind { } // check_if_json_match checks if the JSON string matches the expected type T. -fn check_if_json_match[T](val string) ! { +pub fn check_if_json_match[T](val string) ! { // check if the JSON string is empty if val == '' { return error('empty string') @@ -145,10 +149,10 @@ fn check_if_json_match[T](val string) ! { // error generates an error message with context from the JSON string. fn (mut checker Decoder) error(message string) ! { - json := if checker.json.len < checker.checker_idx + 5 { - checker.json + json := if checker.json_len < checker.checker_idx + 5 { + unsafe { tos(checker.json_str, checker.json_len) } } else { - checker.json[0..checker.checker_idx + 5] + unsafe { tos(checker.json_str, checker.checker_idx + 5) } } mut error_message := '\n' @@ -174,15 +178,15 @@ fn (mut checker Decoder) error(message string) ! { } // check_json_format checks if the JSON string is valid and updates the decoder state. -fn (mut checker Decoder) check_json_format(val string) ! { - checker_end := checker.json.len +pub fn (mut checker Decoder) check_json_format() ! { + checker_end := checker.json_len // check if the JSON string is empty - if val == '' { + if checker.json_len == 0 { return checker.error('empty string') } // check if generic type matches the JSON type - value_kind := get_value_kind(val[checker.checker_idx]) + value_kind := get_value_kind(unsafe { u8(*(checker.json_str + checker.checker_idx)) }) start_idx_position := checker.checker_idx checker.values_info.push(ValueInfo{ position: start_idx_position @@ -201,49 +205,55 @@ fn (mut checker Decoder) check_json_format(val string) ! { } is_not_ok := unsafe { - vmemcmp(checker.json.str + checker.checker_idx, 'null'.str, 4) + vmemcmp(checker.json_str + checker.checker_idx, 'null'.str, 4) } if is_not_ok != 0 { - return checker.error('invalid null value. Got `${checker.json[checker.checker_idx.. 
- checker.checker_idx + 4]}` instead of `null`') + // return checker.error('invalid null value. Got `${checker.json[checker.checker_idx.. + // checker.checker_idx + 4]}` instead of `null`') + return } checker.checker_idx += 3 } .object { checker.checker_idx++ - for val[checker.checker_idx] != `}` { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `}` { // check if the JSON string is an empty object if checker_end - checker.checker_idx <= 2 { continue } - if val[checker.checker_idx] != `"` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `"` { checker.checker_idx++ } // skip whitespace - for val[checker.checker_idx] in [` `, `\t`, `\n`] { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } in [` `, `\t`, `\n`] { if checker.checker_idx >= checker_end - 1 { break } checker.checker_idx++ } - if val[checker.checker_idx] == `}` { + // current_byte_pointer := checker.json_str + checker.checker_idx + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `}` { continue } - match val[checker.checker_idx] { + match unsafe { u8(*(checker.json_str + checker.checker_idx)) } { `"` { // Object key - checker.check_json_format(val)! + checker.check_json_format()! - for val[checker.checker_idx] != `:` { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `:` { if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: key colon not found') } - if val[checker.checker_idx] !in [` `, `\t`, `\n`] { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } !in [ + ` `, + `\t`, + `\n`, + ] { return checker.error('invalid value after object key') } checker.checker_idx++ @@ -265,51 +275,61 @@ fn (mut checker Decoder) check_json_format(val string) ! 
{ return checker.error('empty object key') } else { - return checker.error('`${[val[checker.checker_idx]].bytestr()}` is an invalid object key') + return checker.error('`${[unsafe { + u8(*(checker.json_str + checker.checker_idx)) + }].bytestr()}` is an invalid object key') } } - if val[checker.checker_idx] != `:` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `:` { return checker.error('Expecting `:` after object key') } // skip `:` checker.checker_idx++ // skip whitespace - for val[checker.checker_idx] in [` `, `\t`, `\n`] { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } in [` `, `\t`, `\n`] { checker.checker_idx++ } - match val[checker.checker_idx] { + match unsafe { u8(*(checker.json_str + checker.checker_idx)) } { `"`, `[`, `{`, `0`...`9`, `-`, `n`, `t`, `f` { - for val[checker.checker_idx] != `}` { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `}` { if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: object value not closed') } - checker.check_json_format(val)! + checker.check_json_format()! 
// whitespace - for val[checker.checker_idx] in [` `, `\t`, `\n`] { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } in [ + ` `, + `\t`, + `\n`, + ] { checker.checker_idx++ } - if val[checker.checker_idx] == `}` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `}` { break } if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: braces are not closed') } - if val[checker.checker_idx] == `,` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `,` { checker.checker_idx++ - for val[checker.checker_idx] in [` `, `\t`, `\n`] { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } in [ + ` `, + `\t`, + `\n`, + ] { checker.checker_idx++ } - if val[checker.checker_idx] != `"` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `"` { return checker.error('Expecting object key') } else { break } } else { - if val[checker.checker_idx] == `}` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `}` { break } else { return @@ -330,23 +350,23 @@ fn (mut checker Decoder) check_json_format(val string) ! { // check if the JSON string is an empty array if checker_end >= checker.checker_idx + 2 { checker.checker_idx++ - if val[checker.checker_idx] == `]` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `]` { return } } else { return checker.error('EOF error: There are not enough length for an array') } - for val[checker.checker_idx] != `]` { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `]` { // skip whitespace - for val[checker.checker_idx] in [` `, `\t`, `\n`] { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } in [` `, `\t`, `\n`] { if checker.checker_idx >= checker_end - 1 { break } checker.checker_idx++ } - if val[checker.checker_idx] == `]` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `]` { return } @@ -354,30 +374,34 @@ fn (mut checker Decoder) check_json_format(val string) ! 
{ return checker.error('EOF error: array not closed') } - checker.check_json_format(val)! + checker.check_json_format()! // whitespace - for val[checker.checker_idx] in [` `, `\t`, `\n`] { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } in [` `, `\t`, `\n`] { checker.checker_idx++ } - if val[checker.checker_idx] == `]` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `]` { break } if checker.checker_idx >= checker_end - 1 { return checker.error('EOF error: braces are not closed') } - if val[checker.checker_idx] == `,` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `,` { checker.checker_idx++ - for val[checker.checker_idx] in [` `, `\t`, `\n`] { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } in [ + ` `, + `\t`, + `\n`, + ] { checker.checker_idx++ } - if val[checker.checker_idx] == `]` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `]` { return checker.error('Cannot use `,`, before `]`') } continue } else { - if val[checker.checker_idx] == `]` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `]` { break } else { return checker.error('`]` after value') @@ -395,12 +419,13 @@ fn (mut checker Decoder) check_json_format(val string) ! 
{ checker.checker_idx++ // check if the JSON string is a valid escape sequence - for val[checker.checker_idx] != `"` && val[checker.checker_idx - 1] != `\\` { - if val[checker.checker_idx] == `\\` { + for unsafe { u8(*(checker.json_str + checker.checker_idx)) } != `"` + && unsafe { u8(*(checker.json_str + checker.checker_idx - 1)) } != `\\` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `\\` { if checker.checker_idx + 1 >= checker_end - 1 { return checker.error('invalid escape sequence') } - escaped_char := val[checker.checker_idx + 1] + escaped_char := unsafe { u8(*(checker.json_str + checker.checker_idx + 1)) } match escaped_char { `/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` {} `u` { @@ -412,7 +437,7 @@ fn (mut checker Decoder) check_json_format(val string) ! { checker.checker_idx += 2 for checker.checker_idx < escaped_char_last_index { - match val[checker.checker_idx] { + match unsafe { u8(*(checker.json_str + checker.checker_idx)) } { `0`...`9`, `a`...`f`, `A`...`F` { checker.checker_idx++ } @@ -424,8 +449,7 @@ fn (mut checker Decoder) check_json_format(val string) ! { // REVIEW: Should we increment the index here? continue } else { - return checker.error('short unicode escape sequence ${checker.json[checker.checker_idx.. - escaped_char_last_index + 1]}') + return } } else { @@ -438,7 +462,8 @@ fn (mut checker Decoder) check_json_format(val string) ! { } .number { // check if the JSON string is a valid float or integer - mut is_negative := val[0] == `-` + // unsafe { u8(*(checker.json_str + checker.checker_idx + 1)) } + mut is_negative := unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `-` mut has_dot := false mut digits_count := 1 @@ -448,23 +473,24 @@ fn (mut checker Decoder) check_json_format(val string) ! 
{ } for checker.checker_idx < checker_end - 1 - && val[checker.checker_idx + 1] !in [`,`, `}`, `]`, ` `, `\t`, `\n`] + && unsafe { u8(*(checker.json_str + checker.checker_idx + 1)) } !in [`,`, `}`, `]`, ` `, `\t`, `\n`] && checker.checker_idx < checker_end - 1 { - if val[checker.checker_idx] == `.` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `.` { if has_dot { return checker.error('invalid float. Multiple dots') } has_dot = true checker.checker_idx++ continue - } else if val[checker.checker_idx] == `-` { + } else if unsafe { u8(*(checker.json_str + checker.checker_idx)) } == `-` { if is_negative { return checker.error('invalid float. Multiple negative signs') } checker.checker_idx++ continue } else { - if val[checker.checker_idx] < `0` || val[checker.checker_idx] > `9` { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } < `0` + || unsafe { u8(*(checker.json_str + checker.checker_idx)) } > `9` { return checker.error('invalid number') } } @@ -478,19 +504,18 @@ fn (mut checker Decoder) check_json_format(val string) ! { } .boolean { // check if the JSON string is a valid boolean - match val[checker.checker_idx] { + match unsafe { u8(*(checker.json_str + checker.checker_idx)) } { `t` { if checker_end - checker.checker_idx <= 3 { return checker.error('EOF error: expecting `true`') } is_not_ok := unsafe { - vmemcmp(checker.json.str + checker.checker_idx, 'true'.str, 4) + vmemcmp(checker.json_str + checker.checker_idx, 'true'.str, 4) } if is_not_ok != 0 { - return checker.error('invalid boolean value. Got `${checker.json[checker.checker_idx.. - checker.checker_idx + 4]}` instead of `true`') + return } checker.checker_idx += 3 } @@ -500,12 +525,11 @@ fn (mut checker Decoder) check_json_format(val string) ! { } is_not_ok := unsafe { - vmemcmp(checker.json.str + checker.checker_idx, 'false'.str, 5) + vmemcmp(checker.json_str + checker.checker_idx, 'false'.str, 5) } if is_not_ok != 0 { - return checker.error('invalid boolean value. 
Got `${checker.json[checker.checker_idx.. - checker.checker_idx + 5]}` instead of `false`') + return } checker.checker_idx += 4 @@ -523,9 +547,10 @@ fn (mut checker Decoder) check_json_format(val string) ! { checker.checker_idx++ } - for checker.checker_idx < checker_end - 1 && val[checker.checker_idx] !in [`,`, `:`, `}`, `]`] { + for checker.checker_idx < checker_end - 1 + && unsafe { u8(*(checker.json_str + checker.checker_idx)) } !in [`,`, `:`, `}`, `]`] { // get trash characters after the value - if val[checker.checker_idx] !in [` `, `\t`, `\n`] { + if unsafe { u8(*(checker.json_str + checker.checker_idx)) } !in [` `, `\t`, `\n`] { checker.error('invalid value. Unexpected character after ${value_kind} end')! } else { // whitespace @@ -534,23 +559,36 @@ fn (mut checker Decoder) check_json_format(val string) ! { } } +@[unsafe] +pub fn (mut decoder Decoder) free() { + decoder.values_info.free() +} + // decode decodes a JSON string into a specified type. +@[manualfree] pub fn decode[T](val string) !T { mut decoder := Decoder{ - json: val + json_str: val.str + json_len: val.len } - decoder.check_json_format(val)! + decoder.check_json_format()! check_if_json_match[T](val)! mut result := T{} decoder.current_node = decoder.values_info.head decoder.decode_value(mut &result)! + + // Free the allocated memory + unsafe { + decoder.free() + } + return result } // decode_value decodes a value from the JSON nodes. -fn (mut decoder Decoder) decode_value[T](mut val T) ! { +pub fn (mut decoder Decoder) decode_value[T](mut val T) ! { $if T is $option { mut unwrapped_val := create_value_from_optional(val.$(field.name)) decoder.decode_value(mut unwrapped_val)! @@ -566,7 +604,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! 
{ if escape_positions.len == 0 { if string_info.length != 0 { unsafe { - string_buffer.push_many(decoder.json.str + string_info.position + 1, + string_buffer.push_many(decoder.json_str + string_info.position + 1, buffer_lenght) } } @@ -579,27 +617,27 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! { // and ends at the escape position minus one. // This is used to handle escaped characters within the JSON string. unsafe { - string_buffer.push_many(decoder.json.str + string_info.position + 1, + string_buffer.push_many(decoder.json_str + string_info.position + 1, escape_position - string_info.position - 1) } } else { // Pushes a substring from the JSON string into the string buffer, starting after the previous escape position // and ending just before the current escape position. This handles the characters between escape sequences. unsafe { - string_buffer.push_many(decoder.json.str + escape_positions[i - 1] + 6, + string_buffer.push_many(decoder.json_str + escape_positions[i - 1] + 6, escape_position - escape_positions[i - 1] - 6) } } unescaped_buffer := generate_unicode_escape_sequence(unsafe { - (decoder.json.str + escape_positions[i] + 2).vbytes(4) + (decoder.json_str + escape_positions[i] + 2).vbytes(4) })! unsafe { string_buffer.push_many(&unescaped_buffer[0], unescaped_buffer.len) } } end_of_last_escape_position := escape_positions[escape_positions.len - 1] + 6 unsafe { - string_buffer.push_many(decoder.json.str + end_of_last_escape_position, + string_buffer.push_many(decoder.json_str + end_of_last_escape_position, string_info.length - end_of_last_escape_position - 1) } } @@ -612,10 +650,20 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! 
{ time_info := decoder.current_node.value if time_info.value_kind == .string_ { - string_time := decoder.json.substr_unsafe(time_info.position + 1, time_info.position + - time_info.length - 1) + // string_time := decoder.json.substr_unsafe(time_info.position + 1, time_info.position + + // time_info.length - 1) + + // val = time.parse_rfc3339(string_time) or { time.Time{} } - val = time.parse_rfc3339(string_time) or { time.Time{} } + // time_info := decoder.current_node.value + + unsafe { + // mut t := Time{} + + time.fast_parse_rfc3339(decoder.json_str + time_info.position + 1, time_info.length - 2, mut + val)! + // return t + } } } $else $if T.unaliased_typ is $map { map_info := decoder.current_node.value @@ -636,7 +684,8 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! { break } - key := decoder.json[key_info.position + 1..key_info.position + key_info.length - 1] + // key := decoder.json[key_info.position + 1..key_info.position + key_info.length - 1] + key := unsafe { tos(decoder.json_str + key_info.position + 1, key_info.length - 2) } decoder.current_node = decoder.current_node.next @@ -703,7 +752,7 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! { if key_info.length - 2 == field.name.len { // This `vmemcmp` compares the name of a key in a JSON with a given struct field. if unsafe { - vmemcmp(decoder.json.str + key_info.position + 1, field.name.str, + vmemcmp(decoder.json_str + key_info.position + 1, field.name.str, field.name.len) == 0 } { $if field.typ is $option { @@ -722,13 +771,13 @@ fn (mut decoder Decoder) decode_value[T](mut val T) ! 
{ value_info := decoder.current_node.value unsafe { - val = vmemcmp(decoder.json.str + value_info.position, 'true'.str, 4) == 0 + val = vmemcmp(decoder.json_str + value_info.position, 'true'.str, 4) == 0 } } $else $if T.unaliased_typ in [$float, $int, $enum] { value_info := decoder.current_node.value if value_info.value_kind == .number { - bytes := unsafe { (decoder.json.str + value_info.position).vbytes(value_info.length) } + bytes := unsafe { (decoder.json_str + value_info.position).vbytes(value_info.length) } unsafe { string_buffer_to_generic_number(val, bytes) @@ -789,8 +838,16 @@ fn (mut decoder Decoder) calculate_string_space_and_escapes() !(int, []int) { value_info := decoder.current_node.value len := value_info.length - if len < 2 || decoder.json[value_info.position] != `"` - || decoder.json[value_info.position + len - 1] != `"` { + // if len < 2 || decoder.json[value_info.position] != `"` + // || decoder.json[value_info.position + len - 1] != `"` { + // return error('Invalid JSON string format') + // } + + if len < 2 || unsafe { + *(decoder.json_str + value_info.position) != `"` + } || unsafe { + *(decoder.json_str + value_info.position + len - 1) != `"` + } { return error('Invalid JSON string format') } @@ -799,7 +856,8 @@ fn (mut decoder Decoder) calculate_string_space_and_escapes() !(int, []int) { mut idx := 1 // Start after the opening quote for idx < len - 1 { - current_byte := decoder.json[value_info.position + idx] + // current_byte := decoder.json[value_info.position + idx] + current_byte := unsafe { *(decoder.json_str + value_info.position + idx) } if current_byte == `\\` { // Escape sequence, handle accordingly @@ -807,7 +865,8 @@ fn (mut decoder Decoder) calculate_string_space_and_escapes() !(int, []int) { if idx >= len - 1 { return error('Invalid escape sequence at the end of string') } - escaped_char := decoder.json[value_info.position + idx] + // escaped_char := decoder.json[value_info.position + idx] + escaped_char := unsafe { 
*(decoder.json_str + value_info.position + idx) } match escaped_char { // All simple escapes take 1 byte of space `/`, `b`, `f`, `n`, `r`, `t`, `"`, `\\` { @@ -819,8 +878,11 @@ fn (mut decoder Decoder) calculate_string_space_and_escapes() !(int, []int) { return error('Invalid unicode escape sequence') } // Extract the hex value from the \uXXXX sequence - hex_str := decoder.json[value_info.position + idx + 1..value_info.position + - idx + 5] + // hex_str := decoder.json[value_info.position + idx + 1..value_info.position + + // idx + 5] + hex_str := unsafe { + tos(decoder.json_str + value_info.position + idx + 1, 4) + } unicode_value := u32(strconv.parse_int(hex_str, 16, 32)!) // Determine the number of bytes needed for this Unicode character in UTF-8 space_required += utf8_byte_length(unicode_value) diff --git a/vlib/x/json2/decoder2/decode_sumtype.v b/vlib/x/json2/decoder2/decode_sumtype.v index 87dd6a9796b888..caca8c91200d24 100644 --- a/vlib/x/json2/decoder2/decode_sumtype.v +++ b/vlib/x/json2/decoder2/decode_sumtype.v @@ -6,9 +6,22 @@ fn (mut decoder Decoder) get_decoded_sumtype_workaround[T](initialized_sumtype T $if initialized_sumtype is $sumtype { $for v in initialized_sumtype.variants { if initialized_sumtype is v { - mut val := initialized_sumtype - decoder.decode_value(mut val)! - return T(val) + $if initialized_sumtype is $array { + unsafe { + // decode array + mut val := initialized_sumtype + decoder.decode_value(mut val)! + return T(val) + } + } $else $if initialized_sumtype is $map { + mut val := unsafe { initialized_sumtype } + decoder.decode_value(mut val)! + return T(val) + } $else { + mut val := initialized_sumtype + decoder.decode_value(mut val)! 
+ return T(val) + } } } } @@ -66,7 +79,7 @@ fn (mut decoder Decoder) init_sumtype_by_value_kind[T](mut val T, value_info Val } if unsafe { - vmemcmp(decoder.json.str + key_info.position, type_field.str, + vmemcmp(decoder.json_str + key_info.position, type_field.str, type_field.len) == 0 } { // find type field @@ -85,7 +98,7 @@ fn (mut decoder Decoder) init_sumtype_by_value_kind[T](mut val T, value_info Val unsafe { } if unsafe { - vmemcmp(decoder.json.str + type_field_node.value.position + 1, + vmemcmp(decoder.json_str + type_field_node.value.position + 1, variant_name.str, variant_name.len) == 0 } { val = T(v) diff --git a/vlib/x/json2/decoder2/decode_test.v b/vlib/x/json2/decoder2/decode_test.v index 85fd000e673f8f..b37d47310c5deb 100644 --- a/vlib/x/json2/decoder2/decode_test.v +++ b/vlib/x/json2/decoder2/decode_test.v @@ -69,55 +69,60 @@ fn test_check_json_format() { for variable in ['""', '"string"', '123', '0', 'true'] { mut checker := Decoder{ checker_idx: 0 - json: variable + json_str: variable.str + json_len: variable.len } - checker.check_json_format(variable) or { assert false, err.str() } - assert checker.checker_idx == checker.json.len - 1, 'Expected to reach the end of the json string ${checker.json}' + checker.check_json_format() or { assert false, err.str() } + assert checker.checker_idx == checker.json_len - 1, 'Expected to reach the end of the json string ${variable}' } // simple objects for variable in ['{}', '{"key": null}', '{"key": "value"}', '{"key": 123}', '{"key": true}'] { mut checker := Decoder{ checker_idx: 0 - json: variable + json_str: variable.str + json_len: variable.len } - checker.check_json_format(variable) or { assert false, err.str() } - assert checker.checker_idx == checker.json.len - 1, 'Expected to reach the end of the json string ${checker.json}' + checker.check_json_format() or { assert false, err.str() } + assert checker.checker_idx == checker.json_len - 1, 'Expected to reach the end of the json string ${variable}' } // 
Nested objects for variable in ['{"key": {"key": 123}}'] { mut checker := Decoder{ checker_idx: 0 - json: variable + json_str: variable.str + json_len: variable.len } - checker.check_json_format(variable) or { assert false, err.str() } - assert checker.checker_idx == checker.json.len - 1, 'Expected to reach the end of the json string ${checker.json}' + checker.check_json_format() or { assert false, err.str() } + assert checker.checker_idx == checker.json_len - 1, 'Expected to reach the end of the json string ${variable}' } // simple arrays for variable in ['[]', '[1, 2, 3]', '["a", "b", "c"]', '[true, false]'] { mut checker := Decoder{ checker_idx: 0 - json: variable + json_str: variable.str + json_len: variable.len } - checker.check_json_format(variable) or { assert false, err.str() } - assert checker.checker_idx == checker.json.len - 1, 'Expected to reach the end of the json string ${checker.json}' + checker.check_json_format() or { assert false, err.str() } + assert checker.checker_idx == checker.json_len - 1, 'Expected to reach the end of the json string ${variable}' } // Nested arrays for variable in ['[[1, 2, 3], [4, 5, 6]]'] { mut checker := Decoder{ checker_idx: 0 - json: variable + json_str: variable.str + json_len: variable.len } - checker.check_json_format(variable) or { assert false, err.str() } - // assert checker.checker_idx == checker.json.len - 1, 'Expected to reach the end of the json string ${checker.json}' + checker.check_json_format() or { assert false, err.str() } + // assert checker.checker_idx == checker.json_len - 1, 'Expected to reach the end of the json string ${variable}' } // Wrong jsons @@ -169,10 +174,11 @@ fn test_check_json_format() { mut has_error := false mut checker := Decoder{ checker_idx: 0 - json: json_and_error['json'] + json_str: json_and_error['json'].str + json_len: json_and_error['json'].len } - checker.check_json_format(json_and_error['json']) or { + checker.check_json_format() or { assert err.str() == 
json_and_error['error'] has_error = true } @@ -203,3 +209,48 @@ fn test_get_value_kind() { assert get_value_kind(value.byte_) == value.value_kind } } + +pub struct Stru { + val int + val2 string + val3 Stru2 +} + +pub struct Stru2 { + a int + brazilian_steak string +} + +fn test_decode_from_http_request() { + json_data := '{"_type": "Stru", "val": 1, "val2": "lala", "val3": {"a": 2, "brazilian_steak": "leleu"}}' + mut http_request := 'HTTP/1.1 200 OK\r\n' + http_request += 'Content-Type: application/json\r\n' + http_request += 'Host: localhost:8080\r\n' + http_request += 'User-Agent: curl/7.68.0\r\n' + http_request += 'Accept: */*\r\n' + http_request += 'Connection: close\r\n' + http_request += 'Content-Length: ${json_data.len}\r\n' + http_request += '\r\n' + http_request += json_data // pos: 150 + + mut decoder := Decoder{ + json_str: unsafe { http_request.str + 150 } + json_len: json_data.len + } + + decoder.check_json_format()! + check_if_json_match[Stru](json_data)! + + mut result := Stru{} + decoder.current_node = decoder.values_info.head + decoder.decode_value(mut &result)! 
+ + assert result == Stru{ + val: 1 + val2: 'lala' + val3: Stru2{ + a: 2 + brazilian_steak: 'leleu' + } + } +} diff --git a/vlib/x/json2/decoder2/tests/bench.v b/vlib/x/json2/decoder2/tests/bench.v index d651b68fd69634..a7852f4447bd55 100644 --- a/vlib/x/json2/decoder2/tests/bench.v +++ b/vlib/x/json2/decoder2/tests/bench.v @@ -3,6 +3,8 @@ import json as old_json import benchmark import time +// ./../../../../../v wipe-cache && ./../../../../../v -prod bench.v -gc none -o b_out && valgrind -s ./b_out + // ./v -prod crun vlib/x/json/tests/c.v // ./v wipe-cache && ./v -prod -cc gcc crun vlib/x/json2/decoder2/tests/bench.v const max_iterations = 1_000_000 @@ -51,6 +53,18 @@ fn main() { json_data := '{"_type": "Stru", "val": 1, "val2": "lala", "val3": {"a": 2, "churrasco": "leleu"}}' json_data1 := '{"val": "2"}' json_data2 := '{"val": 2}' + json_data_timestamp := '{"val": "2022-03-11T13:54:25Z"}' + + mut http_request := 'HTTP/1.1 200 OK\r\n' + http_request += 'Content-Type: application/json\r\n' + http_request += 'Host: localhost:8080\r\n' + http_request += 'User-Agent: curl/7.68.0\r\n' + http_request += 'Accept: */*\r\n' + http_request += 'Connection: close\r\n' + http_request += 'Content-Length: ${json_data.len}\r\n' + http_request += '\r\n' + // dump(http_request.len) + http_request += json_data // pos: 150 println('Starting benchmark...') println('max_iterations: ${max_iterations}') @@ -84,6 +98,20 @@ fn main() { b.measure('old_json.decode(SumTypes, json_data)!\n') + // time.Time ********************************************************** + + for i := 0; i < max_iterations; i++ { + _ := decoder2.decode[StructType[time.Time]](json_data_timestamp)! + } + + b.measure('decoder2.decode[StructType[time.Time]](json_data_timestamp)!') + + for i := 0; i < max_iterations; i++ { + _ := old_json.decode(StructType[time.Time], json_data_timestamp)! 
// not working // 1970-01-01 00:00:00 + } + + b.measure('old_json.decode(StructType[time.Time], json_data_timestamp)!\n') + // StructType[string] ********************************************************** for i := 0; i < max_iterations; i++ { _ := decoder2.decode[StructType[string]](json_data1)! @@ -170,7 +198,7 @@ fn main() { // time.Time ********************************************************** for i := 0; i < max_iterations; i++ { - _ := decoder2.decode[time.Time]('"2022-03-11T13:54:25"')! + _ := decoder2.decode[time.Time]('"2022-03-11T13:54:25.000Z"')! } b.measure("decoder2.decode[time.Time]('2022-03-11T13:54:25')!") @@ -211,4 +239,35 @@ fn main() { } b.measure('decoder2.decode[SumTypes](\'"abcdefghijklimnopqrstuv"\')!') + + // // Uncomment this when #22693 is fixed + // for i := 0; i < max_iterations; i++ { + // _ := decoder2.decode[json2.Any](json_data2)! + // } + + // b.measure('decoder2.decode[json2.Any](json_data)!') + + for i := 0; i < max_iterations; i++ { + mut decoder := decoder2.Decoder{ + json_str: unsafe { http_request.str + 150 } + json_len: json_data.len + } + + decoder.check_json_format()! + decoder2.check_if_json_match[Stru](json_data)! + + mut result := Stru{} + decoder.current_node = decoder.values_info.head + decoder.decode_value(mut &result)! + } + + b.measure('raw decode from HTTP request') + + for i := 0; i < max_iterations; i++ { + json_string_from_http_request := http_request[150..] + + _ := decoder2.decode[Stru](json_string_from_http_request)! 
+ } + + b.measure('decode from HTTP request') } From 3d062893741ded1963ae1049d75f95c29cd10c22 Mon Sep 17 00:00:00 2001 From: Hitalo Souza Date: Fri, 1 Nov 2024 16:00:23 -0400 Subject: [PATCH 2/6] apply review is suggestions --- vlib/time/parse.c.v | 89 ++++++++----------- vlib/x/json2/decoder2/decode.v | 17 +--- vlib/x/json2/decoder2/tests/bench.v | 14 +-- .../json2/decoder2/tests/json_sumtype_test.v | 2 +- 4 files changed, 43 insertions(+), 79 deletions(-) diff --git a/vlib/time/parse.c.v b/vlib/time/parse.c.v index 78ed84fcb49ade..6feaeb2c619aae 100644 --- a/vlib/time/parse.c.v +++ b/vlib/time/parse.c.v @@ -129,30 +129,25 @@ fn check_and_extract_date(s string) !(int, int, int) { // parse_rfc3339 returns the time from a date string in RFC 3339 datetime format. // See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of // the differences between ISO-8601 and RFC 3339. -pub fn parse_rfc3339(s string) !Time { - unsafe { - mut t := Time{} - - fast_parse_rfc3339(s.str, s.len, mut t)! - return t - } -} -@[unsafe] -pub fn fast_parse_rfc3339(s_str &u8, s_len int, mut val Time) ! { - if s_len == 0 { +// parse_rfc3339 returns the time from a date string in RFC 3339 datetime format. +// See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of +// the differences between ISO-8601 and RFC 3339. 
+@[direct_array_access] +pub fn parse_rfc3339(s string) !Time { + if s.len == 0 { return error_invalid_time(0, 'datetime string is empty') } - if s_len < time_format_buffer.len { + if s.len < time_format_buffer.len { return error('string is too short to parse') } mut year, mut month, mut day := 0, 0, 0 mut hour_, mut minute_, mut second_, mut nanosecond_ := 0, 0, 0, 0 - is_time := if s_len >= time_format_buffer.len { - u8(*(s_str + 2)) == u8(`:`) && u8(*(s_str + 5)) == u8(`:`) + is_time := if s.len >= time_format_buffer.len { + s[2] == u8(`:`) && s[5] == u8(`:`) } else { false } @@ -160,49 +155,46 @@ pub fn fast_parse_rfc3339(s_str &u8, s_len int, mut val Time) ! { return error('missing date part of RFC 3339') } - is_date := if s_len >= date_format_buffer.len { - u8(*(s_str + 4)) == u8(`-`) && u8(*(s_str + 7)) == u8(`-`) + is_date := if s.len >= date_format_buffer.len { + s[4] == u8(`-`) && s[7] == u8(`-`) } else { false } if is_date { - year, month, day = check_and_extract_date(tos(s_str, s_len))! - if s_len == date_format_buffer.len { - val = new(Time{ + year, month, day = check_and_extract_date(s)! + if s.len == date_format_buffer.len { + return new(Time{ year: year month: month day: day is_local: false }) - return } } - is_datetime := if s_len >= date_format_buffer.len + 1 + time_format_buffer.len + 1 { - is_date && u8(*(s_str + 10)) == u8(`T`) + is_datetime := if s.len >= date_format_buffer.len + 1 + time_format_buffer.len + 1 { + is_date && s[10] == u8(`T`) } else { false } if is_datetime { // year, month, day := check_and_extract_date(s)! - // hour_, minute_, second_, nanosecond_ = check_and_extract_time(s[date_format_buffer.len + 1..])! - hour_, minute_, second_, nanosecond_ = check_and_extract_time(tos(s_str + - date_format_buffer.len + 1, s_len - date_format_buffer.len - 1))! + hour_, minute_, second_, nanosecond_ = check_and_extract_time(s[date_format_buffer.len + 1..])! 
} mut timezone_start_position := 0 if is_datetime || is_time { timezone_start_position = date_format_buffer.len + 1 + time_format_buffer.len - if u8(*(s_str + timezone_start_position)) == u8(`.`) { + if s[timezone_start_position] == u8(`.`) { timezone_start_position++ - for u8(*(s_str + timezone_start_position)) !in [u8(`Z`), `z`, `+`, `-`] { + for s[timezone_start_position] !in [u8(`Z`), `z`, `+`, `-`] { timezone_start_position++ - if timezone_start_position == s_len { + if timezone_start_position == s.len { return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${timezone_start_position}, not "${[ - u8(*(s_str + timezone_start_position)), + s[timezone_start_position], ].bytestr()}"') } } @@ -210,25 +202,18 @@ pub fn fast_parse_rfc3339(s_str &u8, s_len int, mut val Time) ! { } pos := date_format_buffer.len + time_format_buffer.len + 1 - if pos >= s_len { + if pos >= s.len { return error('timezone error: datetime string is too short') } - - if u8(*(s_str + date_format_buffer.len + time_format_buffer.len + 1)) !in [ - u8(`Z`), - `z`, - `+`, - `-`, - `.`, - ] { + if s[date_format_buffer.len + time_format_buffer.len + 1] !in [u8(`Z`), `z`, `+`, `-`, `.`] { // RFC 3339 needs a timezone return error('timezone error: expected "Z" or "z" or "+" or "-" in position ${ date_format_buffer.len + time_format_buffer.len + 1}, not "${[ - u8(*(s_str + date_format_buffer.len + time_format_buffer.len + 1)), + s[date_format_buffer.len + time_format_buffer.len + 1], ].bytestr()}"') } else { - if u8(*(s_str + s_len - 1)) in [u8(`Z`), `z`] { - val = new(Time{ + if s[s.len - 1] in [u8(`Z`), `z`] { + return new(Time{ year: year month: month day: day @@ -238,21 +223,20 @@ pub fn fast_parse_rfc3339(s_str &u8, s_len int, mut val Time) ! 
{ nanosecond: nanosecond_ is_local: false }) - return } else { // Check if the string contains the timezone part after the time part +00:00 - if s_len < date_format_buffer.len + 1 + time_format_buffer.len + 6 { + if s.len < date_format_buffer.len + 1 + time_format_buffer.len + 6 { return error('datetime string is too short') } - if u8(*(s_str + s_len - 3)) != u8(`:`) { + if s[s.len - 3] != u8(`:`) { return error('timezone separator error: expected ":", not `${[ - u8(*(s_str + date_format_buffer.len + time_format_buffer.len + 3)), + s[date_format_buffer.len + time_format_buffer.len + 3], ].bytestr()}` in position ${date_format_buffer.len + time_format_buffer.len + 3}') } // Check if it is UTC time - if unsafe { vmemcmp(s_str + s_len - 5, '00:00'.str, 5) == 0 } { - val = new(Time{ + if unsafe { vmemcmp(s.str + s.len - 5, '00:00'.str, 5) == 0 } { + return new(Time{ year: year month: month day: day @@ -262,23 +246,21 @@ pub fn fast_parse_rfc3339(s_str &u8, s_len int, mut val Time) ! { nanosecond: nanosecond_ is_local: false }) - return } - is_negative := u8(*(s_str + s_len - 6)) == u8(`-`) + is_negative := s[s.len - 6] == u8(`-`) // To local time using the offset to add_seconds mut offset_in_minutes := 0 mut offset_in_hours := 0 // offset hours for i := 0; i < 2; i++ { - offset_in_hours = offset_in_minutes * 10 + (u8(*(s_str + s_len - 5 + i)) - u8(`0`)) + offset_in_hours = offset_in_minutes * 10 + (s[s.len - 5 + i] - u8(`0`)) } // offset minutes for i := 0; i < 2; i++ { - offset_in_minutes = offset_in_minutes * 10 + (u8(*(s_str + s_len - 2 + - i)) - u8(`0`)) + offset_in_minutes = offset_in_minutes * 10 + (s[s.len - 2 + i] - u8(`0`)) } offset_in_minutes += offset_in_hours * 60 @@ -300,8 +282,7 @@ pub fn fast_parse_rfc3339(s_str &u8, s_len int, mut val Time) ! 
{ time_to_be_returned = time_to_be_returned.add_seconds(offset_in_minutes * 60) - val = time_to_be_returned - return + return time_to_be_returned } } diff --git a/vlib/x/json2/decoder2/decode.v b/vlib/x/json2/decoder2/decode.v index 080d8fa5c7a6fa..4d52dd8f6c6e3d 100644 --- a/vlib/x/json2/decoder2/decode.v +++ b/vlib/x/json2/decoder2/decode.v @@ -650,20 +650,11 @@ pub fn (mut decoder Decoder) decode_value[T](mut val T) ! { time_info := decoder.current_node.value if time_info.value_kind == .string_ { - // string_time := decoder.json.substr_unsafe(time_info.position + 1, time_info.position + - // time_info.length - 1) - - // val = time.parse_rfc3339(string_time) or { time.Time{} } - - // time_info := decoder.current_node.value - - unsafe { - // mut t := Time{} - - time.fast_parse_rfc3339(decoder.json_str + time_info.position + 1, time_info.length - 2, mut - val)! - // return t + string_time := unsafe { + decoder.json_str[time_info.position + 1].vstring_with_len(time_info.length - 2) } + + val = time.parse_rfc3339(string_time) or { time.Time{} } } } $else $if T.unaliased_typ is $map { map_info := decoder.current_node.value diff --git a/vlib/x/json2/decoder2/tests/bench.v b/vlib/x/json2/decoder2/tests/bench.v index a7852f4447bd55..1462bc2cbfa0ca 100644 --- a/vlib/x/json2/decoder2/tests/bench.v +++ b/vlib/x/json2/decoder2/tests/bench.v @@ -3,8 +3,6 @@ import json as old_json import benchmark import time -// ./../../../../../v wipe-cache && ./../../../../../v -prod bench.v -gc none -o b_out && valgrind -s ./b_out - // ./v -prod crun vlib/x/json/tests/c.v // ./v wipe-cache && ./v -prod -cc gcc crun vlib/x/json2/decoder2/tests/bench.v const max_iterations = 1_000_000 @@ -106,12 +104,6 @@ fn main() { b.measure('decoder2.decode[StructType[time.Time]](json_data_timestamp)!') - for i := 0; i < max_iterations; i++ { - _ := old_json.decode(StructType[time.Time], json_data_timestamp)! 
// not working // 1970-01-01 00:00:00 - } - - b.measure('old_json.decode(StructType[time.Time], json_data_timestamp)!\n') - // StructType[string] ********************************************************** for i := 0; i < max_iterations; i++ { _ := decoder2.decode[StructType[string]](json_data1)! @@ -198,10 +190,10 @@ fn main() { // time.Time ********************************************************** for i := 0; i < max_iterations; i++ { - _ := decoder2.decode[time.Time]('"2022-03-11T13:54:25.000Z"')! + _ := decoder2.decode[time.Time]('"2022-03-11T13:54:25Z"')! } - b.measure("decoder2.decode[time.Time]('2022-03-11T13:54:25')!") + b.measure("decoder2.decode[time.Time]('2022-03-11T13:54:25Z')!") // string ********************************************************** for i := 0; i < max_iterations; i++ { @@ -240,7 +232,7 @@ fn main() { b.measure('decoder2.decode[SumTypes](\'"abcdefghijklimnopqrstuv"\')!') - // // Uncomment this when #22693 is fixed + // // Uncomment this when #22710 is fixed // for i := 0; i < max_iterations; i++ { // _ := decoder2.decode[json2.Any](json_data2)! // } diff --git a/vlib/x/json2/decoder2/tests/json_sumtype_test.v b/vlib/x/json2/decoder2/tests/json_sumtype_test.v index 27e956525ab985..22be05e36e1889 100644 --- a/vlib/x/json2/decoder2/tests/json_sumtype_test.v +++ b/vlib/x/json2/decoder2/tests/json_sumtype_test.v @@ -50,7 +50,7 @@ fn test_any_sum_type() { assert json.decode[json2.Any]('1.1')! == json2.Any(f64(1.1)) - // Uncomment this when #22693 is fixed + // Uncomment this when #22710 is fixed // assert json.decode[[]json2.Any]('["1", "2", "3"]')! == [json2.Any('1'), json2.Any('2'), json2.Any('3')] // assert json.decode[json2.Any]('["1", "2", "3"]')! 
== json2.Any([json2.Any('1'), json2.Any('2'), // json2.Any('3')]) From 6f49ac9ac875e28fc0a3f8e0b7c6956d8aaf1615 Mon Sep 17 00:00:00 2001 From: Hitalo Souza Date: Fri, 1 Nov 2024 16:39:57 -0400 Subject: [PATCH 3/6] increase Stru in bench --- vlib/x/json2/decoder2/tests/bench.v | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/vlib/x/json2/decoder2/tests/bench.v b/vlib/x/json2/decoder2/tests/bench.v index 1462bc2cbfa0ca..3d34b7a6b0aa5f 100644 --- a/vlib/x/json2/decoder2/tests/bench.v +++ b/vlib/x/json2/decoder2/tests/bench.v @@ -12,6 +12,12 @@ pub struct Stru { val int val2 string val3 Stru2 + val4 int + val5 int + val6 int + val7 int + val8 int + val9 int } pub struct Stru2 { @@ -48,7 +54,7 @@ mut: } fn main() { - json_data := '{"_type": "Stru", "val": 1, "val2": "lala", "val3": {"a": 2, "churrasco": "leleu"}}' + json_data := '{"_type": "Stru", "val": 1, "val2": "lala", "val3": {"a": 2, "churrasco": "leleu"}, "val4": 2147483000, "val5": 2147483000, "val6": 2147483000, "val7": 2147483000, "val8": 2147483000, "val9": 2147483000}' json_data1 := '{"val": "2"}' json_data2 := '{"val": 2}' json_data_timestamp := '{"val": "2022-03-11T13:54:25Z"}' @@ -61,7 +67,7 @@ fn main() { http_request += 'Connection: close\r\n' http_request += 'Content-Length: ${json_data.len}\r\n' http_request += '\r\n' - // dump(http_request.len) + body_position := http_request.len http_request += json_data // pos: 150 println('Starting benchmark...') @@ -241,7 +247,7 @@ fn main() { for i := 0; i < max_iterations; i++ { mut decoder := decoder2.Decoder{ - json_str: unsafe { http_request.str + 150 } + json_str: unsafe { http_request.str + body_position } json_len: json_data.len } @@ -256,7 +262,7 @@ fn main() { b.measure('raw decode from HTTP request') for i := 0; i < max_iterations; i++ { - json_string_from_http_request := http_request[150..] + json_string_from_http_request := http_request[body_position..] _ := decoder2.decode[Stru](json_string_from_http_request)! 
} From b765a2333708cb5c927a3f49ee90819265436729 Mon Sep 17 00:00:00 2001 From: Hitalo Souza Date: Fri, 1 Nov 2024 16:41:05 -0400 Subject: [PATCH 4/6] fix: missin doc --- vlib/x/json2/decoder2/decode.v | 1 + 1 file changed, 1 insertion(+) diff --git a/vlib/x/json2/decoder2/decode.v b/vlib/x/json2/decoder2/decode.v index 4d52dd8f6c6e3d..73088cda6ea6f0 100644 --- a/vlib/x/json2/decoder2/decode.v +++ b/vlib/x/json2/decoder2/decode.v @@ -559,6 +559,7 @@ pub fn (mut checker Decoder) check_json_format() ! { } } +// free frees the allocated memory for the decoder. @[unsafe] pub fn (mut decoder Decoder) free() { decoder.values_info.free() From 7a6430e7c7f0896f716384cae63a4353fa11ba00 Mon Sep 17 00:00:00 2001 From: Hitalo Souza Date: Sat, 2 Nov 2024 20:20:35 -0400 Subject: [PATCH 5/6] The compiler is optimizing for this. Since the change is longer to write and is a bit less expressive (checking the `len` is not exclusive to strings; while it is fairly clear in this context, in others it can create more mental overhead to grasp what is happening when reading the code) using `== ` has become something that v vet recommends to check for an empty string. --- vlib/time/parse.c.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vlib/time/parse.c.v b/vlib/time/parse.c.v index 6feaeb2c619aae..413f23a2dc20ad 100644 --- a/vlib/time/parse.c.v +++ b/vlib/time/parse.c.v @@ -135,7 +135,7 @@ fn check_and_extract_date(s string) !(int, int, int) { // the differences between ISO-8601 and RFC 3339. 
@[direct_array_access] pub fn parse_rfc3339(s string) !Time { - if s.len == 0 { + if s == '' { return error_invalid_time(0, 'datetime string is empty') } From a8db5e197b329239a7743ee458ab5b061980dd2b Mon Sep 17 00:00:00 2001 From: Hitalo Souza Date: Sat, 2 Nov 2024 20:21:20 -0400 Subject: [PATCH 6/6] doc: remove duplicated --- vlib/time/parse.c.v | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vlib/time/parse.c.v b/vlib/time/parse.c.v index 413f23a2dc20ad..46c515c382ef09 100644 --- a/vlib/time/parse.c.v +++ b/vlib/time/parse.c.v @@ -126,10 +126,6 @@ fn check_and_extract_date(s string) !(int, int, int) { return year, month, day } -// parse_rfc3339 returns the time from a date string in RFC 3339 datetime format. -// See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of -// the differences between ISO-8601 and RFC 3339. - // parse_rfc3339 returns the time from a date string in RFC 3339 datetime format. // See also https://ijmacd.github.io/rfc3339-iso8601/ for a visual reference of // the differences between ISO-8601 and RFC 3339.