Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use is_whitespace more #202

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 24 additions & 22 deletions src/scanner.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ is_s_white(c::Char) = c == yaml_1_2_s_space || c == yaml_1_2_s_tab
# YAML 1.2 [37] ns-ascii-letter ::= [x41-x5A] | [x61-x7A] # A-Z a-z
# True only for the 52 unaccented ASCII letters; digits, punctuation and
# non-ASCII letters are rejected.
function is_ns_ascii_letter(c::Char)
    return c in 'A':'Z' || c in 'a':'z'
end

# Scanner-level whitespace test for YAML 1.1: true for NUL ('\0'), space, tab,
# or any character accepted by `is_b_char(YAMLV1_1(), c)`.
function is_whitespace(::YAMLV1_1, c::Char)
    c in ('\0', ' ', '\t') && return true
    return is_b_char(YAMLV1_1(), c)
end
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Counting \0 as whitespace seems suspicious but maybe that's the best way to make sense of the existing code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't yet understood what "whitespace" is supposed to mean here; for now I am only mechanically replacing the existing code.


struct SimpleKey
token_number::UInt64
required::Bool
Expand Down Expand Up @@ -363,8 +365,6 @@ end
# Checkers
# --------

const whitespace = "\0 \t\r\n\u0085\u2028\u2029"


function check_directive(stream::TokenStream)
stream.column == 0
Expand All @@ -373,31 +373,31 @@ end
# Return `true` when the stream is at column 0, the next three characters are
# "---", and the character after them is whitespace per
# `is_whitespace(YAMLV1_1(), ...)`.
#
# `peek` is guarded against `nothing` because other checkers in this file test
# for it (presumably end of input -- confirm against `peek`'s definition).
# A `nothing` follower yields `false`, matching the former
# `in(peek(...), whitespace)` behaviour instead of raising a `MethodError`.
function check_document_start(stream::TokenStream)
    (stream.column == 0 && prefix(stream.input, 3) == "---") || return false
    follower = peek(stream.input, 3)
    return follower !== nothing && is_whitespace(YAMLV1_1(), follower)
end

# Return `true` when the stream is at column 0, the next three characters are
# "...", and the character after them is either absent (`peek` returned
# `nothing`) or whitespace per `is_whitespace(YAMLV1_1(), ...)`.
#
# The `nothing` test must run before `is_whitespace`, which only has a method
# for `Char`; the opposite order raises a `MethodError` and makes the
# `nothing` branch unreachable.
function check_document_end(stream::TokenStream)
    (stream.column == 0 && prefix(stream.input, 3) == "...") || return false
    follower = peek(stream.input, 3)
    return follower === nothing || is_whitespace(YAMLV1_1(), follower)
end

# Return `true` when the character one position ahead of the current one is
# whitespace per `is_whitespace(YAMLV1_1(), ...)`.
#
# A `nothing` result from `peek` yields `false`, as the former
# `in(peek(...), whitespace)` test did, rather than a `MethodError` from the
# `Char`-only `is_whitespace` method.
function check_block_entry(stream::TokenStream)
    follower = peek(stream.input, 1)
    return follower !== nothing && is_whitespace(YAMLV1_1(), follower)
end

# Return `true` inside a flow context (`flow_level > 0`), or otherwise when
# the character one position ahead is whitespace per
# `is_whitespace(YAMLV1_1(), ...)`.
#
# A `nothing` result from `peek` yields `false`, as the former
# `in(peek(...), whitespace)` test did, rather than a `MethodError` from the
# `Char`-only `is_whitespace` method.
function check_key(stream::TokenStream)
    stream.flow_level > 0 && return true
    follower = peek(stream.input, 1)
    return follower !== nothing && is_whitespace(YAMLV1_1(), follower)
end

# Return `true` inside a flow context (`flow_level > 0`), or otherwise when
# the character one position ahead is either absent (`peek` returned
# `nothing`) or whitespace per `is_whitespace(YAMLV1_1(), ...)`.
#
# The `nothing` test runs before `is_whitespace`, which only has a method for
# `Char`; the opposite order raises a `MethodError` instead of returning
# `true`.
function check_value(stream::TokenStream)
    cnext = peek(stream.input, 1)
    stream.flow_level > 0 && return true
    return cnext === nothing || is_whitespace(YAMLV1_1(), cnext)
end

# Decide whether the current character can begin a plain scalar.
#
# Returns `true` when the current character is not in the excluded set below
# (whitespace, indicators and U+FEFF). Otherwise returns `true` only when the
# following character is not whitespace and the current character is '-', or
# is '?' or ':' outside a flow context (`flow_level == 0`).
#
# A `nothing` follower counts as "not whitespace", matching the former
# `!in(peek(...), whitespace)` test; calling the `Char`-only `is_whitespace`
# method on it would raise a `MethodError`.
function check_plain(stream::TokenStream)
    c = peek(stream.input)
    if !in(c, "\0 \t\r\n\u0085\u2028\u2029-?:,[]{}#&*!|>\'\"%@`\uFEFF")
        return true
    end
    cnext = peek(stream.input, 1)
    followed_by_space = cnext !== nothing && is_whitespace(YAMLV1_1(), cnext)
    return !followed_by_space &&
        (c == '-' || (stream.flow_level == 0 && in(c, "?:")))
end
Expand Down Expand Up @@ -1013,9 +1013,10 @@ function scan_anchor(stream::TokenStream, ::Type{T}) where {T<:Token}
end
value = prefix(stream.input, length)
forwardchars!(stream, length)
if !in(peek(stream.input), "\0 \t\r\n\u0085\u2028\u2029?:,]}%@`")
c = peek(stream.input)
if !(is_whitespace(YAMLV1_1(), c) || in(c, "?:,]}%@`"))
throw(ScannerError("while scanning an $(name)", start_mark,
"expected an alphanumeric character, but found '$(peek(stream.input))'",
"expected an alphanumeric character, but found '$c'",
get_mark(stream)))
end
end_mark = get_mark(stream)
Expand All @@ -1036,7 +1037,7 @@ function scan_tag(stream::TokenStream)
get_mark(stream)))
end
forwardchars!(stream)
elseif in(c, "\0 \t\r\n\u0085\u2028\u2029")
elseif is_whitespace(YAMLV1_1(), c)
handle = nothing
suffix = '!'
forwardchars!(stream)
Expand Down Expand Up @@ -1288,8 +1289,10 @@ function scan_flow_scalar_non_spaces(stream::TokenStream, double::Bool,
chunks = Any[]
while true
length = 0
while !in(peek(stream.input, length), "\'\"\\\0 \t\r\n\u0085\u2028\u2029")
c = peek(stream.input, length)
while !(in(c, "\'\"\\") || is_whitespace(YAMLV1_1(), c))
length += 1
c = peek(stream.input, length)
end
if length > 0
push!(chunks, prefix(stream.input, length))
Expand Down Expand Up @@ -1377,8 +1380,7 @@ function scan_flow_scalar_breaks(stream::TokenStream, double::Bool,
chunks = Any[]
while true
pref = prefix(stream.input, 3)
if pref == "---" || pref == "..." &&
in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029")
if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3))
throw(ScannerError("while scanning a quoted scalar", start_mark,
"found unexpected document seperator",
get_mark(stream)))
Expand Down Expand Up @@ -1422,10 +1424,10 @@ function scan_plain(stream::TokenStream)
while true
c = peek(stream.input, length)
cnext = peek(stream.input, length + 1)
if in(c, whitespace) ||
if is_whitespace(YAMLV1_1(), c) ||
c === nothing ||
(stream.flow_level == 0 && c == ':' &&
(cnext === nothing || in(cnext, whitespace))) ||
(cnext === nothing || is_whitespace(YAMLV1_1(), cnext))) ||
(stream.flow_level != 0 && in(c, ",:?[]{}"))
break
end
Expand All @@ -1434,8 +1436,10 @@ function scan_plain(stream::TokenStream)

# It's not clear what we should do with ':' in the flow context.
c = peek(stream.input)
if stream.flow_level != 0 && c == ':' &&
!in(peek(stream.input, length + 1), "\0 \t\r\n\u0085\u2028\u2029,[]{}")
if stream.flow_level != 0 && c == ':' && begin
cnext = peek(stream.input, length + 1)
!(is_whitespace(YAMLV1_1(), cnext) || in(cnext, ",[]{}"))
end
forwardchars!(stream, length)
throw(ScannerError("while scanning a plain scalar", start_mark,
"found unexpected ':'", get_mark(stream)))
Expand Down Expand Up @@ -1479,8 +1483,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer,
return Any[]
end
pref = prefix(stream.input, 3)
if pref == "---" || pref == "..." &&
in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029")
if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3))
return Any[]
end

Expand All @@ -1494,8 +1497,7 @@ function scan_plain_spaces(stream::TokenStream, indent::Integer,
return Any[]
end
pref = prefix(stream.input, 3)
if pref == "---" || pref == "..." &&
in(peek(stream.input, 3), "\0 \t\r\n\u0085\u2028\u2029")
if pref == "---" || pref == "..." && is_whitespace(YAMLV1_1(), peek(stream.input, 3))
return Any[]
end
end
Expand Down
Loading