-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Correctly handle CSV files with a single separator throughout
Better auto-detection of the CSV delimiter:
- files with a `.tsv` extension are automatically detected as tab-delimited
- other files parsed as CSV go through the following steps:
  - if the first line contains at least 3 of the same separator, that separator is used as the delimiter
  - if the first line contains only one type of supported separator character, that separator is used as the delimiter
  - otherwise it falls back to treating all supported delimiters as the delimiter

Supported delimiters, in precedence order:
- comma `,`
- semi-colon `;`
- tab `\t`
- pipe `|`
- Loading branch information
1 parent
498df11
commit 8d94574
Showing
9 changed files
with
401 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
%YAML 1.2 | ||
--- | ||
# See http://www.sublimetext.com/docs/3/syntax.html | ||
# Syntax definition for records whose fields are delimited by a pipe character. | ||
name: Pipe Separated Values | ||
scope: text.csv.pipe | ||
variables: | ||
# A single literal pipe character separates two fields. | ||
field_separator: (?:\|) | ||
# A record ends at the end of the line; the newline itself is consumed when present. | ||
record_separator: (?:$\n?) | ||
|
||
contexts: | ||
# Entry context: the start of every line opens a new record by pushing `fields`. | ||
main: | ||
- match: '^' | ||
push: fields | ||
|
||
# Handles one record. A record separator found here is consumed by `record_separator`, | ||
# which pops back to `main`; otherwise a batch of five field contexts is pushed. | ||
fields: | ||
- include: record_separator | ||
# The empty pattern matches without consuming input. When a list is pushed, the last | ||
# entry ends up on top of the stack, so field1 runs first and field5 last. | ||
# Once all ten contexts have popped, this rule fires again, so the field-1..field-5 | ||
# scopes repeat every five columns. | ||
- match: '' | ||
push: | ||
- field_or_record_separator | ||
- field5 | ||
- field_or_record_separator | ||
- field4 | ||
- field_or_record_separator | ||
- field3 | ||
- field_or_record_separator | ||
- field2 | ||
- field_or_record_separator | ||
- field1 | ||
|
||
# Pops the current context when a record separator comes next; the lookahead consumes nothing. | ||
record_separator_pop: | ||
- match: (?={{record_separator}}) | ||
pop: true | ||
|
||
# Consumes the record separator, scopes it as a record terminator and pops the including context. | ||
record_separator: | ||
# Keeps any `prototype` context from being included automatically in this context. | ||
- meta_include_prototype: false | ||
- match: '{{record_separator}}' | ||
scope: punctuation.terminator.record.csv | ||
pop: true | ||
|
||
# Sits between two field contexts on the stack: either consumes one field separator and pops, | ||
# or pops without consuming anything when the record is about to end. | ||
field_or_record_separator: | ||
- meta_include_prototype: false | ||
- include: record_separator_pop | ||
- match: '{{field_separator}}' | ||
scope: punctuation.separator.sequence.csv | ||
pop: true | ||
|
||
# Rules shared by every fieldN context. | ||
field_contents: | ||
# An opening double quote hands over to the `double_quoted_string` context of the | ||
# syntax whose scope is text.csv. | ||
- match: '"' | ||
scope: punctuation.definition.string.begin.csv | ||
push: scope:text.csv#double_quoted_string | ||
|
||
# The field ends, without consuming input, right before a record separator or a field separator. | ||
- include: record_separator_pop | ||
- match: (?={{field_separator}}) | ||
pop: true | ||
|
||
# field1..field5: each context immediately replaces itself (empty match + `set`) with an | ||
# anonymous context that scopes the field's content and applies the `field_contents` rules. | ||
# Each field gets a meta.field-N.csv scope plus a second, general-purpose scope name | ||
# (presumably so color schemes render neighbouring columns differently - TODO confirm). | ||
field1: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-1.csv variable.parameter | ||
- include: field_contents | ||
field2: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-2.csv support.function | ||
- include: field_contents | ||
field3: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-3.csv constant.numeric | ||
- include: field_contents | ||
field4: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-4.csv keyword.operator | ||
- include: field_contents | ||
field5: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-5.csv string.unquoted | ||
- include: field_contents |
79 changes: 79 additions & 0 deletions
79
assets/syntaxes/02_Extra/CSV/CSV-semi-colon.sublime-syntax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
%YAML 1.2 | ||
--- | ||
# See http://www.sublimetext.com/docs/3/syntax.html | ||
# Syntax definition for records whose fields are delimited by a semi-colon. | ||
name: Semi-Colon Separated Values | ||
scope: text.csv.semi-colon | ||
variables: | ||
# A single semi-colon separates two fields. | ||
field_separator: (?:;) | ||
# A record ends at the end of the line; the newline itself is consumed when present. | ||
record_separator: (?:$\n?) | ||
contexts: | ||
# Entry context: the start of every line opens a new record by pushing `fields`. | ||
main: | ||
- match: '^' | ||
push: fields | ||
|
||
# Handles one record. A record separator found here is consumed by `record_separator`, | ||
# which pops back to `main`; otherwise a batch of five field contexts is pushed. | ||
fields: | ||
- include: record_separator | ||
# The empty pattern matches without consuming input. When a list is pushed, the last | ||
# entry ends up on top of the stack, so field1 runs first and field5 last. | ||
# Once all ten contexts have popped, this rule fires again, so the field-1..field-5 | ||
# scopes repeat every five columns. | ||
- match: '' | ||
push: | ||
- field_or_record_separator | ||
- field5 | ||
- field_or_record_separator | ||
- field4 | ||
- field_or_record_separator | ||
- field3 | ||
- field_or_record_separator | ||
- field2 | ||
- field_or_record_separator | ||
- field1 | ||
|
||
# Pops the current context when a record separator comes next; the lookahead consumes nothing. | ||
record_separator_pop: | ||
- match: (?={{record_separator}}) | ||
pop: true | ||
|
||
# Consumes the record separator, scopes it as a record terminator and pops the including context. | ||
record_separator: | ||
# Keeps any `prototype` context from being included automatically in this context. | ||
- meta_include_prototype: false | ||
- match: '{{record_separator}}' | ||
scope: punctuation.terminator.record.csv | ||
pop: true | ||
|
||
# Sits between two field contexts on the stack: either consumes one field separator and pops, | ||
# or pops without consuming anything when the record is about to end. | ||
field_or_record_separator: | ||
- meta_include_prototype: false | ||
- include: record_separator_pop | ||
- match: '{{field_separator}}' | ||
scope: punctuation.separator.sequence.csv | ||
pop: true | ||
|
||
# Rules shared by every fieldN context. | ||
field_contents: | ||
# An opening double quote hands over to the `double_quoted_string` context of the | ||
# syntax whose scope is text.csv. | ||
- match: '"' | ||
scope: punctuation.definition.string.begin.csv | ||
push: scope:text.csv#double_quoted_string | ||
|
||
# The field ends, without consuming input, right before a record separator or a field separator. | ||
- include: record_separator_pop | ||
- match: (?={{field_separator}}) | ||
pop: true | ||
|
||
# field1..field5: each context immediately replaces itself (empty match + `set`) with an | ||
# anonymous context that scopes the field's content and applies the `field_contents` rules. | ||
# Each field gets a meta.field-N.csv scope plus a second, general-purpose scope name | ||
# (presumably so color schemes render neighbouring columns differently - TODO confirm). | ||
field1: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-1.csv variable.parameter | ||
- include: field_contents | ||
field2: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-2.csv support.function | ||
- include: field_contents | ||
field3: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-3.csv constant.numeric | ||
- include: field_contents | ||
field4: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-4.csv keyword.operator | ||
- include: field_contents | ||
field5: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-5.csv string.unquoted | ||
- include: field_contents |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
%YAML 1.2 | ||
--- | ||
# See http://www.sublimetext.com/docs/3/syntax.html | ||
# Generic separated-values syntax: it either hands over to a delimiter-specific syntax | ||
# or falls back to treating every supported separator character as a delimiter. | ||
name: Separated Values | ||
file_extensions: | ||
- csv | ||
scope: text.csv | ||
variables: | ||
# Supported separator characters (comma, semi-colon, tab, pipe), written so they can be | ||
# interpolated inside a regex character class. | ||
field_separator_chars: ',;\t|' | ||
# Any one of the supported separator characters. | ||
field_separator: (?:[{{field_separator_chars}}]) | ||
# A record ends at the end of the line; the newline itself is consumed when present. | ||
record_separator: (?:$\n?) | ||
contexts: | ||
# Entry context: tries the delimiter-detection rules in order and, when none of them | ||
# matches at the start of the line, pushes the generic fallback context. | ||
main: | ||
- meta_include_prototype: false | ||
# Rule order is significant: the "three separators" checks are tried before the | ||
# "single separator type" checks, and both before the fallback below. | ||
- include: three_field_separators | ||
- include: single_separator_type_on_line | ||
- match: '^' | ||
push: unknown-separated-main | ||
|
||
# Switches to a delimiter-specific syntax when the text ahead of the line start contains | ||
# at least three occurrences of the same separator. Rules are listed in precedence order: | ||
# comma, semi-colon, tab, pipe. `set` replaces this context with the target syntax. | ||
# NOTE(review): the lookaheads count raw characters, so separators inside quoted fields | ||
# are counted as well. | ||
three_field_separators: | ||
- match: ^(?=(?:[^,]*,){3}) | ||
set: scope:text.csv.comma | ||
- match: ^(?=(?:[^;]*;){3}) | ||
set: scope:text.csv.semi-colon | ||
- match: ^(?=(?:[^\t]*\t){3}) | ||
set: scope:text.csv.tab | ||
- match: ^(?=(?:[^|]*\|){3}) | ||
set: scope:text.csv.pipe | ||
|
||
# Switches to a delimiter-specific syntax when the line uses only one type of separator. | ||
# Each lookahead requires that no supported separator precedes the first occurrence of the | ||
# candidate, and that none of the other three separators follows it before end of line. | ||
# The candidate itself may repeat after its first occurrence. | ||
single_separator_type_on_line: | ||
- match: ^(?=[^{{field_separator_chars}}]*,[^;\t|]*$) | ||
set: scope:text.csv.comma | ||
- match: ^(?=[^{{field_separator_chars}}]*;[^,\t|]*$) | ||
set: scope:text.csv.semi-colon | ||
- match: ^(?=[^{{field_separator_chars}}]*\t[^,;|]*$) | ||
set: scope:text.csv.tab | ||
- match: ^(?=[^{{field_separator_chars}}]*\|[^,;\t]*$) | ||
set: scope:text.csv.pipe | ||
|
||
# Fallback used when no single delimiter could be detected: every supported separator | ||
# character ends a field. A record separator found here is scoped by `record_separator`; | ||
# otherwise a batch of five field contexts is pushed. | ||
unknown-separated-main: | ||
- include: record_separator | ||
# The empty pattern matches without consuming input. When a list is pushed, the last | ||
# entry ends up on top of the stack, so field1 runs first and field5 last. | ||
# Once all ten contexts have popped, this rule fires again, so the field-1..field-5 | ||
# scopes repeat every five columns. | ||
- match: '' | ||
push: | ||
- field_or_record_separator | ||
- field5 | ||
- field_or_record_separator | ||
- field4 | ||
- field_or_record_separator | ||
- field3 | ||
- field_or_record_separator | ||
- field2 | ||
- field_or_record_separator | ||
- field1 | ||
|
||
# Pops the current context when a record separator comes next; the lookahead consumes nothing. | ||
record_separator_pop: | ||
- match: (?={{record_separator}}) | ||
pop: true | ||
|
||
# Consumes the record separator and scopes it as a record terminator. Nothing is popped here, | ||
# so the including context (unknown-separated-main) stays active for the lines that follow. | ||
record_separator: | ||
# Keeps any `prototype` context from being included automatically in this context. | ||
- meta_include_prototype: false | ||
- match: '{{record_separator}}' | ||
scope: punctuation.terminator.record.csv | ||
|
||
# Sits between two field contexts on the stack: either consumes one field separator and pops, | ||
# or pops without consuming anything when the record is about to end. | ||
field_or_record_separator: | ||
- meta_include_prototype: false | ||
- include: record_separator_pop | ||
- match: '{{field_separator}}' | ||
scope: punctuation.separator.sequence.csv | ||
pop: true | ||
|
||
# Rules shared by every fieldN context. | ||
field_contents: | ||
# An opening double quote starts a quoted string handled by `double_quoted_string`. | ||
- match: '"' | ||
scope: punctuation.definition.string.begin.csv | ||
push: double_quoted_string | ||
|
||
# The field ends, without consuming input, right before a record separator or a field separator. | ||
- include: record_separator_pop | ||
- match: (?={{field_separator}}) | ||
pop: true | ||
|
||
# Body of a double-quoted field; everything matched while this context is active is scoped | ||
# as a double-quoted string. No separator rules are included here, so separator characters | ||
# inside the quotes do not end the field. | ||
double_quoted_string: | ||
- meta_include_prototype: false | ||
- meta_scope: string.quoted.double.csv | ||
# Must come before the single-quote rule: a doubled quote is an escaped quote, not the end. | ||
- match: '""' | ||
scope: constant.character.escape.csv | ||
# A lone double quote closes the string and returns to the field context. | ||
- match: '"' | ||
scope: punctuation.definition.string.end.csv | ||
pop: true | ||
|
||
# field1..field5: each context immediately replaces itself (empty match + `set`) with an | ||
# anonymous context that scopes the field's content and applies the `field_contents` rules. | ||
# Each field gets a meta.field-N.csv scope plus a second, general-purpose scope name | ||
# (presumably so color schemes render neighbouring columns differently - TODO confirm). | ||
field1: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-1.csv variable.parameter | ||
- include: field_contents | ||
field2: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-2.csv support.function | ||
- include: field_contents | ||
field3: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-3.csv constant.numeric | ||
- include: field_contents | ||
field4: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-4.csv keyword.operator | ||
- include: field_contents | ||
field5: | ||
- match: '' | ||
set: | ||
- meta_content_scope: meta.field-5.csv string.unquoted | ||
- include: field_contents |
Oops, something went wrong.