Skip to content

Commit

Permalink
Correctly handle CSV files with a single separator throughout
Browse files Browse the repository at this point in the history
better auto-detection of CSV delimiter
- files with a tsv extension are automatically detected as tab delimited
- other files parsed as CSV go through the following steps:
  - if the first line contains at least 3 of the same separator, it uses that separator as a delimiter
  - if the first line contains only one supported separator character, it uses that separator as a delimiter
  - otherwise it falls back to treating every supported delimiter character as a field separator

 supported delimiters, in precedence order:
 - comma `,`
 - semi-colon `;`
 - tab `\t`
 - pipe `|`
  • Loading branch information
keith-hall authored and Keith Hall committed Jan 24, 2025
1 parent 498df11 commit 8d94574
Show file tree
Hide file tree
Showing 9 changed files with 401 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,21 @@
---
# See http://www.sublimetext.com/docs/3/syntax.html
name: Comma Separated Values
file_extensions:
- csv
- tsv
scope: text.csv
scope: text.csv.comma
variables:
field_separator: (?:[,;|\t])
field_separator: (?:,)
record_separator: (?:$\n?)
contexts:
prototype:
- match: (?={{record_separator}})
pop: true
main:
- match: '^'
push: fields

fields:
- match: ""
- include: record_separator
- match: ''
push:
- field_or_record_separator
- field5
- field_or_record_separator
- field4
- field_or_record_separator
Expand All @@ -24,54 +25,55 @@ contexts:
- field2
- field_or_record_separator
- field1
main:
- meta_include_prototype: false
- match: "^"
set: fields

field_or_record_separator:
record_separator_pop:
- match: (?={{record_separator}})
pop: true

record_separator:
- meta_include_prototype: false
- match: "{{record_separator}}"
- match: '{{record_separator}}'
scope: punctuation.terminator.record.csv
pop: true
- match: "{{field_separator}}"

field_or_record_separator:
- meta_include_prototype: false
- include: record_separator_pop
- match: '{{field_separator}}'
scope: punctuation.separator.sequence.csv
pop: true

field_contents:
- match: '"'
scope: punctuation.definition.string.begin.csv
push: double_quoted_string

- match: (?={{field_separator}}|{{record_separator}})
pop: true
push: scope:text.csv#double_quoted_string

double_quoted_string:
- meta_include_prototype: false
- meta_scope: string.quoted.double.csv
- match: '""'
scope: constant.character.escape.csv
- match: '"'
scope: punctuation.definition.string.end.csv
- include: record_separator_pop
- match: (?={{field_separator}})
pop: true

field1:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-1.csv support.type
- meta_content_scope: meta.field-1.csv variable.parameter
- include: field_contents
field2:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-2.csv support.function
- include: field_contents
field3:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-3.csv constant.numeric
- include: field_contents
field4:
- match: ""
- match: ''
set:
- meta_content_scope: meta.field-4.csv keyword.operator
- include: field_contents
field5:
- match: ''
set:
- meta_content_scope: meta.field-5.csv string.unquoted
- include: field_contents
80 changes: 80 additions & 0 deletions assets/syntaxes/02_Extra/CSV/CSV-pipe.sublime-syntax
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
#
# Pipe-delimited flavour of the CSV grammar: `|` is the only field separator.
# No file_extensions are declared in this file.
name: Pipe Separated Values
scope: text.csv.pipe
variables:
  # A single literal pipe character.
  field_separator: (?:\|)
  # End of line, optionally followed by the newline character itself.
  record_separator: (?:$\n?)

contexts:
  # Every line start opens a new record.
  main:
    - match: '^'
      push: fields

  # One record. A record separator here ends the record and pops back to
  # `main`; otherwise a batch of five field contexts (each followed by a
  # separator context) is pushed. The last list entry ends up on top of the
  # stack, so `field1` is matched first. Once the batch is used up, control
  # returns here and the next batch of five is pushed.
  fields:
    - include: record_separator
    - match: ''
      push:
        - field_or_record_separator
        - field5
        - field_or_record_separator
        - field4
        - field_or_record_separator
        - field3
        - field_or_record_separator
        - field2
        - field_or_record_separator
        - field1

  # Lookahead only: pops when a record separator is next without consuming it.
  record_separator_pop:
    - match: (?={{record_separator}})
      pop: true

  # Consumes and scopes the record separator, then pops.
  record_separator:
    - meta_include_prototype: false
    - match: '{{record_separator}}'
      scope: punctuation.terminator.record.csv
      pop: true

  # Sits between two fields: pops (without consuming) when the record is
  # ending, otherwise consumes and scopes exactly one field separator.
  field_or_record_separator:
    - meta_include_prototype: false
    - include: record_separator_pop
    - match: '{{field_separator}}'
      scope: punctuation.separator.sequence.csv
      pop: true

  # Body shared by all field contexts. Quoted strings are delegated to the
  # `double_quoted_string` context of the `text.csv` syntax; the field ends
  # (via lookahead) at the next field or record separator.
  field_contents:
    - match: '"'
      scope: punctuation.definition.string.begin.csv
      push: scope:text.csv#double_quoted_string

    - include: record_separator_pop
    - match: (?={{field_separator}})
      pop: true

  # field1..field5 differ only in the scopes applied to their content.
  # Each one immediately replaces itself (`set`) with an anonymous context
  # that carries the scope and reuses `field_contents`.
  field1:
    - match: ''
      set:
        - meta_content_scope: meta.field-1.csv variable.parameter
        - include: field_contents
  field2:
    - match: ''
      set:
        - meta_content_scope: meta.field-2.csv support.function
        - include: field_contents
  field3:
    - match: ''
      set:
        - meta_content_scope: meta.field-3.csv constant.numeric
        - include: field_contents
  field4:
    - match: ''
      set:
        - meta_content_scope: meta.field-4.csv keyword.operator
        - include: field_contents
  field5:
    - match: ''
      set:
        - meta_content_scope: meta.field-5.csv string.unquoted
        - include: field_contents
79 changes: 79 additions & 0 deletions assets/syntaxes/02_Extra/CSV/CSV-semi-colon.sublime-syntax
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
#
# Semi-colon-delimited flavour of the CSV grammar: `;` is the only field
# separator. No file_extensions are declared in this file.
name: Semi-Colon Separated Values
scope: text.csv.semi-colon
variables:
  # A single literal semi-colon.
  field_separator: (?:;)
  # End of line, optionally followed by the newline character itself.
  record_separator: (?:$\n?)
contexts:
  # Every line start opens a new record.
  main:
    - match: '^'
      push: fields

  # One record. A record separator here ends the record and pops back to
  # `main`; otherwise a batch of five field contexts (each followed by a
  # separator context) is pushed. The last list entry ends up on top of the
  # stack, so `field1` is matched first. Once the batch is used up, control
  # returns here and the next batch of five is pushed.
  fields:
    - include: record_separator
    - match: ''
      push:
        - field_or_record_separator
        - field5
        - field_or_record_separator
        - field4
        - field_or_record_separator
        - field3
        - field_or_record_separator
        - field2
        - field_or_record_separator
        - field1

  # Lookahead only: pops when a record separator is next without consuming it.
  record_separator_pop:
    - match: (?={{record_separator}})
      pop: true

  # Consumes and scopes the record separator, then pops.
  record_separator:
    - meta_include_prototype: false
    - match: '{{record_separator}}'
      scope: punctuation.terminator.record.csv
      pop: true

  # Sits between two fields: pops (without consuming) when the record is
  # ending, otherwise consumes and scopes exactly one field separator.
  field_or_record_separator:
    - meta_include_prototype: false
    - include: record_separator_pop
    - match: '{{field_separator}}'
      scope: punctuation.separator.sequence.csv
      pop: true

  # Body shared by all field contexts. Quoted strings are delegated to the
  # `double_quoted_string` context of the `text.csv` syntax; the field ends
  # (via lookahead) at the next field or record separator.
  field_contents:
    - match: '"'
      scope: punctuation.definition.string.begin.csv
      push: scope:text.csv#double_quoted_string

    - include: record_separator_pop
    - match: (?={{field_separator}})
      pop: true

  # field1..field5 differ only in the scopes applied to their content.
  # Each one immediately replaces itself (`set`) with an anonymous context
  # that carries the scope and reuses `field_contents`.
  field1:
    - match: ''
      set:
        - meta_content_scope: meta.field-1.csv variable.parameter
        - include: field_contents
  field2:
    - match: ''
      set:
        - meta_content_scope: meta.field-2.csv support.function
        - include: field_contents
  field3:
    - match: ''
      set:
        - meta_content_scope: meta.field-3.csv constant.numeric
        - include: field_contents
  field4:
    - match: ''
      set:
        - meta_content_scope: meta.field-4.csv keyword.operator
        - include: field_contents
  field5:
    - match: ''
      set:
        - meta_content_scope: meta.field-5.csv string.unquoted
        - include: field_contents
113 changes: 113 additions & 0 deletions assets/syntaxes/02_Extra/CSV/CSV.sublime-syntax
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
%YAML 1.2
---
# See http://www.sublimetext.com/docs/3/syntax.html
#
# Entry-point grammar for delimiter-separated text. At the first line start
# it tries to pick one delimiter and hands over to the matching
# single-delimiter syntax (text.csv.comma / .semi-colon / .tab / .pipe).
# If no rule picks one, it highlights the file itself and treats every
# supported separator character as a field separator.
name: Separated Values
file_extensions:
  - csv
scope: text.csv
variables:
  # Separator characters, spliced into character classes below.
  field_separator_chars: ',;\t|'
  field_separator: (?:[{{field_separator_chars}}])
  # End of line, optionally followed by the newline character itself.
  record_separator: (?:$\n?)
contexts:
  # Detection. The rules are tried in order; the first two groups replace
  # this context (`set`) with another syntax, the final rule pushes the
  # generic fallback.
  main:
    - meta_include_prototype: false
    - include: three_field_separators
    - include: single_separator_type_on_line
    - match: '^'
      push: unknown-separated-main

  # Lookahead: the line contains at least three occurrences of one
  # separator. Rule order gives the precedence: comma, semi-colon, tab, pipe.
  three_field_separators:
    - match: ^(?=(?:[^,]*,){3})
      set: scope:text.csv.comma
    - match: ^(?=(?:[^;]*;){3})
      set: scope:text.csv.semi-colon
    - match: ^(?=(?:[^\t]*\t){3})
      set: scope:text.csv.tab
    - match: ^(?=(?:[^|]*\|){3})
      set: scope:text.csv.pipe

  # Lookahead: the line contains one kind of separator and none of the
  # other three kinds anywhere up to the end of the line.
  single_separator_type_on_line:
    - match: ^(?=[^{{field_separator_chars}}]*,[^;\t|]*$)
      set: scope:text.csv.comma
    - match: ^(?=[^{{field_separator_chars}}]*;[^,\t|]*$)
      set: scope:text.csv.semi-colon
    - match: ^(?=[^{{field_separator_chars}}]*\t[^,;|]*$)
      set: scope:text.csv.tab
    - match: ^(?=[^{{field_separator_chars}}]*\|[^,;\t]*$)
      set: scope:text.csv.pipe

  # Fallback record loop. A record separator is consumed in place; otherwise
  # a batch of five field contexts (each followed by a separator context) is
  # pushed. The last list entry ends up on top of the stack, so `field1` is
  # matched first. Once the batch is used up, control returns here.
  unknown-separated-main:
    - include: record_separator
    - match: ''
      push:
        - field_or_record_separator
        - field5
        - field_or_record_separator
        - field4
        - field_or_record_separator
        - field3
        - field_or_record_separator
        - field2
        - field_or_record_separator
        - field1

  # Lookahead only: pops when a record separator is next without consuming it.
  record_separator_pop:
    - match: (?={{record_separator}})
      pop: true

  # Consumes and scopes the record separator. There is no `pop` here, so
  # `unknown-separated-main` stays on the stack for the following lines.
  # NOTE(review): presumably intentional, so that delimiter detection in
  # `main` is not re-run on later lines; confirm.
  record_separator:
    - meta_include_prototype: false
    - match: '{{record_separator}}'
      scope: punctuation.terminator.record.csv

  # Sits between two fields: pops (without consuming) when the record is
  # ending, otherwise consumes and scopes exactly one field separator.
  field_or_record_separator:
    - meta_include_prototype: false
    - include: record_separator_pop
    - match: '{{field_separator}}'
      scope: punctuation.separator.sequence.csv
      pop: true

  # Body shared by all field contexts: opens a quoted string on `"`, and
  # ends the field (via lookahead) at the next field or record separator.
  field_contents:
    - match: '"'
      scope: punctuation.definition.string.begin.csv
      push: double_quoted_string

    - include: record_separator_pop
    - match: (?={{field_separator}})
      pop: true

  # Inside a quoted string: `""` is an escaped quote, a lone `"` closes the
  # string. Separators have no rule here, so they do not end the string.
  double_quoted_string:
    - meta_include_prototype: false
    - meta_scope: string.quoted.double.csv
    - match: '""'
      scope: constant.character.escape.csv
    - match: '"'
      scope: punctuation.definition.string.end.csv
      pop: true

  # field1..field5 differ only in the scopes applied to their content.
  # Each one immediately replaces itself (`set`) with an anonymous context
  # that carries the scope and reuses `field_contents`.
  field1:
    - match: ''
      set:
        - meta_content_scope: meta.field-1.csv variable.parameter
        - include: field_contents
  field2:
    - match: ''
      set:
        - meta_content_scope: meta.field-2.csv support.function
        - include: field_contents
  field3:
    - match: ''
      set:
        - meta_content_scope: meta.field-3.csv constant.numeric
        - include: field_contents
  field4:
    - match: ''
      set:
        - meta_content_scope: meta.field-4.csv keyword.operator
        - include: field_contents
  field5:
    - match: ''
      set:
        - meta_content_scope: meta.field-5.csv string.unquoted
        - include: field_contents
Loading

0 comments on commit 8d94574

Please sign in to comment.