Skip to content

Commit fd3a60c

Browse files
committed
Support writing linear-tsv-style with escape="sep"
1 parent 788df43 commit fd3a60c

File tree

2 files changed

+58
-12
lines changed

2 files changed

+58
-12
lines changed

R/vroom_write.R

+23-3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#' @param escape The type of escape to use when quotes are in the data.
66
#' - `double` - quotes are escaped by doubling them.
77
#' - `backslash` - quotes are escaped by a preceding backslash.
8+
#' - `sep` - tabs, newlines, carriage returns, and backslashes are escaped as `\t`, `\n`, `\r`, and `\\`; quotes are not escaped.
89
#' - `none` - quotes are not escaped.
910
#' @param quote How to handle fields which contain characters that need to be
1011
#' quoted.
@@ -37,7 +38,7 @@
3738
#' # vroom_write(mtcars, "mtcars.tsv.xz")
3839
vroom_write <- function(x, file, delim = '\t', eol = "\n", na = "NA", col_names = !append,
3940
append = FALSE, quote = c("needed", "all", "none"), escape =
40-
c("double", "backslash", "none"), bom = FALSE, num_threads =
41+
c("double", "backslash", "sep", "none"), bom = FALSE, num_threads =
4142
vroom_threads(), progress = vroom_progress(), path = deprecated()) {
4243

4344
if (lifecycle::is_present(path)) {
@@ -53,6 +54,15 @@ vroom_write <- function(x, file, delim = '\t', eol = "\n", na = "NA", col_names
5354
quote <- match.arg(quote)
5455
escape <- match.arg(escape)
5556

57+
if (escape == "sep") {
58+
if (!all(c(delim, eol) %in% c("\t", "\n", "\r", "\r\n"))) {
59+
stop("Can only escape separators `\\t`, `\\n`, and `\\r`")
60+
}
61+
if (quote != "none") {
62+
warning("quotes in data will not be escaped with `escape = sep`")
63+
}
64+
}
65+
5666
opts <- get_vroom_write_opts(quote, escape, bom)
5767

5868
# Standardise path returns a list, but we will only ever have 1 output file.
@@ -109,7 +119,8 @@ vroom_write_opts <- function() c(
109119
"quote_all" = 2L,
110120
"escape_double" = 4L,
111121
"escape_backslash" = 8L,
112-
"bom" = 16L
122+
"bom" = 16L,
123+
"escape_sep" = 32L
113124
)
114125

115126
#' Convert a data frame to a delimited string
@@ -121,7 +132,7 @@ vroom_write_opts <- function() c(
121132
#' @inheritParams vroom_write
122133
#' @export
123134
vroom_format <- function(x, delim = "\t", eol = "\n", na = "NA", col_names = TRUE,
124-
escape = c("double", "backslash", "none"),
135+
escape = c("double", "backslash", "sep", "none"),
125136
quote = c("needed", "all", "none"),
126137
bom = FALSE,
127138
num_threads = vroom_threads()) {
@@ -135,6 +146,15 @@ vroom_format <- function(x, delim = "\t", eol = "\n", na = "NA", col_names = TRU
135146
quote <- match.arg(quote)
136147
escape <- match.arg(escape)
137148

149+
if (escape == "sep") {
150+
if (!all(c(delim, eol) %in% c("\t", "\n", "\r", "\r\n"))) {
151+
stop("Can only escape separators `\\t`, `\\n`, and `\\r`")
152+
}
153+
if (quote != "none") {
154+
warning("quotes in data will not be escaped with `escape = sep`")
155+
}
156+
}
157+
138158
opts <- get_vroom_write_opts(quote, escape, bom)
139159

140160
# This seems to work ok in practice

src/vroom_write.cc

+35-9
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ typedef enum {
2020
quote_all = 2,
2121
escape_double = 4,
2222
escape_backslash = 8,
23-
bom = 16
23+
bom = 16,
24+
escape_sep = 32
2425
} vroom_write_opt_t;
2526

2627
size_t get_buffer_size(
@@ -135,16 +136,41 @@ void str_to_buf(
135136
}
136137

137138
auto end = str_p + len;
138-
bool should_escape = options & (escape_double | escape_backslash);
139-
auto escape =
140-
options & escape_double ? '"' : options & escape_backslash ? '\\' : '\0';
141-
142139
buf.reserve(buf.size() + len);
143-
while (str_p < end) {
144-
if (should_escape && *str_p == '"') {
145-
buf.push_back(escape);
140+
141+
if (options & escape_sep) {
142+
while (str_p < end) {
143+
if (*str_p == '\t') {
144+
buf.push_back('\\');
145+
buf.push_back('t');
146+
++str_p;
147+
} else if (*str_p == '\n') {
148+
buf.push_back('\\');
149+
buf.push_back('n');
150+
++str_p;
151+
} else if (*str_p == '\r') {
152+
buf.push_back('\\');
153+
buf.push_back('r');
154+
++str_p;
155+
} else if (*str_p == '\\') {
156+
buf.push_back('\\');
157+
buf.push_back('\\');
158+
++str_p;
159+
} else {
160+
buf.push_back(*str_p++);
161+
}
162+
}
163+
} else {
164+
bool should_escape = options & (escape_double | escape_backslash);
165+
auto escape =
166+
options & escape_double ? '"' : options & escape_backslash ? '\\' : '\0';
167+
168+
while (str_p < end) {
169+
if (should_escape && *str_p == '"') {
170+
buf.push_back(escape);
171+
}
172+
buf.push_back(*str_p++);
146173
}
147-
buf.push_back(*str_p++);
148174
}
149175

150176
if (should_quote) {

0 commit comments

Comments
 (0)