From 98ebda0b981d66b62c144e22034cd13635bbebe0 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 27 Feb 2025 19:43:43 +0000 Subject: [PATCH 01/11] ignore string contents optionally --- NEWS.md | 1 + R/line_length_linter.R | 51 +++++++++++++++++++++-- R/utils.R | 8 +++- man/line_length_linter.Rd | 8 +++- tests/testthat/test-line_length_linter.R | 53 ++++++++++++++++++++++++ 5 files changed, 114 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 218223120a..112659d4d9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -23,6 +23,7 @@ * `indentation_linter()` handles `for` un-braced for loops correctly (#2564, @MichaelChirico). * Setting `exclusions` supports globs like `knitr*` to exclude files/directories with a pattern (#1554, @MichaelChirico). * `object_name_linter()` and `object_length_linter()` apply to objects assigned with `assign()` or generics created with `setGeneric()` (#1665, @MichaelChirico). +* `line_length_linter()` has a new argument `ignore_string_bodies` (defaulting to `FALSE`) which governs whether the contents of multi-line string bodies should be linted (#856, @MichaelChirico). We think the biggest use case for this is writing SQL in R strings, especially in cases where the recommended string with for SQL & R differ. ### Lint accuracy fixes: removing false positives diff --git a/R/line_length_linter.R b/R/line_length_linter.R index c8780c6d8b..55eafc5e58 100644 --- a/R/line_length_linter.R +++ b/R/line_length_linter.R @@ -2,7 +2,10 @@ #' #' Check that the line length of both comments and code is less than `length`. #' -#' @param length maximum line length allowed. Default is 80L (Hollerith limit). +#' @param length Maximum line length allowed. Default is `80L` (Hollerith limit). +#' @param ignore_string_bodies Logical, default `FALSE`. If `TRUE`, the contents +#' of string literals are ignored. The quotes themselves are included, so this +#' mainly affects wide multiline strings, e.g. SQL queries. #' #' @examples #' # will produce lints @@ -22,14 +25,20 @@ #' - [linters] for a complete list of linters available in lintr. #' - #' @export -line_length_linter <- function(length = 80L) { +line_length_linter <- function(length = 80L, ignore_string_bodies = FALSE) { general_msg <- paste("Lines should not be more than", length, "characters.") Linter(linter_level = "file", function(source_expression) { # Only go over complete file - line_lengths <- nchar(source_expression$file_lines) + line_lengths <- as.integer(nchar(source_expression$file_lines)) long_lines <- which(line_lengths > length) + if (ignore_string_bodies) { + in_string_body_idx <- + is_in_string_body(source_expression$full_parsed_content, length, long_lines) + long_lines <- long_lines[!in_string_body_idx] + } + Map( function(long_line, line_length) { Lint( @@ -47,3 +56,39 @@ line_length_linter <- function(length = 80L) { ) }) } + +is_in_string_body <- function(parse_data, max_length, long_idx) { + str_idx <- parse_data$token == "STR_CONST" + if (!any(str_idx)) { + return(rep(FALSE, length(long_idx))) + } + str_data <- parse_data[str_idx, ] + if (all(str_data$line1 == str_data$line2)) { + return(rep(FALSE, length(long_idx))) + } + # right delimiter just ends at 'col2', but 'col1' takes some sleuthing + str_data$line1_width <- nchar(vapply( + strsplit(str_data$text, "\n", fixed = TRUE), + function(x) x[1L], + FUN.VALUE = character(1L), + USE.NAMES = FALSE + )) + str_data$col1_end <- str_data$col1 + str_data$line1_width + vapply( + long_idx, + function(line) { + # strictly inside a multi-line string body + if (any(str_data$line1 < line & str_data$line2 > line)) { + return(TRUE) + } + on_line1_idx <- str_data$line1 == line + if (any(on_line1_idx)) { + return(max(str_data$col1_end[on_line1_idx]) <= max_length) + } + # use parse data to capture possible trailing expressions on this line + on_line2_idx <- parse_data$line2 == line + any(on_line2_idx) && max(parse_data$col2[on_line2_idx]) <= max_length + }, + logical(1L) + ) +} diff --git a/R/utils.R b/R/utils.R index 37d37ef296..32b12861c5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -241,8 +241,12 @@ get_r_string <- function(s, xpath = NULL) { s <- xml_find_chr(s, sprintf("string(%s)", xpath)) } } - # parse() skips "" elements --> offsets the length of the output, - # but NA in --> NA out + r_string_from_parse_text(s) +} + +# parse() skips "" elements --> offsets the length of the output, +# but NA in --> NA out +r_string_from_parse_text <- function(s) { is.na(s) <- !nzchar(s) out <- as.character(parse(text = s, keep.source = FALSE)) is.na(out) <- is.na(s) diff --git a/man/line_length_linter.Rd b/man/line_length_linter.Rd index 5585139d21..4305e5df56 100644 --- a/man/line_length_linter.Rd +++ b/man/line_length_linter.Rd @@ -4,10 +4,14 @@ \alias{line_length_linter} \title{Line length linter} \usage{ -line_length_linter(length = 80L) +line_length_linter(length = 80L, ignore_string_bodies = FALSE) } \arguments{ -\item{length}{maximum line length allowed. Default is 80L (Hollerith limit).} +\item{length}{Maximum line length allowed. Default is \code{80L} (Hollerith limit).} + +\item{ignore_string_bodies}{Logical, default \code{FALSE}. If \code{TRUE}, the contents +of string literals are ignored. The quotes themselves are included, so this +mainly affects wide multiline strings, e.g. SQL queries.} } \description{ Check that the line length of both comments and code is less than \code{length}. diff --git a/tests/testthat/test-line_length_linter.R b/tests/testthat/test-line_length_linter.R index 5e22fc523b..26d79eb121 100644 --- a/tests/testthat/test-line_length_linter.R +++ b/tests/testthat/test-line_length_linter.R @@ -71,3 +71,56 @@ test_that("Multiple lints give custom messages", { line_length_linter(5L) ) }) + +test_that("string bodies can be ignored", { + linter <- line_length_linter(10L, ignore_string_bodies = TRUE) + lint_msg <- rex::rex("Lines should not be more than 10 characters. This line is 15 characters.") + + expect_no_lint( + trim_some(" + 1234567890 + str <- ' + 123456789012345 + ' + "), + linter + ) + + expect_no_lint( + trim_some(" + 1234567890 + str <- '90 + 123456789012345 + 123456789' + "), + linter + ) + + expect_lint( + trim_some(" + 1234567890 + str <- '9012345 + 1234567890 + 123456789' + "), + lint_msg, + linter + ) + + expect_lint( + trim_some(" + 1234567890 + str <- '90 + 1234567890 + 12345678'; 2345 + "), + lint_msg, + linter + ) + + expect_lint( + "'1'; '2'; '345'", + lint_msg, + linter + ) +}) From 1478a4c56f139f4c876d06e1c2ab255d488db7f4 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 27 Feb 2025 19:45:27 +0000 Subject: [PATCH 02/11] expect_no_lint --- tests/testthat/test-line_length_linter.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-line_length_linter.R b/tests/testthat/test-line_length_linter.R index 26d79eb121..2b74173621 100644 --- a/tests/testthat/test-line_length_linter.R +++ b/tests/testthat/test-line_length_linter.R @@ -1,8 +1,8 @@ test_that("line_length_linter skips allowed usages", { linter <- line_length_linter(80L) - expect_lint("blah", NULL, linter) - expect_lint(strrep("x", 80L), NULL, linter) + expect_no_lint("blah", linter) + expect_no_lint(strrep("x", 80L), linter) }) test_that("line_length_linter blocks disallowed usages", { @@ -37,7 +37,7 @@ test_that("line_length_linter blocks disallowed usages", { linter <- line_length_linter(20L) lint_msg <- rex::rex("Lines should not be more than 20 characters. This line is 22 characters.") - expect_lint(strrep("a", 20L), NULL, linter) + expect_no_lint(strrep("a", 20L), linter) expect_lint( strrep("a", 22L), list( From 75052f49046ed26e20453d1b68aaaf363e1e3ca6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 27 Feb 2025 20:18:08 +0000 Subject: [PATCH 03/11] test with no string input --- tests/testthat/test-line_length_linter.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/testthat/test-line_length_linter.R b/tests/testthat/test-line_length_linter.R index 2b74173621..88caa6b407 100644 --- a/tests/testthat/test-line_length_linter.R +++ b/tests/testthat/test-line_length_linter.R @@ -123,4 +123,10 @@ test_that("string bodies can be ignored", { lint_msg, linter ) + + expect_lint( + "123456789012345", + lint_msg, + linter + ) }) From cdcadd8b8fe27289a15debaa56743374355592fc Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 3 Mar 2025 09:55:48 -0800 Subject: [PATCH 04/11] revert as.integer() --- R/line_length_linter.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/line_length_linter.R b/R/line_length_linter.R index 55eafc5e58..0655131ce4 100644 --- a/R/line_length_linter.R +++ b/R/line_length_linter.R @@ -30,7 +30,7 @@ line_length_linter <- function(length = 80L, ignore_string_bodies = FALSE) { Linter(linter_level = "file", function(source_expression) { # Only go over complete file - line_lengths <- as.integer(nchar(source_expression$file_lines)) + line_lengths <- nchar(source_expression$file_lines) long_lines <- which(line_lengths > length) if (ignore_string_bodies) { From b44c55aa4fb8fd111c488d144239cb1f8dc1b9d6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 3 Mar 2025 18:01:16 +0000 Subject: [PATCH 05/11] more tests against off-by-one issue --- tests/testthat/test-line_length_linter.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tests/testthat/test-line_length_linter.R b/tests/testthat/test-line_length_linter.R index 88caa6b407..8ad23acdc5 100644 --- a/tests/testthat/test-line_length_linter.R +++ b/tests/testthat/test-line_length_linter.R @@ -86,6 +86,16 @@ test_that("string bodies can be ignored", { linter ) + expect_no_lint( + trim_some(" + 1234567890 + str45 <- ' + 123456789012345 + ' + "), + linter + ) + expect_no_lint( trim_some(" 1234567890 @@ -96,6 +106,20 @@ test_that("string bodies can be ignored", { linter ) + expect_lint( + trim_some(" + 1234567890 + str456 <- ' + 123456789012345 + ' + "), + list( + list("11 characters", line_number = 2L), + list("11 characters", line_number = 4L) + ), + linter + ) + expect_lint( trim_some(" 1234567890 From 4c0a2102e490128c8f81857dc72ab838a4ed24e7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 29 Jul 2025 12:55:32 -0700 Subject: [PATCH 06/11] typo: with->width --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 7f24875b12..144bd64229 100644 --- a/NEWS.md +++ b/NEWS.md @@ -48,7 +48,7 @@ * `object_usage_linter()` lints missing packages that may cause false positives (#2872, @AshesITR) * New argument `include_s4_slots` for the `xml_find_function_calls()` entry in the `get_source_expressions()` to govern whether calls of the form `s4Obj@fun()` are included in the result (#2820, @MichaelChirico). * `sprintf_linter()` lints `sprintf()` and `gettextf()` calls when a constant string is passed to `fmt` (#2894, @Bisaloo). -* `line_length_linter()` has a new argument `ignore_string_bodies` (defaulting to `FALSE`) which governs whether the contents of multi-line string bodies should be linted (#856, @MichaelChirico). We think the biggest use case for this is writing SQL in R strings, especially in cases where the recommended string with for SQL & R differ. +* `line_length_linter()` has a new argument `ignore_string_bodies` (defaulting to `FALSE`) which governs whether the contents of multi-line string bodies should be linted (#856, @MichaelChirico). We think the biggest use case for this is writing SQL in R strings, especially in cases where the recommended string width for SQL & R differ. ### New linters From b7ff1e31ecd8642913c4b3dd4c519416ee1f9240 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Oct 2025 08:47:28 -0700 Subject: [PATCH 07/11] try simpler version Co-authored-by: Hugo Gruson <10783929+Bisaloo@users.noreply.github.com> --- R/line_length_linter.R | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/R/line_length_linter.R b/R/line_length_linter.R index 0655131ce4..ac45aec223 100644 --- a/R/line_length_linter.R +++ b/R/line_length_linter.R @@ -66,28 +66,11 @@ is_in_string_body <- function(parse_data, max_length, long_idx) { if (all(str_data$line1 == str_data$line2)) { return(rep(FALSE, length(long_idx))) } - # right delimiter just ends at 'col2', but 'col1' takes some sleuthing - str_data$line1_width <- nchar(vapply( - strsplit(str_data$text, "\n", fixed = TRUE), - function(x) x[1L], - FUN.VALUE = character(1L), - USE.NAMES = FALSE - )) - str_data$col1_end <- str_data$col1 + str_data$line1_width vapply( long_idx, function(line) { # strictly inside a multi-line string body - if (any(str_data$line1 < line & str_data$line2 > line)) { - return(TRUE) - } - on_line1_idx <- str_data$line1 == line - if (any(on_line1_idx)) { - return(max(str_data$col1_end[on_line1_idx]) <= max_length) - } - # use parse data to capture possible trailing expressions on this line - on_line2_idx <- parse_data$line2 == line - any(on_line2_idx) && max(parse_data$col2[on_line2_idx]) <= max_length + any(str_data$line1 < line & str_data$line2 > line) }, logical(1L) ) From 72f53bb3f58171d713bba5bc52c2299ff853a5bb Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Oct 2025 16:36:03 +0000 Subject: [PATCH 08/11] Revert "try simpler version" This reverts commit b7ff1e31ecd8642913c4b3dd4c519416ee1f9240. --- R/line_length_linter.R | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/R/line_length_linter.R b/R/line_length_linter.R index ac45aec223..0655131ce4 100644 --- a/R/line_length_linter.R +++ b/R/line_length_linter.R @@ -66,11 +66,28 @@ is_in_string_body <- function(parse_data, max_length, long_idx) { if (all(str_data$line1 == str_data$line2)) { return(rep(FALSE, length(long_idx))) } + # right delimiter just ends at 'col2', but 'col1' takes some sleuthing + str_data$line1_width <- nchar(vapply( + strsplit(str_data$text, "\n", fixed = TRUE), + function(x) x[1L], + FUN.VALUE = character(1L), + USE.NAMES = FALSE + )) + str_data$col1_end <- str_data$col1 + str_data$line1_width vapply( long_idx, function(line) { # strictly inside a multi-line string body - any(str_data$line1 < line & str_data$line2 > line) + if (any(str_data$line1 < line & str_data$line2 > line)) { + return(TRUE) + } + on_line1_idx <- str_data$line1 == line + if (any(on_line1_idx)) { + return(max(str_data$col1_end[on_line1_idx]) <= max_length) + } + # use parse data to capture possible trailing expressions on this line + on_line2_idx <- parse_data$line2 == line + any(on_line2_idx) && max(parse_data$col2[on_line2_idx]) <= max_length }, logical(1L) ) From e8a50af95dae53554d55bfbe03aab3e7b1e54131 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Oct 2025 16:38:59 +0000 Subject: [PATCH 09/11] new test --- tests/testthat/test-line_length_linter.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/testthat/test-line_length_linter.R b/tests/testthat/test-line_length_linter.R index 8ad23acdc5..61f1c55abd 100644 --- a/tests/testthat/test-line_length_linter.R +++ b/tests/testthat/test-line_length_linter.R @@ -153,4 +153,7 @@ test_that("string bodies can be ignored", { lint_msg, linter ) + + expect_lint('"short" # 15!!!', lint_msg, linter) + expect_lint('foo("a", long_)', lint_msg, linter) }) From 32d62660d9f9b93c842905e0dece558fe8eae5f3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Oct 2025 16:46:11 +0000 Subject: [PATCH 10/11] example --- R/line_length_linter.R | 28 ++++++++++++++++++++++++++++ man/line_length_linter.Rd | 28 ++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/R/line_length_linter.R b/R/line_length_linter.R index 0655131ce4..ad2a410242 100644 --- a/R/line_length_linter.R +++ b/R/line_length_linter.R @@ -14,12 +14,40 @@ #' linters = line_length_linter(length = 20L) #' ) #' +#' lines <- paste( +#' "query <- '", +#' " SELECT *", +#' " FROM MyTable", +#' " WHERE profit > 0", +#' "'", +#' sep = "\n" +#' ) +#' writeLines(lines) +#' lint( +#' text = lines, +#' linters = line_length_linter(length = 10L) +#' ) +#' #' # okay #' lint( #' text = strrep("x", 21L), #' linters = line_length_linter(length = 40L) #' ) #' +#' lines <- paste( +#' "query <- '", +#' " SELECT *", +#' " FROM MyTable", +#' " WHERE profit > 0", +#' "'", +#' sep = "\n" +#' ) +#' writeLines(lines) +#' lint( +#' text = lines, +#' linters = line_length_linter(length = 10L, ignore_string_bodies = TRUE) +#' ) +#' #' @evalRd rd_tags("line_length_linter") #' @seealso #' - [linters] for a complete list of linters available in lintr. diff --git a/man/line_length_linter.Rd b/man/line_length_linter.Rd index 4305e5df56..43c6196768 100644 --- a/man/line_length_linter.Rd +++ b/man/line_length_linter.Rd @@ -23,12 +23,40 @@ lint( linters = line_length_linter(length = 20L) ) +lines <- paste( + "query <- '", + " SELECT *", + " FROM MyTable", + " WHERE profit > 0", + "'", + sep = "\n" +) +writeLines(lines) +lint( + text = lines, + linters = line_length_linter(length = 10L) +) + # okay lint( text = strrep("x", 21L), linters = line_length_linter(length = 40L) ) +lines <- paste( + "query <- '", + " SELECT *", + " FROM MyTable", + " WHERE profit > 0", + "'", + sep = "\n" +) +writeLines(lines) +lint( + text = lines, + linters = line_length_linter(length = 10L, ignore_string_bodies = TRUE) +) + } \seealso{ \itemize{ From 7f3a02903e4ccb62e4def0acbcde78429bd8c467 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Oct 2025 16:53:51 +0000 Subject: [PATCH 11/11] another example of an inline string --- R/line_length_linter.R | 20 ++++++++++++++++++++ man/line_length_linter.Rd | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/R/line_length_linter.R b/R/line_length_linter.R index ad2a410242..0cc51c1fcd 100644 --- a/R/line_length_linter.R +++ b/R/line_length_linter.R @@ -14,6 +14,12 @@ #' linters = line_length_linter(length = 20L) #' ) #' +#' # the trailing ' is counted towards line length, so this still lints +#' lint( +#' text = "'a long single-line string'", +#' linters = line_length_linter(length = 15L, ignore_string_bodies = TRUE) +#' ) +#' #' lines <- paste( #' "query <- '", #' " SELECT *", @@ -35,6 +41,20 @@ #' ) #' #' lines <- paste( +#' "paste(", +#' " 'a long',", +#' " 'single-line',", +#' " 'string'", +#' ")", +#' sep = "\n" +#' ) +#' writeLines(lines) +#' lint( +#' text = lines, +#' linters = line_length_linter(length = 15L, ignore_string_bodies = TRUE) +#' ) +#' +#' lines <- paste( #' "query <- '", #' " SELECT *", #' " FROM MyTable", diff --git a/man/line_length_linter.Rd b/man/line_length_linter.Rd index 43c6196768..8463e43e65 100644 --- a/man/line_length_linter.Rd +++ b/man/line_length_linter.Rd @@ -23,6 +23,12 @@ lint( linters = line_length_linter(length = 20L) ) +# the trailing ' is counted towards line length, so this still lints +lint( + text = "'a long single-line string'", + linters = line_length_linter(length = 15L, ignore_string_bodies = TRUE) +) + lines <- paste( "query <- '", " SELECT *", @@ -43,6 +49,20 @@ lint( linters = line_length_linter(length = 40L) ) +lines <- paste( + "paste(", + " 'a long',", + " 'single-line',", + " 'string'", + ")", + sep = "\n" +) +writeLines(lines) +lint( + text = lines, + linters = line_length_linter(length = 15L, ignore_string_bodies = TRUE) +) + lines <- paste( "query <- '", " SELECT *",