#' Default context-inclusion flags for an agent
#'
#' @param agent Length-one character agent key, or \code{NA_character_} when
#'   no agent was supplied.
#' @return A list with logical elements \code{memory}, \code{project},
#'   \code{global} and \code{soul}. Every flag is \code{TRUE}, except that
#'   \code{memory} is \code{FALSE} for \code{"claude"}.
#' @noRd
agent_context_defaults <- function(agent) {
  all_on <- list(memory = TRUE, project = TRUE, global = TRUE, soul = TRUE)
  if (is.na(agent)) {
    return(all_on)
  }

  # "codex", "llamar" and any unrecognised key share the all-on profile;
  # "claude" is the only key that departs from it.
  claude_profile <- all_on
  claude_profile$memory <- FALSE

  switch(agent,
    claude = claude_profile,
    all_on
  )
}
#' Null-coalescing operator
#'
#' @param a Value to use unless it is \code{NULL}.
#' @param b Fallback; only evaluated when \code{a} is \code{NULL}.
#' @return \code{a} when it is not \code{NULL}, otherwise \code{b}.
#' @noRd
`%||%` <- function(a, b) {
  if (!is.null(a)) {
    return(a)
  }
  b
}
#' @examples #' # Create a minimal project #' d <- file.path(tempdir(), "blastpkg") @@ -24,10 +34,22 @@ #' # Find all callers of helper() #' blast_radius("helper", project = d, scan_dir = tempdir(), #' cache_dir = tempdir()) +#' +#' # Include roxygen @examples and vignettes from the target project +#' blast_radius("helper", project = d, include = c("r", "examples", "vignettes"), +#' scan_dir = tempdir(), cache_dir = tempdir()) #' @export -blast_radius <- function(fn, project = NULL, scan_dir = path.expand("~"), +blast_radius <- function(fn, project = NULL, include = "r", + scan_dir = path.expand("~"), cache_dir = file.path(tools::R_user_dir("saber", "cache"), "symbols"), exclude = default_exclude()) { + allowed <- c("r", "examples", "vignettes") + bad <- setdiff(include, allowed) + if (length(bad) > 0L) { + stop("invalid 'include' value(s): ", paste(bad, collapse = ", "), + ". Allowed: ", paste(allowed, collapse = ", ")) + } + if (is.null(project)) { project <- basename(getwd()) } @@ -39,44 +61,66 @@ blast_radius <- function(fn, project = NULL, scan_dir = path.expand("~"), } project_name <- basename(normalizePath(project_dir, mustWork = FALSE)) - results <- data.frame(caller = character(), project = character(), - file = character(), line = integer(), - stringsAsFactors = FALSE) + results <- empty_blast_results() # 1. Internal callers from this project's symbol cache - if (dir.exists(project_dir)) { + if ("r" %in% include && dir.exists(project_dir)) { syms <- symbols(project_dir, cache_dir = cache_dir) internal <- syms$calls[syms$calls$callee == fn,, drop = FALSE] if (nrow(internal) > 0L) { results <- rbind(results, - data.frame(caller = internal$caller, project = project_name, - file = internal$file, line = internal$line, + data.frame(caller = internal$caller, + project = project_name, + file = internal$file, + line = internal$line, + source = "r", stringsAsFactors = FALSE)) } } - # 2. 
Find downstream projects via DESCRIPTION files - downstream <- find_downstream(project_name, scan_dir, exclude) + # 2. Downstream projects via DESCRIPTION files (R source only) + if ("r" %in% include) { + downstream <- find_downstream(project_name, scan_dir, exclude) + for (ds_name in downstream) { + ds_dir <- file.path(scan_dir, ds_name) + if (!dir.exists(file.path(ds_dir, "R"))) { + next + } - for (ds_name in downstream) { - ds_dir <- file.path(scan_dir, ds_name) - if (!dir.exists(file.path(ds_dir, "R"))) { - next + ds_syms <- symbols(ds_dir, cache_dir = cache_dir) + qualified <- paste0(project_name, "::", fn) + ds_callers <- ds_syms$calls[ds_syms$calls$callee == qualified | + ds_syms$calls$callee == fn,, drop = FALSE] + if (nrow(ds_callers) > 0L) { + results <- rbind(results, + data.frame(caller = ds_callers$caller, + project = ds_name, + file = ds_callers$file, + line = ds_callers$line, + source = "r", + stringsAsFactors = FALSE)) + } } + } - ds_syms <- symbols(ds_dir, cache_dir = cache_dir) - # Look for pkg::fn calls and bare fn calls - qualified <- paste0(project_name, "::", fn) - ds_callers <- ds_syms$calls[ds_syms$calls$callee == qualified | - ds_syms$calls$callee == fn,, drop = FALSE] - if (nrow(ds_callers) > 0L) { - results <- rbind(results, - data.frame(caller = ds_callers$caller, project = ds_name, - file = ds_callers$file, line = ds_callers$line, - stringsAsFactors = FALSE)) - } + # 3. Target-project roxygen @examples + if ("examples" %in% include && dir.exists(project_dir)) { + results <- rbind(results, scan_examples(project_dir, fn)) + } + + # 4. 
#' Zero-row template for blast_radius() results
#'
#' @return A data.frame with no rows and the columns \code{caller},
#'   \code{project}, \code{file} (character), \code{line} (integer) and
#'   \code{source} (character).
#' @noRd
empty_blast_results <- function() {
  columns <- list(
    caller = character(),
    project = character(),
    file = character(),
    line = integer(),
    source = character()
  )
  do.call(data.frame, c(columns, list(stringsAsFactors = FALSE)))
}
#' Scan roxygen \verb{@examples} blocks for function calls
#'
#' Reads every file in \code{R/} whose name ends in \code{.R} or \code{.r},
#' takes the example-body lines reported by \code{extract_example_blocks()}
#' and keeps the ones on which \code{match_fn_lines()} finds a call to
#' \code{fn}.
#'
#' @param project_dir Path to the project root.
#' @param fn Function name to search for.
#' @return A data.frame with columns caller, project, file, line, source.
#'   \code{caller} is the documented function reported for the block,
#'   \code{file} is the basename of the R file, \code{line} is the 1-indexed
#'   line number within that file and \code{source} is always
#'   \code{"example"}. The empty template frame is returned when there is no
#'   \code{R/} directory or nothing matches.
#' @noRd
scan_examples <- function(project_dir, fn) {
  project_name <- basename(normalizePath(project_dir, mustWork = FALSE))
  # NOTE(review): empty_doc_results() is defined outside the code visible
  # here; presumably it has the same columns as the frames built below --
  # confirm, since rbind() requires matching column names.
  empty <- empty_doc_results()

  r_dir <- file.path(project_dir, "R")
  if (!dir.exists(r_dir)) {
    return(empty)
  }
  r_files <- list.files(r_dir, pattern = "\\.[Rr]$", full.names = TRUE)

  # Collect one small frame per matching block and bind once at the end;
  # calling rbind() inside the loop re-copies every accumulated row per hit.
  pieces <- list()
  for (fp in r_files) {
    lines <- readLines(fp, warn = FALSE)
    for (b in extract_example_blocks(lines)) {
      hits <- match_fn_lines(lines[b$line_nums], fn)
      if (length(hits) == 0L) {
        next
      }
      pieces[[length(pieces) + 1L]] <- data.frame(
        caller = b$documented_fn,
        project = project_name,
        file = basename(fp),
        # hits index into the block's lines; map back to file line numbers
        line = b$line_nums[hits],
        source = "example",
        stringsAsFactors = FALSE
      )
    }
  }

  if (length(pieces) == 0L) {
    return(empty)
  }
  do.call(rbind, c(list(empty), pieces))
}
#' Scan vignette code chunks for function calls
#'
#' Looks recursively in \code{vignettes/} and \code{inst/doc/} for files
#' ending in \code{.Rmd}, \code{.rmd}, \code{.qmd}, \code{.Rnw} or
#' \code{.rnw}. Lines that \code{chunk_mask()} marks as chunk code are passed
#' to \code{match_fn_lines()} and the matching lines are reported.
#'
#' @param project_dir Path to the project root.
#' @param fn Function name to search for.
#' @return A data.frame with columns caller, project, file, line, source.
#'   \code{caller} is always the empty string, \code{file} is the vignette
#'   path with the leading \code{project_dir} removed, \code{line} is the
#'   1-indexed line number within that file and \code{source} is always
#'   \code{"vignette"}. The empty template frame is returned when no
#'   vignette directory, no vignette file or no match exists.
#' @noRd
scan_vignettes <- function(project_dir, fn) {
  project_name <- basename(normalizePath(project_dir, mustWork = FALSE))
  # NOTE(review): empty_doc_results() is defined outside the code visible
  # here; presumably it has the same columns as the frames built below --
  # confirm, since rbind() requires matching column names.
  empty <- empty_doc_results()

  dirs <- file.path(project_dir, c("vignettes", "inst/doc"))
  dirs <- dirs[dir.exists(dirs)]
  if (length(dirs) == 0L) {
    return(empty)
  }

  files <- unlist(lapply(dirs, list.files,
                         pattern = "\\.(Rmd|rmd|qmd|Rnw|rnw)$",
                         full.names = TRUE, recursive = TRUE))
  if (length(files) == 0L) {
    return(empty)
  }

  # Bind once at the end instead of growing a data.frame inside the loop.
  pieces <- list()
  for (fp in files) {
    lines <- readLines(fp, warn = FALSE)
    is_sweave <- identical(tolower(tools::file_ext(fp)), "rnw")
    code_line_nums <- which(chunk_mask(lines, is_sweave))
    if (length(code_line_nums) == 0L) {
      next
    }
    hits <- match_fn_lines(lines[code_line_nums], fn)
    if (length(hits) == 0L) {
      next
    }

    # Strip the project prefix literally. Interpolating project_dir into a
    # regular expression breaks on paths containing metacharacters such as
    # ".", "+", "(" or "[".
    rel <- fp
    if (startsWith(fp, project_dir)) {
      rel <- sub("^/", "", substring(fp, nchar(project_dir) + 1L))
    }

    pieces[[length(pieces) + 1L]] <- data.frame(
      caller = "",
      project = project_name,
      file = rel,
      # hits index into the chunk lines; map back to file line numbers
      line = code_line_nums[hits],
      source = "vignette",
      stringsAsFactors = FALSE
    )
  }

  if (length(pieces) == 0L) {
    return(empty)
  }
  do.call(rbind, c(list(empty), pieces))
}
#' Find the function definition that follows a given line
#'
#' Scans forward from \code{start}, skipping blank lines and comment lines
#' (roxygen \verb{#'} lines as well as ordinary \verb{#} comments). The first
#' remaining line decides the result: if it is a \code{name <- function} or
#' \code{name = function} assignment the name is returned, otherwise the
#' search stops and the empty string is returned.
#'
#' @param lines Character vector of file lines.
#' @param start 1-indexed line at which to start looking.
#' @param n Last line index to consider.
#' @return The assigned name, or \code{""} when \code{start > n}, when only
#'   blank/comment lines remain, or when the first code line is not a
#'   function assignment.
#' @noRd
next_defined_fn <- function(lines, start, n) {
  def_re <- "^\\s*([A-Za-z.][A-Za-z0-9._]*)\\s*(<-|=)\\s*function\\b"
  if (start > n) {
    return("")
  }

  for (j in seq.int(start, n)) {
    line <- lines[j]
    # Comments are not expressions, so they do not separate a roxygen block
    # from the definition it documents.
    if (!nzchar(trimws(line)) || grepl("^\\s*#", line)) {
      next
    }
    m <- regmatches(line, regexec(def_re, line))[[1L]]
    if (length(m) >= 2L) {
      return(m[2L])
    }
    # First real code line is not a function definition: give up.
    return("")
  }

  ""
}
#' Build a per-line logical mask for "this line is inside an R code chunk"
#'
#' A line is flagged when it lies strictly between a chunk-opening line and
#' the next chunk-closing line; the fence/delimiter lines themselves are never
#' flagged. An opening pattern seen while already inside a chunk is treated
#' as chunk content.
#'
#' Markdown fences may be preceded by tabs, spaces or \verb{>} and may use
#' three or more backticks, so chunks nested in lists or block quotes are
#' recognised as well.
#'
#' @param lines Character vector of file lines.
#' @param is_sweave If TRUE, parse Rnw \verb{<<>>=...@} chunks; otherwise
#'   Rmd/qmd \verb{```{r...}...```} chunks.
#' @return Logical vector of \code{length(lines)}.
#' @noRd
chunk_mask <- function(lines, is_sweave = FALSE) {
  if (is_sweave) {
    start_re <- "^<<.*>>="
    end_re <- "^@\\s*$"
  } else {
    start_re <- "^[\t >]*```+\\s*\\{[rR]([, }]|$)"
    end_re <- "^[\t >]*```+\\s*$"
  }

  # Classify every line once up front; the loop below only tracks state.
  opens <- grepl(start_re, lines)
  closes <- grepl(end_re, lines)

  inside <- logical(length(lines))
  in_chunk <- FALSE
  for (i in seq_along(lines)) {
    if (in_chunk) {
      if (closes[i]) {
        in_chunk <- FALSE
      } else {
        inside[i] <- TRUE
      }
    } else if (opens[i]) {
      in_chunk <- TRUE
    }
  }

  inside
}
@examples", + "#' # helper is mentioned in a comment but not called", + "#' 1 + 1", + "#' @export", + "other <- function(x) x" +), file.path(proj, "R", "documented.R")) + +# Invalidate cache so symbols() re-reads +unlink(cache, recursive = TRUE) + +br_ex <- blast_radius("helper", project = proj, + include = c("r", "examples"), + scan_dir = fake_home, cache_dir = cache) +ex_rows <- br_ex[br_ex$source == "example",, drop = FALSE] +expect_true(nrow(ex_rows) >= 2L) # two calls inside @examples +expect_equal(unique(ex_rows$file), "documented.R") +expect_true("documented" %in% ex_rows$caller) +# The prose-only @examples block in `other` should NOT match +expect_false(any(ex_rows$caller == "other")) + +# Without examples, the ex hits disappear +br_no_ex <- blast_radius("helper", project = proj, include = "r", + scan_dir = fake_home, cache_dir = cache) +expect_false(any(br_no_ex$source == "example")) + +# --- include = "vignettes": scans Rmd code chunks --- + +dir.create(file.path(proj, "vignettes")) +writeLines(c( + "---", + "title: demo", + "---", + "", + "Some prose that mentions helper() but is not in a chunk.", + "", + "```{r setup}", + "helper(10)", + "```", + "", + "More prose.", + "", + "```{r, eval=FALSE}", + "mypkg::helper(20)", + "```" +), file.path(proj, "vignettes", "demo.Rmd")) + +br_vi <- blast_radius("helper", project = proj, + include = c("r", "vignettes"), + scan_dir = fake_home, cache_dir = cache) +vi_rows <- br_vi[br_vi$source == "vignette",, drop = FALSE] +expect_equal(nrow(vi_rows), 2L) +expect_true(all(grepl("demo\\.Rmd$", vi_rows$file))) +# Prose mention outside chunks is not counted +expect_false(any(vi_rows$line == 5L)) + +# --- include accepts all three at once --- +br_all <- blast_radius("helper", project = proj, + include = c("r", "examples", "vignettes"), + scan_dir = fake_home, cache_dir = cache) +expect_true(all(c("r", "example", "vignette") %in% br_all$source)) # --- Cleanup --- unlink(c(fake_home, cache), recursive = TRUE) diff --git 
a/man/blast_radius.Rd b/man/blast_radius.Rd index c2557a0..a411eca 100644 --- a/man/blast_radius.Rd +++ b/man/blast_radius.Rd @@ -3,7 +3,7 @@ \alias{blast_radius} \title{Blast radius analysis} \usage{ -blast_radius(fn, project = NULL, scan_dir = path.expand("~"), +blast_radius(fn, project = NULL, include = "r", scan_dir = path.expand("~"), cache_dir = file.path(tools::R_user_dir("saber", "cache"), "symbols"), exclude = default_exclude()) } @@ -12,6 +12,11 @@ blast_radius(fn, project = NULL, scan_dir = path.expand("~"), \item{project}{Character. Project name (or path to project directory).} +\item{include}{Character vector. Any of \code{"r"} (R source, default), +\code{"examples"} (roxygen \verb{@examples} blocks in the target +project), and \code{"vignettes"} (code chunks in the target project's +vignettes).} + \item{scan_dir}{Directory to scan for downstream projects.} \item{cache_dir}{Directory for symbol cache files.} @@ -20,7 +25,8 @@ blast_radius(fn, project = NULL, scan_dir = path.expand("~"), scanning for downstream projects.} } \value{ -A data.frame with columns: caller, project, file, line. +A data.frame with columns: caller, project, file, line, source. + \code{source} is one of \code{"r"}, \code{"example"}, \code{"vignette"}. } \description{ Find all callers of a function across projects. @@ -29,6 +35,11 @@ Find callers of a function across projects Given a function name and project, finds all internal callers within that project and all callers in downstream projects (projects whose DESCRIPTION lists this one in Depends, Imports, or LinkingTo). + +With \code{include = c("r", "examples", "vignettes")} the search can be +extended to references in the target project's roxygen \verb{@examples} +blocks and vignette code chunks (Rmd, qmd, Rnw). Documentation scanning is +target-project only; it does not walk downstream projects' docs. 
} \examples{ # Create a minimal project @@ -40,4 +51,8 @@ writeLines("main <- function(x) helper(x * 2)", file.path(d, "R", "main.R")) # Find all callers of helper() blast_radius("helper", project = d, scan_dir = tempdir(), cache_dir = tempdir()) + +# Include roxygen @examples and vignettes from the target project +blast_radius("helper", project = d, include = c("r", "examples", "vignettes"), + scan_dir = tempdir(), cache_dir = tempdir()) }