Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: pensar
Type: Package
Title: 'LLM' Wiki Engine for R
Version: 0.3.1
Version: 0.4.0
Authors@R: c(
person("Troy", "Hernandez", role = c("aut", "cre"),
email = "troy@cornball.ai",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export(update_index)
export(use_vault)
export(vault_commit)
export(vault_export)
export(vault_graph)

S3method(print,pensar_lint)
S3method(print,pensar_page)
Expand Down
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# pensar 0.4.0

* New `vault_graph()` renders the vault's wikilink graph as static
SVG via `saber::graph_svg()`. Tooltips carry title, type, date,
tags, and a lede from the first meaningful body line. Broken
wikilinks appear as separate nodes. Default viewport 1600x1200 for
denser vaults.

# pensar 0.3.1

* `default_vault()` now honors `options("pensar.vault")` and the
Expand Down
133 changes: 133 additions & 0 deletions R/vault_graph.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#' @title Vault wikilink graph
#' @description Render the vault's wikilink graph as SVG via saber.

#' Render a vault's wikilink graph as SVG
#'
#' Scans every markdown page in the vault (excluding the root-level
#' control files \code{index.md}, \code{log.md}, and \code{schema.md}),
#' extracts \code{[[wikilinks]]} as edges, and renders the result via
#' \code{saber::graph_svg()}. Node tooltips carry the page title, type,
#' date, and tags from YAML frontmatter plus a lede taken from the first
#' meaningful body line; broken wikilinks (targets with no matching
#' page) appear as extra nodes with a distinct tooltip.
#'
#' @param vault Path to the vault directory. Must exist.
#' @param width,height Viewport in pixels. Defaults (1600 x 1200) are
#'   larger than \code{saber::graph_svg()}'s defaults since vaults tend
#'   toward many nodes.
#' @param ... Passed through to \code{saber::graph_svg()} (e.g.,
#'   \code{iterations}, \code{seed}).
#' @return Character vector of SVG lines. Write with \code{writeLines()}.
#' @export
vault_graph <- function(vault = default_vault(), width = 1600L,
                        height = 1200L, ...) {
  if (!requireNamespace("saber", quietly = TRUE)) {
    stop("Package 'saber' is required for vault_graph(). ",
         "Install it from https://github.com/cornball-ai/saber")
  }
  # Ask for forward slashes: on Windows dirname() always returns "/"
  # separators, so a backslash-normalised vault path would never compare
  # equal below and the control files would leak into the graph.
  vault <- normalizePath(vault, winslash = "/", mustWork = TRUE)

  all_md <- list.files(vault, pattern = "\\.md$", recursive = TRUE,
                       full.names = TRUE)
  # Control files are only special at the vault root; a page called
  # index.md in a subdirectory is still a regular page.
  control <- c("index.md", "log.md", "schema.md")
  is_control <- basename(all_md) %in% control & dirname(all_md) == vault
  all_md <- all_md[!is_control]

  if (length(all_md) == 0L) {
    stop("No pages in vault: ", vault)
  }

  names_vec <- unname(vapply(all_md, name_from_path, character(1L)))
  # One slot per page; slots for pages without links stay NULL and are
  # ignored by rbind().
  edges <- vector("list", length(all_md))
  tooltips <- character(length(all_md))

  for (i in seq_along(all_md)) {
    fm <- parse_frontmatter(all_md[i])
    # Frontmatter type wins; otherwise infer it from the page location.
    page_type <- fm$type %||% category_from_path(all_md[i], vault)
    if (length(fm$tags)) {
      tags <- paste(fm$tags, collapse = ", ")
    } else {
      tags <- "(no tags)"
    }
    date <- fm$date %||% "(no date)"
    title <- fm$title %||% names_vec[i]
    lede <- page_lede(all_md[i])
    tooltips[i] <- paste(c(
      title,
      sprintf("type: %s | date: %s", page_type, date),
      sprintf("tags: %s", tags),
      if (nzchar(lede)) lede
    ), collapse = "\n")

    links <- unique(parse_wikilinks(all_md[i]))
    if (length(links)) {
      edges[[i]] <- data.frame(from = names_vec[i], to = links,
                               stringsAsFactors = FALSE)
    }
  }
  edges <- do.call(rbind, edges)
  if (is.null(edges)) {
    # No page links anywhere: hand saber an empty, correctly shaped table.
    edges <- data.frame(from = character(), to = character(),
                        stringsAsFactors = FALSE)
  }

  # Broken wikilinks: targets that aren't actual pages
  broken <- setdiff(unique(edges$to), names_vec)
  if (length(broken)) {
    names_vec <- c(names_vec, broken)
    tooltips <- c(tooltips, paste0(broken, "\n(broken wikilink)"))
  }

  nodes <- data.frame(id = names_vec, label = names_vec,
                      href = NA_character_, tooltip = tooltips,
                      stringsAsFactors = FALSE)

  saber::graph_svg(edges, nodes, width = width, height = height, ...)
}

#' Extract a short lede from a markdown file for tooltip use
#'
#' Skips a leading frontmatter block (\code{---} ... \code{---}), then
#' returns the first remaining line that is not blank, a heading, a
#' horizontal rule, a YAML-style \code{key: value} line, a \code{- } list
#' item, or an image. The result is truncated to \code{max_chars}
#' characters, ending in an ellipsis when cut.
#'
#' @param fp Path to the markdown file.
#' @param max_chars Maximum number of characters in the returned lede.
#' @return A single string; \code{""} when no suitable line exists.
#' @noRd
page_lede <- function(fp, max_chars = 140L) {
  lines <- readLines(fp, warn = FALSE)
  # Skip frontmatter block (--- ... ---)
  if (length(lines) >= 2L && trimws(lines[1L]) == "---") {
    # Index is relative to lines[-1L], so the closing fence sits at end + 1.
    end <- which(trimws(lines[-1L]) == "---")[1L]
    if (!is.na(end)) {
      lines <- lines[-seq_len(end + 1L)]
    }
  }

  lines <- trimws(lines)
  lines <- lines[nzchar(lines)]

  # Markdown noise we don't want in a lede.
  noise <- c(
    "^#+\\s",   # headings
    "^---+$",   # horizontal rules / stray fences
    # YAML-like key: value lines that show up when an ingested page has
    # its own frontmatter block nested in the body. The colon must be
    # followed by whitespace or end of line, so prose that merely starts
    # with a URL ("https://...") is kept.
    "^[A-Za-z][A-Za-z0-9_-]*:(\\s|$)",
    "^-\\s",    # list items
    "^!\\["     # images
  )
  for (pattern in noise) {
    lines <- lines[!grepl(pattern, lines)]
  }

  if (!length(lines)) {
    return("")
  }
  first <- lines[1L]
  if (nchar(first) > max_chars) {
    # Leave room for the ellipsis so the result is exactly max_chars long.
    first <- paste0(substr(first, 1L, max_chars - 1L), "\u2026")
  }
  first
}

#' Infer category from a page's path when frontmatter type is missing
#'
#' Strips the vault prefix from \code{fp} and inspects the leading path
#' components: \code{raw/<category>/...} yields \code{<category>},
#' anything under \code{wiki/} yields \code{"wiki"}, and every other
#' layout yields \code{"unknown"}.
#'
#' @param fp Path to the page, using \code{/} as separator.
#' @param vault Vault root. Compared against \code{fp} literally, so
#'   regex metacharacters in the path are harmless.
#' @return A single string naming the category.
#' @noRd
category_from_path <- function(fp, vault) {
  # Remove the vault prefix as a literal string. Interpolating the path
  # into a regex breaks on directories containing characters such as
  # "(", "+", "[" or Windows backslashes.
  rel <- fp
  if (startsWith(fp, vault)) {
    rel <- substring(fp, nchar(vault) + 1L)
    rel <- sub("^/", "", rel)
  }
  parts <- strsplit(rel, "/", fixed = TRUE)[[1L]]
  if (length(parts) >= 2L && parts[1L] == "raw") {
    return(parts[2L])
  }
  if (length(parts) >= 1L && parts[1L] == "wiki") {
    return("wiki")
  }
  "unknown"
}

38 changes: 38 additions & 0 deletions inst/tinytest/test_vault_graph.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Tests for vault_graph.R

library(pensar)

if (!requireNamespace("saber", quietly = TRUE)) {
  exit_file("saber not installed")
}

# tempfile() guarantees a fresh, unused path; a timestamp-based name can
# collide when the file is rerun within the same session.
tmp <- tempfile("vault-graph-")
init_vault(tmp)

# Ingest two pages that link to each other + one broken link
ingest("Discusses [[Beta]] and [[Missing]].", type = "articles",
       source = "alpha-src", title = "Alpha",
       tags = c("R", "intro"), vault = tmp)
ingest("References back to [[Alpha]].", type = "articles",
       source = "beta-src", title = "Beta", vault = tmp)

svg <- vault_graph(vault = tmp)
expect_true(is.character(svg))
expect_true(any(grepl("^<svg", svg)))

# Page tooltips carry the type line built from the ingest type
expect_true(any(grepl("type: articles", svg, fixed = TRUE)))

# Broken wikilink appears as a node with "(broken wikilink)" tooltip
expect_true(any(grepl("(broken wikilink)", svg, fixed = TRUE)))
expect_true(any(grepl("Missing", svg, fixed = TRUE)))

# Empty vault errors cleanly. This relies on init_vault() leaving only the
# control files (schema/index/log), which vault_graph() filters out.
empty <- tempfile("empty-vault-")
init_vault(empty, agent_instructions = FALSE, rproj = FALSE)
expect_error(vault_graph(vault = empty), "No pages in vault")

# Clean up the fixtures so reruns start from a blank slate
unlink(c(tmp, empty), recursive = TRUE)