diff --git a/src/cli/commands.rs b/src/cli/commands.rs
index f128499..02f62bf 100644
--- a/src/cli/commands.rs
+++ b/src/cli/commands.rs
@@ -134,6 +134,12 @@ enum Commands {
         action: Option<ConfigAction>,
     },
 
+    /// Manage file extensions
+    Extensions {
+        #[command(subcommand)]
+        action: ExtensionsAction,
+    },
+
     /// Install vgrep integration for coding agents
     Install {
         #[command(subcommand)]
@@ -221,6 +227,24 @@ enum ConfigAction {
     Path,
 }
 
+#[derive(Subcommand)]
+enum ExtensionsAction {
+    /// List configured extensions
+    List,
+
+    /// Add an extension to the list
+    Add {
+        /// Extension to add (e.g. "rs", ".txt")
+        extension: String,
+    },
+
+    /// Remove an extension from the list
+    Remove {
+        /// Extension to remove
+        extension: String,
+    },
+}
+
 #[derive(Clone, ValueEnum)]
 enum ConfigKey {
     Mode,
@@ -319,6 +343,7 @@ impl Cli {
             Some(Commands::Status) => run_status(&config),
             Some(Commands::Models { action }) => run_models(action, &mut config),
             Some(Commands::Config { action }) => run_config(action, &mut config),
+            Some(Commands::Extensions { action }) => run_extensions(action, &mut config),
             Some(Commands::Install { agent }) => match agent {
                 InstallAgent::ClaudeCode => super::install::install_claude_code(),
                 InstallAgent::Opencode => super::install::install_opencode(),
@@ -516,7 +541,10 @@ fn run_index(
             }
 
             let db = Database::new(&config.db_path()?)?;
-            let indexer = ServerIndexer::new(db, client, max_size);
+            // Carry the size limit resolved for this run (`max_size`) into the indexer's config.
+            let mut indexer_config = config.clone();
+            indexer_config.max_file_size = max_size;
+            let indexer = ServerIndexer::new(db, client, indexer_config);
             indexer.index_directory(&path, force)?;
         }
         Mode::Local => {
@@ -531,7 +559,10 @@ fn run_index(
 
             let db = Database::new(&config.db_path()?)?;
             let engine = EmbeddingEngine::new(config)?;
-            let indexer = Indexer::new(db, engine, max_size);
+            // Carry the size limit resolved for this run (`max_size`) into the indexer's config.
+            let mut indexer_config = config.clone();
+            indexer_config.max_file_size = max_size;
+            let indexer = Indexer::new(db, engine, indexer_config);
             indexer.index_directory(&path, force)?;
         }
     }
@@ -1002,6 +1033,89 @@ fn run_models(action: ModelsAction, config: &mut Config) -> Result<()> {
     Ok(())
 }
 
+/// Handles `vgrep extensions <action>`: lists, adds, or removes allowlist entries.
+///
+/// `Add` and `Remove` normalize their argument with `normalize_extension` and persist
+/// changes through `config.save()`. Requests that change nothing only print a warning
+/// and still return `Ok(())`.
+fn run_extensions(action: ExtensionsAction, config: &mut Config) -> Result<()> {
+    match action {
+        ExtensionsAction::List => {
+            ui::print_header("Configured Extensions");
+
+            if config.extensions.is_empty() {
+                println!(
+                    " {} No extensions configured (the allowlist is empty)",
+                    ui::WARN
+                );
+                return Ok(());
+            }
+
+            // Sort a copy so the listing is alphabetical without reordering the saved config.
+            let mut extensions = config.extensions.clone();
+            extensions.sort();
+
+            println!(" {}", style("Extensions allowlist:").bold());
+            let mut current_line = String::from(" ");
+
+            for (i, ext) in extensions.iter().enumerate() {
+                let item = format!(".{}", ext);
+                // Flush the current row before it would grow past 80 columns.
+                if current_line.len() + item.len() > 80 {
+                    println!("{}", current_line);
+                    current_line = String::from(" ");
+                }
+                current_line.push_str(&item);
+                if i < extensions.len() - 1 {
+                    current_line.push_str(", ");
+                }
+            }
+            if !current_line.trim().is_empty() {
+                println!("{}", current_line);
+            }
+            println!();
+
+            println!(
+                " Add more with: {}",
+                style("vgrep extensions add .custom").cyan()
+            );
+            println!();
+        }
+        ExtensionsAction::Add { extension } => {
+            let ext = normalize_extension(&extension);
+            // "." or "" normalizes to nothing; refuse it rather than saving a blank entry.
+            if ext.is_empty() {
+                ui::print_warning(&format!("'{}' is not a valid extension", extension));
+                return Ok(());
+            }
+            if config.extensions.contains(&ext) {
+                ui::print_warning(&format!("Extension '.{}' is already in the list", ext));
+                return Ok(());
+            }
+
+            config.extensions.push(ext.clone());
+            config.save()?;
+            ui::print_success(&format!("Added extension '.{}'", ext));
+        }
+        ExtensionsAction::Remove { extension } => {
+            let ext = normalize_extension(&extension);
+            if let Some(pos) = config.extensions.iter().position(|e| e == &ext) {
+                config.extensions.remove(pos);
+                config.save()?;
+                ui::print_success(&format!("Removed extension '.{}'", ext));
+            } else {
+                ui::print_warning(&format!("Extension '.{}' was not in the list", ext));
+            }
+        }
+    }
+    Ok(())
+}
+
+/// Normalizes a user-supplied extension: trims whitespace, strips leading dots, lowercases.
+fn normalize_extension(raw: &str) -> String {
+    raw.trim().trim_start_matches('.').to_lowercase()
+}
+
 fn run_config(action: Option<ConfigAction>, config: &mut Config) -> Result<()> {
     match action {
         None => {
diff --git a/src/config.rs b/src/config.rs
index 6e9bddf..7d31644 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -99,6 +99,10 @@ pub struct Config {
     /// Context size for embeddings
     #[serde(default = "default_context_size")]
     pub context_size: usize,
+
+    /// File extensions to index (lowercase, no leading dot, e.g. "rs")
+    #[serde(default = "default_extensions")]
+    pub extensions: Vec<String>,
 }
 
 fn default_server_host() -> String {
@@ -159,6 +163,36 @@ fn default_context_size() -> usize {
     512
 }
 
+/// Default allowlist of file extensions (lowercase, without the leading dot).
+fn default_extensions() -> Vec<String> {
+    vec![
+        "rs".into(), "py".into(), "js".into(), "ts".into(), "tsx".into(), "jsx".into(),
+        "go".into(), "c".into(), "cpp".into(), "h".into(), "hpp".into(), "java".into(),
+        "kt".into(), "swift".into(), "rb".into(), "php".into(), "cs".into(), "fs".into(),
+        "scala".into(), "clj".into(), "ex".into(), "exs".into(), "erl".into(), "hs".into(),
+        "ml".into(), "lua".into(), "r".into(), "jl".into(), "dart".into(), "vue".into(),
+        "svelte".into(), "astro".into(), "html".into(), "htm".into(), "css".into(),
+        "scss".into(), "sass".into(), "less".into(), "json".into(), "yaml".into(),
+        "yml".into(), "toml".into(), "xml".into(), "md".into(), "markdown".into(),
+        "txt".into(), "rst".into(), "tex".into(), "sh".into(), "bash".into(),
+        "zsh".into(), "fish".into(), "ps1".into(), "bat".into(), "cmd".into(),
+        "sql".into(), "graphql".into(), "proto".into(),
+    ]
+}
+
+impl Config {
+    /// Returns `true` when `ext` is on the configured extension allowlist.
+    ///
+    /// Both sides are compared without leading dots and case-insensitively, so a
+    /// hand-edited entry such as `.RS` still matches `rs`.
+    pub fn allows_extension(&self, ext: &str) -> bool {
+        let wanted = ext.trim_start_matches('.').to_lowercase();
+        self.extensions
+            .iter()
+            .any(|entry| entry.trim_start_matches('.').to_lowercase() == wanted)
+    }
+}
+
 fn default_use_reranker() -> bool {
     true
 }
@@ -182,6 +216,7 @@ impl Default for Config {
             watch_debounce_ms: default_watch_debounce_ms(),
             n_threads: 0,
             context_size: default_context_size(),
+            extensions: default_extensions(),
         }
     }
 }
diff --git a/src/core/indexer.rs b/src/core/indexer.rs
index 469c120..2aa19f6 100644
--- a/src/core/indexer.rs
+++ b/src/core/indexer.rs
@@ -8,14 +8,13 @@ use std::path::{Path, PathBuf};
 
 use super::db::Database;
 use super::embeddings::EmbeddingEngine;
+use crate::config::Config;
 use crate::ui;
 
 pub struct Indexer {
     db: Database,
     engine: EmbeddingEngine,
-    max_file_size: u64,
-    chunk_size: usize,
-    chunk_overlap: usize,
+    config: Config,
 }
 
 #[derive(Clone)]
@@ -32,13 +31,11 @@ struct PendingFile {
 }
 
 impl Indexer {
-    pub fn new(db: Database, engine: EmbeddingEngine, max_file_size: u64) -> Self {
+    pub fn new(db: Database, engine: EmbeddingEngine, config: Config) -> Self {
         Self {
             db,
             engine,
-            max_file_size,
-            chunk_size: 512,
-            chunk_overlap: 64,
+            config,
         }
     }
 
@@ -237,7 +234,7 @@ impl Indexer {
 
     fn should_index(&self, path: &Path) -> bool {
         if let Ok(metadata) = fs::metadata(path) {
-            if metadata.len() > self.max_file_size {
+            if metadata.len() > self.config.max_file_size {
                 return false;
             }
         }
@@ -248,66 +245,14 @@ impl Indexer {
             .unwrap_or("")
             .to_lowercase();
 
+        // Accept anything on the configured allowlist (see `Config::allows_extension`).
+        if self.config.allows_extension(&ext) {
+            return true;
+        }
+
         matches!(
             ext.as_str(),
-            "rs" | "py"
-                | "js"
-                | "ts"
-                | "tsx"
-                | "jsx"
-                | "go"
-                | "c"
-                | "cpp"
-                | "h"
-                | "hpp"
-                | "java"
-                | "kt"
-                | "swift"
-                | "rb"
-                | "php"
-                | "cs"
-                | "fs"
-                | "scala"
-                | "clj"
-                | "ex"
-                | "exs"
-                | "erl"
-                | "hs"
-                | "ml"
-                | "lua"
-                | "r"
-                | "jl"
-                | "dart"
-                | "vue"
-                | "svelte"
-                | "astro"
-                | "html"
-                | "htm"
-                | "css"
-                | "scss"
-                | "sass"
-                | "less"
-                | "json"
-                | "yaml"
-                | "yml"
-                | "toml"
-                | "xml"
-                | "md"
-                | "markdown"
-                | "txt"
-                | "rst"
-                | "tex"
-                | "sh"
-                | "bash"
-                | "zsh"
-                | "fish"
-                | "ps1"
-                | "bat"
-                | "cmd"
-                | "sql"
-                | "graphql"
-                | "proto"
-                | ""
+            "" // Extensionless files are always accepted; special filenames are checked below
         ) || path.file_name().is_some_and(|n| {
             let name = n.to_string_lossy().to_lowercase();
             matches!(
@@ -344,7 +289,7 @@ impl Indexer {
         for (line_idx, line) in lines.iter().enumerate() {
             let line_len = line.len() + 1;
 
-            if char_count + line_len > self.chunk_size && !current_chunk.is_empty() {
+            if char_count + line_len > self.config.chunk_size && !current_chunk.is_empty() {
                 chunks.push(FileChunk {
                     content: current_chunk.clone(),
                     start_line: chunk_start_line as i32,
@@ -352,7 +297,7 @@ impl Indexer {
                 });
 
                 let overlap_start = if line_idx > 0 {
-                    line_idx.saturating_sub(self.chunk_overlap / 40)
+                    line_idx.saturating_sub(self.config.chunk_overlap / 40)
                 } else {
                     0
                 };
@@ -394,19 +339,15 @@ fn compute_hash(content: &str) -> String {
 pub struct ServerIndexer {
     db: Database,
     client: crate::server::Client,
-    max_file_size: u64,
-    chunk_size: usize,
-    chunk_overlap: usize,
+    config: Config,
 }
 
 impl ServerIndexer {
-    pub fn new(db: Database, client: crate::server::Client, max_file_size: u64) -> Self {
+    pub fn new(db: Database, client: crate::server::Client, config: Config) -> Self {
         Self {
             db,
             client,
-            max_file_size,
-            chunk_size: 512,
-            chunk_overlap: 64,
+            config,
         }
     }
 
@@ -613,7 +554,7 @@ impl ServerIndexer {
 
     fn should_index(&self, path: &Path) -> bool {
         if let Ok(metadata) = fs::metadata(path) {
-            if metadata.len() > self.max_file_size {
+            if metadata.len() > self.config.max_file_size {
                 return false;
             }
         }
@@ -624,66 +565,14 @@ impl ServerIndexer {
             .unwrap_or("")
             .to_lowercase();
 
+        // Accept anything on the configured allowlist (see `Config::allows_extension`).
+        if self.config.allows_extension(&ext) {
+            return true;
+        }
+
         matches!(
             ext.as_str(),
-            "rs" | "py"
-                | "js"
-                | "ts"
-                | "tsx"
-                | "jsx"
-                | "go"
-                | "c"
-                | "cpp"
-                | "h"
-                | "hpp"
-                | "java"
-                | "kt"
-                | "swift"
-                | "rb"
-                | "php"
-                | "cs"
-                | "fs"
-                | "scala"
-                | "clj"
-                | "ex"
-                | "exs"
-                | "erl"
-                | "hs"
-                | "ml"
-                | "lua"
-                | "r"
-                | "jl"
-                | "dart"
-                | "vue"
-                | "svelte"
-                | "astro"
-                | "html"
-                | "htm"
-                | "css"
-                | "scss"
-                | "sass"
-                | "less"
-                | "json"
-                | "yaml"
-                | "yml"
-                | "toml"
-                | "xml"
-                | "md"
-                | "markdown"
-                | "txt"
-                | "rst"
-                | "tex"
-                | "sh"
-                | "bash"
-                | "zsh"
-                | "fish"
-                | "ps1"
-                | "bat"
-                | "cmd"
-                | "sql"
-                | "graphql"
-                | "proto"
-                | ""
+            "" // Extensionless files are always accepted; special filenames are checked below
         ) || path.file_name().is_some_and(|n| {
             let name = n.to_string_lossy().to_lowercase();
             matches!(
@@ -720,7 +609,7 @@ impl ServerIndexer {
         for (line_idx, line) in lines.iter().enumerate() {
             let line_len = line.len() + 1;
 
-            if char_count + line_len > self.chunk_size && !current_chunk.is_empty() {
+            if char_count + line_len > self.config.chunk_size && !current_chunk.is_empty() {
                 chunks.push(FileChunk {
                     content: current_chunk.clone(),
                     start_line: chunk_start_line as i32,
@@ -728,7 +617,7 @@ impl ServerIndexer {
                 });
 
                 let overlap_start = if line_idx > 0 {
-                    line_idx.saturating_sub(self.chunk_overlap / 40)
+                    line_idx.saturating_sub(self.config.chunk_overlap / 40)
                 } else {
                     0
                 };
diff --git a/src/watcher.rs b/src/watcher.rs
index 816219f..19657f2 100644
--- a/src/watcher.rs
+++ b/src/watcher.rs
@@ -182,66 +182,14 @@ impl FileWatcher {
             .unwrap_or("")
             .to_lowercase();
 
+        // Accept anything on the configured allowlist (see `Config::allows_extension`).
+        if self.config.allows_extension(&ext) {
+            return true;
+        }
+
         matches!(
             ext.as_str(),
-            "rs" | "py"
-                | "js"
-                | "ts"
-                | "tsx"
-                | "jsx"
-                | "go"
-                | "c"
-                | "cpp"
-                | "h"
-                | "hpp"
-                | "java"
-                | "kt"
-                | "swift"
-                | "rb"
-                | "php"
-                | "cs"
-                | "fs"
-                | "scala"
-                | "clj"
-                | "ex"
-                | "exs"
-                | "erl"
-                | "hs"
-                | "ml"
-                | "lua"
-                | "r"
-                | "jl"
-                | "dart"
-                | "vue"
-                | "svelte"
-                | "astro"
-                | "html"
-                | "htm"
-                | "css"
-                | "scss"
-                | "sass"
-                | "less"
-                | "json"
-                | "yaml"
-                | "yml"
-                | "toml"
-                | "xml"
-                | "md"
-                | "markdown"
-                | "txt"
-                | "rst"
-                | "tex"
-                | "sh"
-                | "bash"
-                | "zsh"
-                | "fish"
-                | "ps1"
-                | "bat"
-                | "cmd"
-                | "sql"
-                | "graphql"
-                | "proto"
-                | ""
+            "" // Extensionless files are always accepted; special filenames are checked below
         ) || path.file_name().is_some_and(|n| {
             let name = n.to_string_lossy().to_lowercase();
             matches!(
@@ -270,7 +218,7 @@ impl FileWatcher {
             Mode::Server => {
                 let db = Database::new(&self.config.db_path()?)?;
                 let client = Client::new(&self.config.server_host, self.config.server_port);
-                let indexer = ServerIndexer::new(db, client, self.config.max_file_size);
+                let indexer = ServerIndexer::new(db, client, self.config.clone());
                 indexer.index_directory(&self.root_path, false)?;
             }
             Mode::Local => {
@@ -279,7 +227,7 @@ impl FileWatcher {
                 }
                 let db = Database::new(&self.config.db_path()?)?;
                 let engine = crate::core::EmbeddingEngine::new(&self.config)?;
-                let indexer = Indexer::new(db, engine, self.config.max_file_size);
+                let indexer = Indexer::new(db, engine, self.config.clone());
                 indexer.index_directory(&self.root_path, false)?;
             }
         }