diff --git a/cmd/nightshift/commands/silo.go b/cmd/nightshift/commands/silo.go new file mode 100644 index 0000000..41a6e2b --- /dev/null +++ b/cmd/nightshift/commands/silo.go @@ -0,0 +1,173 @@ +package commands + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/spf13/cobra" + + "github.com/marcus/nightshift/internal/analysis" + "github.com/marcus/nightshift/internal/config" + "github.com/marcus/nightshift/internal/db" + "github.com/marcus/nightshift/internal/logging" +) + +var siloCmd = &cobra.Command{ + Use: "knowledge-silo [path]", + Short: "Detect knowledge silos in the codebase", + Long: `Analyze git history per directory to identify knowledge silos — areas where +only one or two people have contributed. + +Directories are ranked by silo risk based on contributor concentration. Use this +to find areas that need knowledge transfer, pairing sessions, or documentation. + +The silo score (0-1) combines commit concentration (Herfindahl index) with +contributor count. 
Higher scores indicate greater knowledge isolation.`, + RunE: func(cmd *cobra.Command, args []string) error { + path, _ := cmd.Flags().GetString("path") + if path == "" && len(args) > 0 { + path = args[0] + } + if path == "" { + var err error + path, err = os.Getwd() + if err != nil { + return fmt.Errorf("getting current directory: %w", err) + } + } + + depth, _ := cmd.Flags().GetInt("depth") + minCommits, _ := cmd.Flags().GetInt("min-commits") + jsonOutput, _ := cmd.Flags().GetBool("json") + since, _ := cmd.Flags().GetString("since") + until, _ := cmd.Flags().GetString("until") + saveReport, _ := cmd.Flags().GetBool("save") + dbPath, _ := cmd.Flags().GetString("db") + + return runSilo(path, depth, minCommits, jsonOutput, since, until, saveReport, dbPath) + }, +} + +func init() { + siloCmd.Flags().StringP("path", "p", "", "Repository path to analyze") + siloCmd.Flags().Int("depth", 2, "Directory depth for grouping (default 2)") + siloCmd.Flags().Int("min-commits", 5, "Minimum commits to include a directory") + siloCmd.Flags().Bool("json", false, "Output as JSON") + siloCmd.Flags().String("since", "", "Start date (RFC3339 or YYYY-MM-DD)") + siloCmd.Flags().String("until", "", "End date (RFC3339 or YYYY-MM-DD)") + siloCmd.Flags().Bool("save", false, "Save results to database") + siloCmd.Flags().String("db", "", "Database path (uses config if not set)") + rootCmd.AddCommand(siloCmd) +} + +func runSilo(path string, depth, minCommits int, jsonOutput bool, since, until string, saveReport bool, dbPath string) error { + logger := logging.Component("knowledge-silo") + + // Resolve path + absPath, err := filepath.Abs(path) + if err != nil { + return fmt.Errorf("resolving path: %w", err) + } + + if !analysis.RepositoryExists(absPath) { + return fmt.Errorf("not a git repository: %s", absPath) + } + + // Parse dates + var sinceTime, untilTime time.Time + if since != "" { + t, err := parseDate(since) + if err != nil { + return fmt.Errorf("parsing since date: %w", err) + } + 
sinceTime = t + } + if until != "" { + t, err := parseDate(until) + if err != nil { + return fmt.Errorf("parsing until date: %w", err) + } + untilTime = t + } + + // Parse git history by directory + parser := analysis.NewGitParser(absPath) + opts := analysis.SiloParseOptions{ + Since: sinceTime, + Until: untilTime, + Depth: depth, + MinCommits: minCommits, + } + + dirAuthors, err := parser.ParseAuthorsByDirectory(opts) + if err != nil { + return fmt.Errorf("parsing git history: %w", err) + } + + if len(dirAuthors) == 0 { + logger.Warnf("no directories with commits found in %s", absPath) + return nil + } + + // Calculate silo scores + entries := analysis.CalculateSilos(dirAuthors, minCommits) + + if len(entries) == 0 { + logger.Warnf("no directories met the minimum commit threshold (%d)", minCommits) + return nil + } + + // Generate report + gen := analysis.NewSiloReportGenerator() + report := gen.Generate(absPath, depth, entries) + + // Output results + if jsonOutput { + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + return enc.Encode(report) + } + + markdown := gen.RenderMarkdown(report) + fmt.Println(markdown) + + // Save if requested + if saveReport { + if dbPath == "" { + cfg, err := config.Load() + if err != nil { + logger.Warnf("could not load config for db path: %v", err) + } else { + dbPath = cfg.ExpandedDBPath() + } + } + + if dbPath != "" { + database, err := db.Open(dbPath) + if err != nil { + logger.Errorf("opening database: %v", err) + } else { + defer func() { _ = database.Close() }() + + result := &analysis.SiloResult{ + Timestamp: time.Now(), + RepoPath: absPath, + Depth: depth, + Results: entries, + Summary: report, + } + + if err := result.Store(database.SQL()); err != nil { + logger.Errorf("storing result: %v", err) + } else { + logger.Infof("results saved (ID: %d)", result.ID) + } + } + } + } + + return nil +} diff --git a/internal/analysis/silo.go b/internal/analysis/silo.go new file mode 100644 index 0000000..b2b7c44 --- 
/dev/null +++ b/internal/analysis/silo.go @@ -0,0 +1,270 @@ +package analysis + +import ( + "fmt" + "os/exec" + "path/filepath" + "sort" + "strings" + "time" +) + +// SiloEntry represents a directory's knowledge silo analysis. +type SiloEntry struct { + Directory string `json:"directory"` + TopContributors []CommitAuthor `json:"top_contributors"` + TotalCommits int `json:"total_commits"` + ContributorCount int `json:"contributor_count"` + SiloScore float64 `json:"silo_score"` // 0-1, 1 = max silo risk + RiskLevel string `json:"risk_level"` // critical, high, medium, low +} + +// SiloReport holds the full knowledge silo analysis results. +type SiloReport struct { + Timestamp time.Time `json:"timestamp"` + RepoPath string `json:"repo_path"` + Depth int `json:"depth"` + Entries []SiloEntry `json:"entries"` + TotalDirs int `json:"total_dirs"` + CriticalCount int `json:"critical_count"` + HighCount int `json:"high_count"` + Recommendations []string `json:"recommendations"` + ReportedAt string `json:"reported_at"` +} + +// SiloParseOptions defines filtering options for silo analysis. +type SiloParseOptions struct { + Since time.Time + Until time.Time + Depth int // directory depth to analyze (default 2) + MinCommits int // minimum commits to include a directory (default 5) +} + +// ParseAuthorsByDirectory extracts per-directory author contributions from git history. +// It runs 'git log --format=%an|%ae --name-only' and groups files by directory at the +// configured depth, returning a map of directory path to author contributions. 
+func (gp *GitParser) ParseAuthorsByDirectory(opts SiloParseOptions) (map[string][]CommitAuthor, error) { + if opts.Depth <= 0 { + opts.Depth = 2 + } + + args := []string{"log", "--format=COMMIT:%an|%ae", "--name-only"} + + if !opts.Since.IsZero() { + args = append(args, fmt.Sprintf("--since=%s", opts.Since.Format(time.RFC3339))) + } + if !opts.Until.IsZero() { + args = append(args, fmt.Sprintf("--until=%s", opts.Until.Format(time.RFC3339))) + } + + cmd := exec.Command("git", args...) + cmd.Dir = gp.repoPath + + output, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("running git log: %w", err) + } + + // Parse output: lines alternate between "COMMIT:name|email" headers and file paths + // dirAuthors maps directory -> email -> CommitAuthor + dirAuthors := make(map[string]map[string]*CommitAuthor) + var currentName, currentEmail string + + for _, line := range strings.Split(string(output), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + + if strings.HasPrefix(line, "COMMIT:") { + parts := strings.SplitN(line[7:], "|", 2) + if len(parts) == 2 { + currentName = parts[0] + currentEmail = parts[1] + } + continue + } + + // This is a file path — extract directory at configured depth + if currentEmail == "" { + continue + } + + dir := truncateToDepth(line, opts.Depth) + if dir == "" { + continue + } + + if dirAuthors[dir] == nil { + dirAuthors[dir] = make(map[string]*CommitAuthor) + } + + key := strings.ToLower(currentEmail) + if author, exists := dirAuthors[dir][key]; exists { + author.Commits++ + } else { + dirAuthors[dir][key] = &CommitAuthor{ + Name: currentName, + Email: currentEmail, + Commits: 1, + } + } + } + + // Convert nested maps to map[string][]CommitAuthor + result := make(map[string][]CommitAuthor, len(dirAuthors)) + for dir, authorMap := range dirAuthors { + authors := make([]CommitAuthor, 0, len(authorMap)) + for _, author := range authorMap { + authors = append(authors, *author) + } + // Sort by commits 
descending + sort.Slice(authors, func(i, j int) bool { + return authors[i].Commits > authors[j].Commits + }) + result[dir] = authors + } + + return result, nil +} + +// truncateToDepth returns the directory path truncated to the given depth. +// For depth=2, "internal/analysis/silo.go" returns "internal/analysis". +// Files at the root level return "." for depth >= 1. +func truncateToDepth(filePath string, depth int) string { + dir := filepath.Dir(filePath) + if dir == "." { + return "." + } + + parts := strings.Split(filepath.ToSlash(dir), "/") + if len(parts) > depth { + parts = parts[:depth] + } + + return strings.Join(parts, "/") +} + +// CalculateSilos computes silo scores for each directory based on author distributions. +func CalculateSilos(dirAuthors map[string][]CommitAuthor, minCommits int) []SiloEntry { + if minCommits <= 0 { + minCommits = 5 + } + + var entries []SiloEntry + + for dir, authors := range dirAuthors { + totalCommits := 0 + for _, a := range authors { + totalCommits += a.Commits + } + + // Skip directories with too few commits + if totalCommits < minCommits { + continue + } + + entry := SiloEntry{ + Directory: dir, + TotalCommits: totalCommits, + ContributorCount: len(authors), + } + + // Keep top 3 contributors for display + topN := 3 + if len(authors) < topN { + topN = len(authors) + } + entry.TopContributors = authors[:topN] + + // Calculate silo score: combine contributor count and commit concentration + entry.SiloScore = calculateSiloScore(authors, totalCommits) + entry.RiskLevel = assessSiloRisk(authors, totalCommits) + + entries = append(entries, entry) + } + + // Sort by silo score descending (worst silos first) + sort.Slice(entries, func(i, j int) bool { + return entries[i].SiloScore > entries[j].SiloScore + }) + + return entries +} + +// calculateSiloScore computes a 0-1 score where 1 = maximum silo risk. +// Uses inverse normalized contributor count weighted by commit concentration (Herfindahl). 
+func calculateSiloScore(authors []CommitAuthor, totalCommits int) float64 {
+	if len(authors) == 0 || totalCommits == 0 {
+		return 0
+	}
+
+	// A single contributor is by definition the maximum silo.
+	if len(authors) == 1 {
+		return 1.0
+	}
+
+	// Herfindahl-Hirschman index of commit shares: sum of squared shares.
+	// Ranges from 1/n (perfectly even split) to 1.0 (one person owns all).
+	hhi := 0.0
+	for _, a := range authors {
+		share := float64(a.Commits) / float64(totalCommits)
+		hhi += share * share
+	}
+
+	// Rescale so 0 = perfectly even among n contributors and 1 = fully
+	// concentrated, removing the 1/n floor inherent to having n people.
+	n := float64(len(authors))
+	minHHI := 1.0 / n
+	normalizedHHI := (hhi - minHHI) / (1.0 - minHHI)
+	if normalizedHHI < 0 {
+		normalizedHHI = 0
+	}
+	if normalizedHHI > 1 {
+		normalizedHHI = 1
+	}
+
+	// Blend: 60% concentration + 40% inverse contributor count (fewer people
+	// = higher risk). Since n >= 2 here, 1/n <= 0.5, so the score is already
+	// bounded by 0.6*1 + 0.4*0.5 = 0.8 — no clamping needed (the original
+	// clamps on contributorFactor and score were unreachable dead code).
+	return 0.6*normalizedHHI + 0.4/n
+}
+
+// assessSiloRisk determines the risk level for a directory. 
+func assessSiloRisk(authors []CommitAuthor, totalCommits int) string {
+	if len(authors) == 0 || totalCommits == 0 {
+		return "unknown"
+	}
+
+	// Single contributor: all knowledge rests with one person.
+	if len(authors) <= 1 {
+		return "critical"
+	}
+
+	// Share of the top contributor; authors are sorted by commits descending.
+	top1Pct := float64(authors[0].Commits) / float64(totalCommits)
+
+	switch {
+	case top1Pct > 0.8:
+		// Critical: top contributor owns > 80%.
+		return "critical"
+	case top1Pct > 0.6 || len(authors) <= 2:
+		// High: top contributor owns > 60%, or only two contributors.
+		return "high"
+	case top1Pct > 0.4 || len(authors) <= 3:
+		// Medium: top contributor owns > 40%, or three or fewer contributors.
+		return "medium"
+	default:
+		return "low"
+	}
+}
diff --git a/internal/analysis/silo_db.go b/internal/analysis/silo_db.go
new file mode 100644
index 0000000..6de700a
--- /dev/null
+++ b/internal/analysis/silo_db.go
@@ -0,0 +1,110 @@
+package analysis
+
+import (
+	"database/sql"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"time"
+)
+
+// SiloResult represents a stored knowledge silo analysis result.
+type SiloResult struct {
+	ID        int64       `json:"id"`
+	Timestamp time.Time   `json:"timestamp"`
+	RepoPath  string      `json:"repo_path"`
+	Depth     int         `json:"depth"`
+	Results   []SiloEntry `json:"results"`
+	Summary   *SiloReport `json:"summary"`
+}
+
+// Store saves a silo analysis result to the database, recording the new
+// row's ID on sr.ID.
+func (sr *SiloResult) Store(db *sql.DB) error {
+	if db == nil {
+		return fmt.Errorf("database is nil")
+	}
+
+	// Entries and summary are persisted as JSON text columns.
+	resultsJSON, err := json.Marshal(sr.Results)
+	if err != nil {
+		return fmt.Errorf("marshaling results: %w", err)
+	}
+
+	summaryJSON, err := json.Marshal(sr.Summary)
+	if err != nil {
+		return fmt.Errorf("marshaling summary: %w", err)
+	}
+
+	query := `
+	INSERT INTO knowledge_silo_results (timestamp, repo_path, depth, results, summary)
+	VALUES (?, ?, ?, ?, ?)
+	`
+
+	res, err := db.Exec(query,
+		sr.Timestamp,
+		sr.RepoPath,
+		sr.Depth,
+		string(resultsJSON),
+		string(summaryJSON),
+	)
+	if err != nil {
+		return fmt.Errorf("inserting silo result: %w", err)
+	}
+
+	id, err := res.LastInsertId()
+	if err != nil {
+		return fmt.Errorf("getting insert id: %w", err)
+	}
+	sr.ID = id
+
+	return nil
+}
+
+// LoadLatestSilo loads the most recent silo analysis result for a repo.
+// It returns (nil, nil) when no result has been stored for repoPath.
+func LoadLatestSilo(db *sql.DB, repoPath string) (*SiloResult, error) {
+	if db == nil {
+		return nil, fmt.Errorf("database is nil")
+	}
+
+	query := `
+	SELECT id, timestamp, repo_path, depth, results, summary
+	FROM knowledge_silo_results
+	WHERE repo_path = ?
+	ORDER BY timestamp DESC
+	LIMIT 1
+	`
+
+	row := db.QueryRow(query, repoPath)
+
+	result := &SiloResult{}
+	var resultsJSON, summaryJSON string
+
+	err := row.Scan(
+		&result.ID,
+		&result.Timestamp,
+		&result.RepoPath,
+		&result.Depth,
+		&resultsJSON,
+		&summaryJSON,
+	)
+	if err != nil {
+		// errors.Is (rather than ==) still matches when a driver or caller
+		// wraps sql.ErrNoRows; "no rows yet" is a normal outcome, not an error.
+		if errors.Is(err, sql.ErrNoRows) {
+			return nil, nil
+		}
+		return nil, fmt.Errorf("querying silo result: %w", err)
+	}
+
+	if err := json.Unmarshal([]byte(resultsJSON), &result.Results); err != nil {
+		return nil, fmt.Errorf("unmarshaling results: %w", err)
+	}
+
+	if err := json.Unmarshal([]byte(summaryJSON), &result.Summary); err != nil {
+		return nil, fmt.Errorf("unmarshaling summary: %w", err)
+	}
+
+	return result, nil
+}
diff --git a/internal/analysis/silo_report.go b/internal/analysis/silo_report.go
new file mode 100644
index 0000000..c274b05
--- /dev/null
+++ b/internal/analysis/silo_report.go
@@ -0,0 +1,153 @@
+package analysis
+
+import (
+	"bytes"
+	"fmt"
+	"time"
+)
+
+// SiloReportGenerator creates formatted silo reports.
+type SiloReportGenerator struct{}
+
+// NewSiloReportGenerator creates a new silo report generator.
+func NewSiloReportGenerator() *SiloReportGenerator {
+	return &SiloReportGenerator{}
+}
+
+// Generate creates a SiloReport from silo entries. 
+func (sg *SiloReportGenerator) Generate(repoPath string, depth int, entries []SiloEntry) *SiloReport { + report := &SiloReport{ + Timestamp: time.Now(), + RepoPath: repoPath, + Depth: depth, + Entries: entries, + TotalDirs: len(entries), + ReportedAt: time.Now().Format("2006-01-02 15:04:05"), + } + + for _, e := range entries { + switch e.RiskLevel { + case "critical": + report.CriticalCount++ + case "high": + report.HighCount++ + } + } + + report.Recommendations = sg.generateRecommendations(entries) + return report +} + +// generateRecommendations creates action items based on silo analysis. +func (sg *SiloReportGenerator) generateRecommendations(entries []SiloEntry) []string { + var recs []string + + var criticalDirs, highDirs []string + for _, e := range entries { + switch e.RiskLevel { + case "critical": + criticalDirs = append(criticalDirs, e.Directory) + case "high": + highDirs = append(highDirs, e.Directory) + } + } + + if len(criticalDirs) > 0 { + recs = append(recs, fmt.Sprintf("CRITICAL: %d directories have single-person knowledge silos. Prioritize knowledge transfer immediately.", len(criticalDirs))) + for _, dir := range criticalDirs { + for _, e := range entries { + if e.Directory == dir && len(e.TopContributors) > 0 { + recs = append(recs, fmt.Sprintf(" - %s: dominated by %s (%d commits). Schedule pairing sessions.", dir, e.TopContributors[0].Name, e.TopContributors[0].Commits)) + break + } + } + } + } + + if len(highDirs) > 0 { + recs = append(recs, fmt.Sprintf("HIGH RISK: %d directories have limited contributor diversity. Encourage cross-team contributions.", len(highDirs))) + } + + if len(criticalDirs) == 0 && len(highDirs) == 0 { + recs = append(recs, "GOOD: No critical knowledge silos detected. 
Maintain current collaboration practices.") + } + + if len(entries) > 0 { + recs = append(recs, "Consider rotating code review assignments to spread knowledge across more team members.") + recs = append(recs, "Document architectural decisions in high-risk directories to reduce person-dependent knowledge.") + } + + return recs +} + +// RenderMarkdown generates a markdown representation of the silo report. +func (sg *SiloReportGenerator) RenderMarkdown(report *SiloReport) string { + var buf bytes.Buffer + + // Header + fmt.Fprintf(&buf, "# Knowledge Silo Analysis\n\n") + fmt.Fprintf(&buf, "*Generated: %s*\n\n", report.ReportedAt) + + // Summary + buf.WriteString("## Summary\n\n") + buf.WriteString("| Metric | Value |\n") + buf.WriteString("|--------|-------|\n") + fmt.Fprintf(&buf, "| Directories Analyzed | %d |\n", report.TotalDirs) + fmt.Fprintf(&buf, "| Critical Silos | %d |\n", report.CriticalCount) + fmt.Fprintf(&buf, "| High Risk Silos | %d |\n", report.HighCount) + fmt.Fprintf(&buf, "| Analysis Depth | %d |\n\n", report.Depth) + + // Silo table + if len(report.Entries) > 0 { + buf.WriteString("## Directory Silo Risk\n\n") + buf.WriteString("| Directory | Top Contributor | Silo Score | Contributors | Commits | Risk |\n") + buf.WriteString("|-----------|----------------|------------|--------------|---------|------|\n") + + for _, entry := range report.Entries { + topName := "-" + topPct := 0.0 + if len(entry.TopContributors) > 0 { + topName = entry.TopContributors[0].Name + if entry.TotalCommits > 0 { + topPct = float64(entry.TopContributors[0].Commits) * 100 / float64(entry.TotalCommits) + } + } + + fmt.Fprintf(&buf, "| %s | %s (%.0f%%) | %.2f | %d | %d | **%s** |\n", + entry.Directory, + topName, + topPct, + entry.SiloScore, + entry.ContributorCount, + entry.TotalCommits, + entry.RiskLevel, + ) + } + buf.WriteString("\n") + } + + // Recommendations + if len(report.Recommendations) > 0 { + buf.WriteString("## Recommendations\n\n") + for _, rec := range 
report.Recommendations { + if len(rec) > 0 && (rec[0] == 'C' || rec[0] == 'H' || rec[0] == 'G') && + (bytes.HasPrefix([]byte(rec), []byte("CRITICAL")) || + bytes.HasPrefix([]byte(rec), []byte("HIGH")) || + bytes.HasPrefix([]byte(rec), []byte("GOOD"))) { + fmt.Fprintf(&buf, "**%s**\n\n", rec) + } else { + fmt.Fprintf(&buf, "- %s\n", rec) + } + } + buf.WriteString("\n") + } + + // Risk explanation + buf.WriteString("## Understanding Silo Risk\n\n") + buf.WriteString("- **Critical**: Single person owns >80% of commits, or only one contributor.\n") + buf.WriteString("- **High**: Top contributor owns >60%, or only two contributors.\n") + buf.WriteString("- **Medium**: Top contributor owns >40%, or three or fewer contributors.\n") + buf.WriteString("- **Low**: Knowledge is well-distributed across multiple contributors.\n") + + return buf.String() +} diff --git a/internal/analysis/silo_test.go b/internal/analysis/silo_test.go new file mode 100644 index 0000000..2c82892 --- /dev/null +++ b/internal/analysis/silo_test.go @@ -0,0 +1,368 @@ +package analysis + +import ( + "strings" + "testing" +) + +func TestTruncateToDepth(t *testing.T) { + tests := []struct { + path string + depth int + want string + }{ + {"internal/analysis/silo.go", 2, "internal/analysis"}, + {"internal/analysis/silo.go", 1, "internal"}, + {"cmd/nightshift/commands/silo.go", 2, "cmd/nightshift"}, + {"cmd/nightshift/commands/silo.go", 3, "cmd/nightshift/commands"}, + {"README.md", 2, "."}, + {"pkg/foo.go", 1, "pkg"}, + {"a/b/c/d/e.go", 2, "a/b"}, + } + + for _, tt := range tests { + got := truncateToDepth(tt.path, tt.depth) + if got != tt.want { + t.Errorf("truncateToDepth(%q, %d) = %q, want %q", tt.path, tt.depth, got, tt.want) + } + } +} + +func TestCalculateSilosEmpty(t *testing.T) { + entries := CalculateSilos(nil, 5) + if len(entries) != 0 { + t.Errorf("expected 0 entries for nil input, got %d", len(entries)) + } +} + +func TestCalculateSilosSingleOwner(t *testing.T) { + dirAuthors := 
map[string][]CommitAuthor{ + "internal/core": { + {Name: "Alice", Email: "alice@example.com", Commits: 50}, + }, + } + + entries := CalculateSilos(dirAuthors, 1) + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + + e := entries[0] + if e.Directory != "internal/core" { + t.Errorf("expected directory 'internal/core', got %q", e.Directory) + } + if e.RiskLevel != "critical" { + t.Errorf("single owner should be critical risk, got %q", e.RiskLevel) + } + if e.SiloScore < 0.99 { + t.Errorf("single owner should have silo score ~1.0, got %.2f", e.SiloScore) + } + if e.ContributorCount != 1 { + t.Errorf("expected 1 contributor, got %d", e.ContributorCount) + } +} + +func TestCalculateSilosWellDistributed(t *testing.T) { + dirAuthors := map[string][]CommitAuthor{ + "pkg/shared": { + {Name: "A", Email: "a@example.com", Commits: 20}, + {Name: "B", Email: "b@example.com", Commits: 20}, + {Name: "C", Email: "c@example.com", Commits: 20}, + {Name: "D", Email: "d@example.com", Commits: 20}, + {Name: "E", Email: "e@example.com", Commits: 20}, + }, + } + + entries := CalculateSilos(dirAuthors, 5) + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + + e := entries[0] + if e.RiskLevel != "low" { + t.Errorf("well-distributed should be low risk, got %q", e.RiskLevel) + } + if e.SiloScore > 0.3 { + t.Errorf("well-distributed should have low silo score, got %.2f", e.SiloScore) + } +} + +func TestCalculateSilosMinCommitsFilter(t *testing.T) { + dirAuthors := map[string][]CommitAuthor{ + "internal/core": { + {Name: "Alice", Email: "alice@example.com", Commits: 50}, + }, + "docs": { + {Name: "Bob", Email: "bob@example.com", Commits: 2}, + }, + } + + entries := CalculateSilos(dirAuthors, 5) + if len(entries) != 1 { + t.Errorf("expected 1 entry (docs should be filtered), got %d", len(entries)) + } + if len(entries) > 0 && entries[0].Directory != "internal/core" { + t.Errorf("expected internal/core entry, got %q", 
entries[0].Directory) + } +} + +func TestCalculateSilosSortedBySeverity(t *testing.T) { + dirAuthors := map[string][]CommitAuthor{ + "pkg/shared": { + {Name: "A", Email: "a@example.com", Commits: 10}, + {Name: "B", Email: "b@example.com", Commits: 10}, + {Name: "C", Email: "c@example.com", Commits: 10}, + {Name: "D", Email: "d@example.com", Commits: 10}, + {Name: "E", Email: "e@example.com", Commits: 10}, + }, + "internal/core": { + {Name: "Alice", Email: "alice@example.com", Commits: 50}, + }, + } + + entries := CalculateSilos(dirAuthors, 5) + if len(entries) != 2 { + t.Fatalf("expected 2 entries, got %d", len(entries)) + } + + // Worst silo should come first + if entries[0].Directory != "internal/core" { + t.Errorf("expected single-owner dir first, got %q", entries[0].Directory) + } + if entries[0].SiloScore <= entries[1].SiloScore { + t.Errorf("entries should be sorted by silo score descending: %.2f <= %.2f", + entries[0].SiloScore, entries[1].SiloScore) + } +} + +func TestAssessSiloRisk(t *testing.T) { + tests := []struct { + name string + authors []CommitAuthor + total int + want string + }{ + { + name: "empty", + authors: nil, + total: 0, + want: "unknown", + }, + { + name: "single contributor", + authors: []CommitAuthor{{Commits: 100}}, + total: 100, + want: "critical", + }, + { + name: "dominant >80%", + authors: []CommitAuthor{ + {Commits: 85}, + {Commits: 15}, + }, + total: 100, + want: "critical", + }, + { + name: "dominant >60%", + authors: []CommitAuthor{ + {Commits: 70}, + {Commits: 20}, + {Commits: 10}, + }, + total: 100, + want: "high", + }, + { + name: "two contributors only", + authors: []CommitAuthor{ + {Commits: 50}, + {Commits: 50}, + }, + total: 100, + want: "high", + }, + { + name: "moderate concentration", + authors: []CommitAuthor{ + {Commits: 45}, + {Commits: 25}, + {Commits: 20}, + {Commits: 10}, + }, + total: 100, + want: "medium", + }, + { + name: "well distributed", + authors: []CommitAuthor{ + {Commits: 25}, + {Commits: 25}, + 
{Commits: 25}, + {Commits: 15}, + {Commits: 10}, + }, + total: 100, + want: "low", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := assessSiloRisk(tt.authors, tt.total) + if got != tt.want { + t.Errorf("assessSiloRisk() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestCalculateSiloScore(t *testing.T) { + // Single contributor should be 1.0 + score1 := calculateSiloScore([]CommitAuthor{{Commits: 100}}, 100) + if score1 < 0.99 { + t.Errorf("single contributor should have score ~1.0, got %.2f", score1) + } + + // Even distribution among many should be low + many := []CommitAuthor{ + {Commits: 20}, {Commits: 20}, {Commits: 20}, {Commits: 20}, {Commits: 20}, + } + scoreLow := calculateSiloScore(many, 100) + if scoreLow > 0.3 { + t.Errorf("even distribution should have low score, got %.2f", scoreLow) + } + + // Empty should be 0 + score0 := calculateSiloScore(nil, 0) + if score0 != 0 { + t.Errorf("empty should be 0, got %.2f", score0) + } +} + +func TestSiloReportGenerate(t *testing.T) { + entries := []SiloEntry{ + { + Directory: "internal/core", + TopContributors: []CommitAuthor{{Name: "Alice", Email: "alice@example.com", Commits: 50}}, + TotalCommits: 50, + ContributorCount: 1, + SiloScore: 1.0, + RiskLevel: "critical", + }, + } + + gen := NewSiloReportGenerator() + report := gen.Generate("/repo", 2, entries) + + if report.TotalDirs != 1 { + t.Errorf("expected 1 total dir, got %d", report.TotalDirs) + } + if report.CriticalCount != 1 { + t.Errorf("expected 1 critical, got %d", report.CriticalCount) + } + if len(report.Recommendations) == 0 { + t.Errorf("expected recommendations") + } + if report.Timestamp.IsZero() { + t.Errorf("timestamp should not be zero") + } +} + +func TestSiloReportRenderMarkdown(t *testing.T) { + entries := []SiloEntry{ + { + Directory: "internal/core", + TopContributors: []CommitAuthor{{Name: "Alice", Email: "alice@example.com", Commits: 50}}, + TotalCommits: 50, + ContributorCount: 1, + SiloScore: 
1.0, + RiskLevel: "critical", + }, + { + Directory: "pkg/shared", + TopContributors: []CommitAuthor{ + {Name: "A", Email: "a@example.com", Commits: 10}, + {Name: "B", Email: "b@example.com", Commits: 10}, + }, + TotalCommits: 50, + ContributorCount: 5, + SiloScore: 0.2, + RiskLevel: "low", + }, + } + + gen := NewSiloReportGenerator() + report := gen.Generate("/repo", 2, entries) + markdown := gen.RenderMarkdown(report) + + if !strings.Contains(markdown, "Knowledge Silo Analysis") { + t.Errorf("markdown should contain title") + } + if !strings.Contains(markdown, "Directory Silo Risk") { + t.Errorf("markdown should contain silo risk table") + } + if !strings.Contains(markdown, "internal/core") { + t.Errorf("markdown should contain directory name") + } + if !strings.Contains(markdown, "Alice") { + t.Errorf("markdown should contain contributor name") + } + if !strings.Contains(markdown, "Recommendations") { + t.Errorf("markdown should contain recommendations") + } + if !strings.Contains(markdown, "CRITICAL") { + t.Errorf("markdown should contain critical recommendation") + } +} + +func TestSiloReportNoSilos(t *testing.T) { + entries := []SiloEntry{ + { + Directory: "pkg/shared", + TopContributors: []CommitAuthor{ + {Name: "A", Email: "a@example.com", Commits: 20}, + }, + TotalCommits: 100, + ContributorCount: 5, + SiloScore: 0.2, + RiskLevel: "low", + }, + } + + gen := NewSiloReportGenerator() + report := gen.Generate("/repo", 2, entries) + + foundGood := false + for _, rec := range report.Recommendations { + if strings.Contains(rec, "GOOD") { + foundGood = true + break + } + } + if !foundGood { + t.Errorf("expected GOOD recommendation when no critical/high silos") + } +} + +func TestSiloEntryTopContributorsCapped(t *testing.T) { + dirAuthors := map[string][]CommitAuthor{ + "internal/big": { + {Name: "A", Email: "a@example.com", Commits: 50}, + {Name: "B", Email: "b@example.com", Commits: 30}, + {Name: "C", Email: "c@example.com", Commits: 20}, + {Name: "D", Email: 
"d@example.com", Commits: 10}, + {Name: "E", Email: "e@example.com", Commits: 5}, + }, + } + + entries := CalculateSilos(dirAuthors, 1) + if len(entries) != 1 { + t.Fatalf("expected 1 entry, got %d", len(entries)) + } + + // TopContributors should be capped at 3 + if len(entries[0].TopContributors) != 3 { + t.Errorf("expected 3 top contributors, got %d", len(entries[0].TopContributors)) + } +} diff --git a/internal/db/migrations.go b/internal/db/migrations.go index 3b7d11e..7089cf2 100644 --- a/internal/db/migrations.go +++ b/internal/db/migrations.go @@ -40,6 +40,11 @@ var migrations = []Migration{ Description: "add branch column to run_history", SQL: migration005SQL, }, + { + Version: 6, + Description: "add knowledge_silo_results table for silo analysis", + SQL: migration006SQL, + }, } const migration002SQL = ` @@ -121,6 +126,19 @@ const migration005SQL = ` ALTER TABLE run_history ADD COLUMN branch TEXT NOT NULL DEFAULT ''; ` +const migration006SQL = ` +CREATE TABLE IF NOT EXISTS knowledge_silo_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp DATETIME NOT NULL, + repo_path TEXT NOT NULL, + depth INTEGER NOT NULL, + results TEXT NOT NULL, + summary TEXT NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_silo_repo_time ON knowledge_silo_results(repo_path, timestamp DESC); +` + // Migrate runs all pending migrations inside transactions. func Migrate(db *sql.DB) error { if db == nil {