Skip to content

Commit eaddc66

Browse files
marcus and claude committed
feat: add knowledge-silo command to detect team knowledge silos
Analyzes git history per directory to identify areas where only one or two people have contributed. Computes silo scores using commit concentration metrics, ranks directories by risk, and outputs a markdown report with recommendations for knowledge transfer. Nightshift-Task: knowledge-silo Nightshift-Ref: https://github.com/marcus/nightshift Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 68bbf11 commit eaddc66

File tree

6 files changed

+1068
-0
lines changed

6 files changed

+1068
-0
lines changed

cmd/nightshift/commands/silo.go

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
package commands
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"os"
7+
"path/filepath"
8+
"time"
9+
10+
"github.com/spf13/cobra"
11+
12+
"github.com/marcus/nightshift/internal/analysis"
13+
"github.com/marcus/nightshift/internal/config"
14+
"github.com/marcus/nightshift/internal/db"
15+
"github.com/marcus/nightshift/internal/logging"
16+
)
17+
18+
var knowledgeSiloCmd = &cobra.Command{
19+
Use: "knowledge-silo [path]",
20+
Short: "Detect knowledge silos in a repository",
21+
Long: `Analyze git history per directory to identify knowledge silos —
22+
areas of code where only one or two people have contributed.
23+
24+
Each directory is scored by contributor concentration. Directories where a
25+
single person owns most of the commits are flagged as knowledge silos,
26+
ranked by severity.
27+
28+
Metrics:
29+
- Silo Score: 0-1 concentration score (1 = complete silo)
30+
- Risk Level: critical/high/medium/low per directory
31+
- Top Contributors: dominant author(s) per directory`,
32+
RunE: func(cmd *cobra.Command, args []string) error {
33+
path, err := cmd.Flags().GetString("path")
34+
if err != nil {
35+
return err
36+
}
37+
38+
if path == "" && len(args) > 0 {
39+
path = args[0]
40+
}
41+
if path == "" {
42+
var err error
43+
path, err = os.Getwd()
44+
if err != nil {
45+
return fmt.Errorf("getting current directory: %w", err)
46+
}
47+
}
48+
49+
jsonOutput, _ := cmd.Flags().GetBool("json")
50+
since, _ := cmd.Flags().GetString("since")
51+
until, _ := cmd.Flags().GetString("until")
52+
depth, _ := cmd.Flags().GetInt("depth")
53+
minCommits, _ := cmd.Flags().GetInt("min-commits")
54+
saveReport, _ := cmd.Flags().GetBool("save")
55+
dbPath, _ := cmd.Flags().GetString("db")
56+
57+
return runKnowledgeSilo(path, jsonOutput, since, until, depth, minCommits, saveReport, dbPath)
58+
},
59+
}
60+
61+
func init() {
62+
knowledgeSiloCmd.Flags().StringP("path", "p", "", "Repository path")
63+
knowledgeSiloCmd.Flags().Bool("json", false, "Output as JSON")
64+
knowledgeSiloCmd.Flags().String("since", "", "Start date (RFC3339 or YYYY-MM-DD)")
65+
knowledgeSiloCmd.Flags().String("until", "", "End date (RFC3339 or YYYY-MM-DD)")
66+
knowledgeSiloCmd.Flags().Int("depth", 2, "Directory depth for grouping")
67+
knowledgeSiloCmd.Flags().Int("min-commits", 5, "Minimum commits to include a directory")
68+
knowledgeSiloCmd.Flags().Bool("save", false, "Save results to database")
69+
knowledgeSiloCmd.Flags().String("db", "", "Database path (uses config if not set)")
70+
rootCmd.AddCommand(knowledgeSiloCmd)
71+
}
72+
73+
func runKnowledgeSilo(path string, jsonOutput bool, since, until string, depth, minCommits int, saveReport bool, dbPath string) error {
74+
logger := logging.Component("knowledge-silo")
75+
76+
absPath, err := filepath.Abs(path)
77+
if err != nil {
78+
return fmt.Errorf("resolving path: %w", err)
79+
}
80+
81+
if !analysis.RepositoryExists(absPath) {
82+
return fmt.Errorf("not a git repository: %s", absPath)
83+
}
84+
85+
var sinceTime, untilTime time.Time
86+
if since != "" {
87+
t, err := parseDate(since)
88+
if err != nil {
89+
return fmt.Errorf("parsing since date: %w", err)
90+
}
91+
sinceTime = t
92+
}
93+
if until != "" {
94+
t, err := parseDate(until)
95+
if err != nil {
96+
return fmt.Errorf("parsing until date: %w", err)
97+
}
98+
untilTime = t
99+
}
100+
101+
parser := analysis.NewGitParser(absPath)
102+
opts := analysis.ParseOptions{
103+
Since: sinceTime,
104+
Until: untilTime,
105+
}
106+
107+
dirAuthors, err := parser.ParseAuthorsByDirectory(opts, depth)
108+
if err != nil {
109+
return fmt.Errorf("parsing git history: %w", err)
110+
}
111+
112+
if len(dirAuthors) == 0 {
113+
logger.Warnf("no commits found in %s", absPath)
114+
return nil
115+
}
116+
117+
entries := analysis.CalculateSilos(dirAuthors, minCommits)
118+
119+
gen := analysis.NewSiloReportGenerator()
120+
report := gen.Generate(absPath, depth, entries)
121+
122+
if jsonOutput {
123+
enc := json.NewEncoder(os.Stdout)
124+
enc.SetIndent("", " ")
125+
return enc.Encode(report)
126+
}
127+
128+
markdown := gen.RenderMarkdown(report)
129+
fmt.Println(markdown)
130+
131+
if saveReport {
132+
if dbPath == "" {
133+
cfg, err := config.Load()
134+
if err != nil {
135+
logger.Warnf("could not load config for db path: %v", err)
136+
} else {
137+
dbPath = cfg.ExpandedDBPath()
138+
}
139+
}
140+
141+
if dbPath != "" {
142+
database, err := db.Open(dbPath)
143+
if err != nil {
144+
logger.Errorf("opening database: %v", err)
145+
} else {
146+
defer func() { _ = database.Close() }()
147+
148+
result := &analysis.SiloResult{
149+
Timestamp: time.Now(),
150+
RepoPath: absPath,
151+
Depth: depth,
152+
Results: report,
153+
}
154+
155+
if err := result.Store(database.SQL()); err != nil {
156+
logger.Errorf("storing result: %v", err)
157+
} else {
158+
logger.Infof("results saved (ID: %d)", result.ID)
159+
}
160+
}
161+
}
162+
}
163+
164+
return nil
165+
}

internal/analysis/silo.go

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
package analysis
2+
3+
import (
4+
"fmt"
5+
"os/exec"
6+
"path/filepath"
7+
"sort"
8+
"strings"
9+
)
10+
11+
// SiloEntry represents a single directory's knowledge silo metrics.
12+
type SiloEntry struct {
13+
Directory string `json:"directory"`
14+
TopAuthors []CommitAuthor `json:"top_authors"`
15+
TotalCommits int `json:"total_commits"`
16+
Contributors int `json:"contributors"`
17+
SiloScore float64 `json:"silo_score"` // 0-1, 1 = max silo risk
18+
RiskLevel string `json:"risk_level"` // critical, high, medium, low
19+
}
20+
21+
// SiloReport holds the full silo analysis results.
22+
type SiloReport struct {
23+
Entries []SiloEntry `json:"entries"`
24+
TotalDirs int `json:"total_dirs"`
25+
CriticalSilos int `json:"critical_silos"`
26+
HighSilos int `json:"high_silos"`
27+
MediumSilos int `json:"medium_silos"`
28+
LowSilos int `json:"low_silos"`
29+
OverallRisk string `json:"overall_risk"`
30+
RepoPath string `json:"repo_path"`
31+
Depth int `json:"depth"`
32+
}
33+
34+
// ParseAuthorsByDirectory runs git log to build a map of directory -> []CommitAuthor
35+
// at the given depth level.
36+
func (gp *GitParser) ParseAuthorsByDirectory(opts ParseOptions, depth int) (map[string][]CommitAuthor, error) {
37+
args := []string{"log", "--format=COMMIT:%an|%ae", "--name-only"}
38+
39+
if !opts.Since.IsZero() {
40+
args = append(args, fmt.Sprintf("--since=%s", opts.Since.Format("2006-01-02T15:04:05Z07:00")))
41+
}
42+
if !opts.Until.IsZero() {
43+
args = append(args, fmt.Sprintf("--until=%s", opts.Until.Format("2006-01-02T15:04:05Z07:00")))
44+
}
45+
46+
cmd := exec.Command("git", args...)
47+
cmd.Dir = gp.repoPath
48+
49+
output, err := cmd.Output()
50+
if err != nil {
51+
return nil, fmt.Errorf("running git log: %w", err)
52+
}
53+
54+
// Parse output: lines alternate between COMMIT:author|email and file paths
55+
// Each commit block starts with COMMIT: line followed by file paths
56+
dirAuthors := make(map[string]map[string]*CommitAuthor) // dir -> email -> author
57+
var currentName, currentEmail string
58+
59+
for line := range strings.SplitSeq(string(output), "\n") {
60+
line = strings.TrimSpace(line)
61+
if line == "" {
62+
continue
63+
}
64+
65+
if after, ok := strings.CutPrefix(line, "COMMIT:"); ok {
66+
parts := strings.SplitN(after, "|", 2)
67+
if len(parts) == 2 {
68+
currentName = parts[0]
69+
currentEmail = parts[1]
70+
}
71+
continue
72+
}
73+
74+
// This is a file path — extract directory at the configured depth
75+
if currentEmail == "" {
76+
continue
77+
}
78+
79+
dir := truncateDir(line, depth)
80+
if dir == "" {
81+
continue
82+
}
83+
84+
if dirAuthors[dir] == nil {
85+
dirAuthors[dir] = make(map[string]*CommitAuthor)
86+
}
87+
88+
key := strings.ToLower(currentEmail)
89+
if author, exists := dirAuthors[dir][key]; exists {
90+
author.Commits++
91+
} else {
92+
dirAuthors[dir][key] = &CommitAuthor{
93+
Name: currentName,
94+
Email: currentEmail,
95+
Commits: 1,
96+
}
97+
}
98+
}
99+
100+
// Convert to map[string][]CommitAuthor
101+
result := make(map[string][]CommitAuthor, len(dirAuthors))
102+
for dir, authorMap := range dirAuthors {
103+
authors := make([]CommitAuthor, 0, len(authorMap))
104+
for _, a := range authorMap {
105+
authors = append(authors, *a)
106+
}
107+
// Sort by commits descending
108+
sort.Slice(authors, func(i, j int) bool {
109+
return authors[i].Commits > authors[j].Commits
110+
})
111+
result[dir] = authors
112+
}
113+
114+
return result, nil
115+
}
116+
117+
// truncateDir returns the directory of filePath, cut down to at most depth
// leading path components and joined with forward slashes.
//
// For depth=2, "internal/analysis/silo.go" -> "internal/analysis". A file
// without a directory component yields ".". A depth below 1 names no directory
// level, so everything collapses to "." (previously a negative depth panicked
// on the slice expression and a depth of 0 returned an empty string).
func truncateDir(filePath string, depth int) string {
	dir := filepath.ToSlash(filepath.Dir(filePath))
	if dir == "." || depth < 1 {
		return "."
	}

	parts := strings.Split(dir, "/")
	if len(parts) > depth {
		parts = parts[:depth]
	}

	return strings.Join(parts, "/")
}
132+
133+
// CalculateSilos computes silo metrics for each directory.
134+
// minCommits filters out directories with fewer commits than the threshold.
135+
func CalculateSilos(dirAuthors map[string][]CommitAuthor, minCommits int) []SiloEntry {
136+
var entries []SiloEntry
137+
138+
for dir, authors := range dirAuthors {
139+
totalCommits := 0
140+
for _, a := range authors {
141+
totalCommits += a.Commits
142+
}
143+
144+
if totalCommits < minCommits {
145+
continue
146+
}
147+
148+
score := calculateSiloScore(authors, totalCommits)
149+
risk := assessSiloRisk(authors, totalCommits, score)
150+
151+
// Keep top 3 authors max
152+
topAuthors := authors
153+
if len(topAuthors) > 3 {
154+
topAuthors = topAuthors[:3]
155+
}
156+
157+
entries = append(entries, SiloEntry{
158+
Directory: dir,
159+
TopAuthors: topAuthors,
160+
TotalCommits: totalCommits,
161+
Contributors: len(authors),
162+
SiloScore: score,
163+
RiskLevel: risk,
164+
})
165+
}
166+
167+
// Sort by silo score descending (worst silos first)
168+
sort.Slice(entries, func(i, j int) bool {
169+
return entries[i].SiloScore > entries[j].SiloScore
170+
})
171+
172+
return entries
173+
}
174+
175+
// calculateSiloScore computes a 0-1 score where 1 = complete knowledge silo.
176+
// Uses inverse normalized contributor count weighted by commit concentration.
177+
func calculateSiloScore(authors []CommitAuthor, totalCommits int) float64 {
178+
if len(authors) == 0 || totalCommits == 0 {
179+
return 0
180+
}
181+
182+
if len(authors) == 1 {
183+
return 1.0
184+
}
185+
186+
// Herfindahl-style concentration
187+
hhi := 0.0
188+
for _, a := range authors {
189+
share := float64(a.Commits) / float64(totalCommits)
190+
hhi += share * share
191+
}
192+
193+
// Top contributor share
194+
topShare := float64(authors[0].Commits) / float64(totalCommits)
195+
196+
// Combine: weight HHI (concentration) and top-contributor dominance
197+
// Both range 0-1; average gives balanced silo score
198+
score := (hhi + topShare) / 2.0
199+
200+
return score
201+
}
202+
203+
// assessSiloRisk classifies a directory's silo risk level.
204+
func assessSiloRisk(authors []CommitAuthor, totalCommits int, siloScore float64) string {
205+
if len(authors) == 0 || totalCommits == 0 {
206+
return "low"
207+
}
208+
209+
topShare := float64(authors[0].Commits) / float64(totalCommits)
210+
211+
// Critical: single contributor or top contributor >80%
212+
if len(authors) <= 1 || topShare > 0.80 {
213+
return "critical"
214+
}
215+
216+
// High: top contributor >60% or only 2 contributors
217+
if topShare > 0.60 || len(authors) <= 2 {
218+
return "high"
219+
}
220+
221+
// Medium: silo score > 0.4 or 3 contributors
222+
if siloScore > 0.4 || len(authors) <= 3 {
223+
return "medium"
224+
}
225+
226+
return "low"
227+
}

0 commit comments

Comments
 (0)