|
| 1 | +package analysis |
| 2 | + |
| 3 | +import ( |
| 4 | + "fmt" |
| 5 | + "os/exec" |
| 6 | + "path/filepath" |
| 7 | + "sort" |
| 8 | + "strings" |
| 9 | +) |
| 10 | + |
| 11 | +// SiloEntry represents a single directory's knowledge silo metrics. |
| 12 | +type SiloEntry struct { |
| 13 | + Directory string `json:"directory"` |
| 14 | + TopAuthors []CommitAuthor `json:"top_authors"` |
| 15 | + TotalCommits int `json:"total_commits"` |
| 16 | + Contributors int `json:"contributors"` |
| 17 | + SiloScore float64 `json:"silo_score"` // 0-1, 1 = max silo risk |
| 18 | + RiskLevel string `json:"risk_level"` // critical, high, medium, low |
| 19 | +} |
| 20 | + |
| 21 | +// SiloReport holds the full silo analysis results. |
| 22 | +type SiloReport struct { |
| 23 | + Entries []SiloEntry `json:"entries"` |
| 24 | + TotalDirs int `json:"total_dirs"` |
| 25 | + CriticalSilos int `json:"critical_silos"` |
| 26 | + HighSilos int `json:"high_silos"` |
| 27 | + MediumSilos int `json:"medium_silos"` |
| 28 | + LowSilos int `json:"low_silos"` |
| 29 | + OverallRisk string `json:"overall_risk"` |
| 30 | + RepoPath string `json:"repo_path"` |
| 31 | + Depth int `json:"depth"` |
| 32 | +} |
| 33 | + |
| 34 | +// ParseAuthorsByDirectory runs git log to build a map of directory -> []CommitAuthor |
| 35 | +// at the given depth level. |
| 36 | +func (gp *GitParser) ParseAuthorsByDirectory(opts ParseOptions, depth int) (map[string][]CommitAuthor, error) { |
| 37 | + args := []string{"log", "--format=COMMIT:%an|%ae", "--name-only"} |
| 38 | + |
| 39 | + if !opts.Since.IsZero() { |
| 40 | + args = append(args, fmt.Sprintf("--since=%s", opts.Since.Format("2006-01-02T15:04:05Z07:00"))) |
| 41 | + } |
| 42 | + if !opts.Until.IsZero() { |
| 43 | + args = append(args, fmt.Sprintf("--until=%s", opts.Until.Format("2006-01-02T15:04:05Z07:00"))) |
| 44 | + } |
| 45 | + |
| 46 | + cmd := exec.Command("git", args...) |
| 47 | + cmd.Dir = gp.repoPath |
| 48 | + |
| 49 | + output, err := cmd.Output() |
| 50 | + if err != nil { |
| 51 | + return nil, fmt.Errorf("running git log: %w", err) |
| 52 | + } |
| 53 | + |
| 54 | + // Parse output: lines alternate between COMMIT:author|email and file paths |
| 55 | + // Each commit block starts with COMMIT: line followed by file paths |
| 56 | + dirAuthors := make(map[string]map[string]*CommitAuthor) // dir -> email -> author |
| 57 | + var currentName, currentEmail string |
| 58 | + |
| 59 | + for line := range strings.SplitSeq(string(output), "\n") { |
| 60 | + line = strings.TrimSpace(line) |
| 61 | + if line == "" { |
| 62 | + continue |
| 63 | + } |
| 64 | + |
| 65 | + if after, ok := strings.CutPrefix(line, "COMMIT:"); ok { |
| 66 | + parts := strings.SplitN(after, "|", 2) |
| 67 | + if len(parts) == 2 { |
| 68 | + currentName = parts[0] |
| 69 | + currentEmail = parts[1] |
| 70 | + } |
| 71 | + continue |
| 72 | + } |
| 73 | + |
| 74 | + // This is a file path — extract directory at the configured depth |
| 75 | + if currentEmail == "" { |
| 76 | + continue |
| 77 | + } |
| 78 | + |
| 79 | + dir := truncateDir(line, depth) |
| 80 | + if dir == "" { |
| 81 | + continue |
| 82 | + } |
| 83 | + |
| 84 | + if dirAuthors[dir] == nil { |
| 85 | + dirAuthors[dir] = make(map[string]*CommitAuthor) |
| 86 | + } |
| 87 | + |
| 88 | + key := strings.ToLower(currentEmail) |
| 89 | + if author, exists := dirAuthors[dir][key]; exists { |
| 90 | + author.Commits++ |
| 91 | + } else { |
| 92 | + dirAuthors[dir][key] = &CommitAuthor{ |
| 93 | + Name: currentName, |
| 94 | + Email: currentEmail, |
| 95 | + Commits: 1, |
| 96 | + } |
| 97 | + } |
| 98 | + } |
| 99 | + |
| 100 | + // Convert to map[string][]CommitAuthor |
| 101 | + result := make(map[string][]CommitAuthor, len(dirAuthors)) |
| 102 | + for dir, authorMap := range dirAuthors { |
| 103 | + authors := make([]CommitAuthor, 0, len(authorMap)) |
| 104 | + for _, a := range authorMap { |
| 105 | + authors = append(authors, *a) |
| 106 | + } |
| 107 | + // Sort by commits descending |
| 108 | + sort.Slice(authors, func(i, j int) bool { |
| 109 | + return authors[i].Commits > authors[j].Commits |
| 110 | + }) |
| 111 | + result[dir] = authors |
| 112 | + } |
| 113 | + |
| 114 | + return result, nil |
| 115 | +} |
| 116 | + |
// truncateDir returns the directory part of filePath, limited to its first
// depth components and joined with "/".
//
// For depth=2, "internal/analysis/silo.go" -> "internal/analysis".
// Files directly at the root map to ".". A depth below 1 leaves no component
// to keep, so every path collapses to "." as well; this avoids the slice
// bounds panic a negative depth would cause and keeps depth 0 consistent
// between root-level and nested files.
func truncateDir(filePath string, depth int) string {
	dir := filepath.ToSlash(filepath.Dir(filePath))
	if dir == "." || depth < 1 {
		return "."
	}

	parts := strings.Split(dir, "/")
	if len(parts) > depth {
		parts = parts[:depth]
	}

	return strings.Join(parts, "/")
}
| 132 | + |
| 133 | +// CalculateSilos computes silo metrics for each directory. |
| 134 | +// minCommits filters out directories with fewer commits than the threshold. |
| 135 | +func CalculateSilos(dirAuthors map[string][]CommitAuthor, minCommits int) []SiloEntry { |
| 136 | + var entries []SiloEntry |
| 137 | + |
| 138 | + for dir, authors := range dirAuthors { |
| 139 | + totalCommits := 0 |
| 140 | + for _, a := range authors { |
| 141 | + totalCommits += a.Commits |
| 142 | + } |
| 143 | + |
| 144 | + if totalCommits < minCommits { |
| 145 | + continue |
| 146 | + } |
| 147 | + |
| 148 | + score := calculateSiloScore(authors, totalCommits) |
| 149 | + risk := assessSiloRisk(authors, totalCommits, score) |
| 150 | + |
| 151 | + // Keep top 3 authors max |
| 152 | + topAuthors := authors |
| 153 | + if len(topAuthors) > 3 { |
| 154 | + topAuthors = topAuthors[:3] |
| 155 | + } |
| 156 | + |
| 157 | + entries = append(entries, SiloEntry{ |
| 158 | + Directory: dir, |
| 159 | + TopAuthors: topAuthors, |
| 160 | + TotalCommits: totalCommits, |
| 161 | + Contributors: len(authors), |
| 162 | + SiloScore: score, |
| 163 | + RiskLevel: risk, |
| 164 | + }) |
| 165 | + } |
| 166 | + |
| 167 | + // Sort by silo score descending (worst silos first) |
| 168 | + sort.Slice(entries, func(i, j int) bool { |
| 169 | + return entries[i].SiloScore > entries[j].SiloScore |
| 170 | + }) |
| 171 | + |
| 172 | + return entries |
| 173 | +} |
| 174 | + |
| 175 | +// calculateSiloScore computes a 0-1 score where 1 = complete knowledge silo. |
| 176 | +// Uses inverse normalized contributor count weighted by commit concentration. |
| 177 | +func calculateSiloScore(authors []CommitAuthor, totalCommits int) float64 { |
| 178 | + if len(authors) == 0 || totalCommits == 0 { |
| 179 | + return 0 |
| 180 | + } |
| 181 | + |
| 182 | + if len(authors) == 1 { |
| 183 | + return 1.0 |
| 184 | + } |
| 185 | + |
| 186 | + // Herfindahl-style concentration |
| 187 | + hhi := 0.0 |
| 188 | + for _, a := range authors { |
| 189 | + share := float64(a.Commits) / float64(totalCommits) |
| 190 | + hhi += share * share |
| 191 | + } |
| 192 | + |
| 193 | + // Top contributor share |
| 194 | + topShare := float64(authors[0].Commits) / float64(totalCommits) |
| 195 | + |
| 196 | + // Combine: weight HHI (concentration) and top-contributor dominance |
| 197 | + // Both range 0-1; average gives balanced silo score |
| 198 | + score := (hhi + topShare) / 2.0 |
| 199 | + |
| 200 | + return score |
| 201 | +} |
| 202 | + |
| 203 | +// assessSiloRisk classifies a directory's silo risk level. |
| 204 | +func assessSiloRisk(authors []CommitAuthor, totalCommits int, siloScore float64) string { |
| 205 | + if len(authors) == 0 || totalCommits == 0 { |
| 206 | + return "low" |
| 207 | + } |
| 208 | + |
| 209 | + topShare := float64(authors[0].Commits) / float64(totalCommits) |
| 210 | + |
| 211 | + // Critical: single contributor or top contributor >80% |
| 212 | + if len(authors) <= 1 || topShare > 0.80 { |
| 213 | + return "critical" |
| 214 | + } |
| 215 | + |
| 216 | + // High: top contributor >60% or only 2 contributors |
| 217 | + if topShare > 0.60 || len(authors) <= 2 { |
| 218 | + return "high" |
| 219 | + } |
| 220 | + |
| 221 | + // Medium: silo score > 0.4 or 3 contributors |
| 222 | + if siloScore > 0.4 || len(authors) <= 3 { |
| 223 | + return "medium" |
| 224 | + } |
| 225 | + |
| 226 | + return "low" |
| 227 | +} |
0 commit comments