diff --git a/core/detect/dependency/detector.go b/core/detect/dependency/detector.go index 6787eea..73c07ae 100644 --- a/core/detect/dependency/detector.go +++ b/core/detect/dependency/detector.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "fmt" + "io/fs" "os" "path/filepath" "sort" @@ -68,14 +69,14 @@ var projectSignalKeywords = []string{ "gemini", } -var ignoredPathFragments = []string{ - "/.git/", - "/node_modules/", - "/vendor/", - "/dist/", - "/build/", - "/target/", - "/.venv/", +var ignoredDirectoryNames = map[string]struct{}{ + ".git": {}, + "node_modules": {}, + "vendor": {}, + "dist": {}, + "build": {}, + "target": {}, + ".venv": {}, } func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) { @@ -83,17 +84,13 @@ func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) return nil, err } - files, err := detect.WalkFiles(scope.Root) + files, err := collectDependencyManifests(scope.Root) if err != nil { return nil, err } findings := make([]model.Finding, 0) for _, rel := range files { - rel = filepath.ToSlash(rel) - if shouldSkipPath(rel) { - continue - } base := strings.ToLower(filepath.Base(rel)) switch { case base == "go.mod": @@ -342,10 +339,60 @@ func normalizeDependencyToken(value string) string { return normalized } -func shouldSkipPath(rel string) bool { - path := "/" + strings.ToLower(strings.TrimSpace(filepath.ToSlash(rel))) - for _, fragment := range ignoredPathFragments { - if strings.Contains(path, fragment) { +func collectDependencyManifests(root string) ([]string, error) { + files := make([]string, 0) + err := filepath.WalkDir(root, func(path string, d fs.DirEntry, walkErr error) error { + rel, relErr := filepath.Rel(root, path) + if relErr != nil { + return relErr + } + rel = filepath.ToSlash(rel) + if rel == "." { + rel = "" + } + if walkErr != nil { + if shouldSkipTraversal(rel) { + return filepath.SkipDir + } + return walkErr + } + if d != nil && d.IsDir() { + if shouldSkipTraversal(rel) { + return filepath.SkipDir + } + return nil + } + if isDependencyManifest(rel) { + files = append(files, rel) + } + return nil + }) + if err != nil { + return nil, err + } + sort.Strings(files) + return files, nil +} + +func isDependencyManifest(rel string) bool { + base := strings.ToLower(filepath.Base(rel)) + switch { + case base == "go.mod", base == "package.json", base == "pyproject.toml", base == "cargo.toml": + return true + case strings.HasPrefix(base, "requirements") && strings.HasSuffix(base, ".txt"): + return true + default: + return false + } +} + +func shouldSkipTraversal(rel string) bool { + if strings.TrimSpace(rel) == "" { + return false + } + parts := strings.Split(strings.ToLower(filepath.ToSlash(rel)), "/") + for _, part := range parts { + if _, ok := ignoredDirectoryNames[part]; ok { return true } } @@ -353,11 +400,8 @@ func shouldSkipPath(rel string) bool { } func projectSignal(scope detect.Scope, root string) (string, string, string, bool) { - repoToken := normalizeDependencyToken(scope.Repo) - for _, keyword := range projectSignalKeywords { - if strings.Contains(repoToken, keyword) { - return "__project_signal__/" + repoSignalSlug(scope.Repo), "repo_name", keyword, true - } + if keyword, ok := firstProjectSignalKeyword(scope.Repo); ok { + return "__project_signal__/" + repoSignalSlug(scope.Repo), "repo_name", keyword, true } for _, rel := range []string{"README.md", "readme.md", "README"} { @@ -370,16 +414,43 @@ func projectSignal(scope detect.Scope, root string) (string, string, string, boo if err != nil { continue } - normalized := normalizeDependencyToken(string(payload)) - for _, keyword := range projectSignalKeywords { - if strings.Contains(normalized, keyword) { - return rel, "readme_text", keyword, true - } + if keyword, ok := firstProjectSignalKeyword(string(payload)); ok { + return rel, "readme_text", keyword, true } } return "", "", "", false } +func firstProjectSignalKeyword(value string) (string, bool) { + tokens := tokenizeProjectSignal(value) + if len(tokens) == 0 { + return "", false + } + tokenSet := make(map[string]struct{}, len(tokens)) + for _, token := range tokens { + tokenSet[token] = struct{}{} + } + for _, keyword := range projectSignalKeywords { + if _, ok := tokenSet[strings.ToLower(strings.TrimSpace(keyword))]; ok { + return keyword, true + } + } + return "", false +} + +func tokenizeProjectSignal(value string) []string { + lower := strings.ToLower(value) + return strings.FieldsFunc(lower, func(r rune) bool { + if r >= 'a' && r <= 'z' { + return false + } + if r >= '0' && r <= '9' { + return false + } + return true + }) +} + func repoSignalSlug(value string) string { slug := strings.ToLower(strings.TrimSpace(value)) slug = strings.ReplaceAll(slug, "/", "-") diff --git a/core/detect/dependency/detector_test.go b/core/detect/dependency/detector_test.go new file mode 100644 index 0000000..3c1cda1 --- /dev/null +++ b/core/detect/dependency/detector_test.go @@ -0,0 +1,93 @@ +package dependency + +import ( + "context" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/Clyra-AI/wrkr/core/detect" +) + +func TestDetectSkipsIgnoredUnreadableDirectory(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("permission semantics differ on windows") + } + + root := t.TempDir() + writeFile(t, root, "go.mod", "module example.com/repo\n\ngo 1.25.7\nrequire github.com/openai/openai-go v0.1.0\n") + + ignoredDir := filepath.Join(root, "node_modules") + if err := os.MkdirAll(filepath.Join(ignoredDir, "pkg"), 0o755); err != nil { + t.Fatalf("mkdir ignored dir: %v", err) + } + writeFile(t, root, "node_modules/pkg/package.json", "{") + + if err := os.Chmod(ignoredDir, 0o000); err != nil { + t.Fatalf("chmod ignored dir: %v", err) + } + t.Cleanup(func() { + _ = os.Chmod(ignoredDir, 0o755) + }) + + findings, err := New().Detect(context.Background(), detect.Scope{ + Org: "acme", + Repo: "repo", + Root: root, + }, detect.Options{}) + if err != nil { + t.Fatalf("detect returned error: %v", err) + } + if len(findings) == 0 { + t.Fatal("expected at least one finding from go.mod") + } +} + +func TestProjectSignalUsesTokenBoundaries(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "README.md", "Storage management utilities.") + + findings, err := New().Detect(context.Background(), detect.Scope{ + Org: "acme", + Repo: "storage-service", + Root: root, + }, detect.Options{}) + if err != nil { + t.Fatalf("detect returned error: %v", err) + } + if len(findings) != 0 { + t.Fatalf("expected no project signal findings, got %d", len(findings)) + } +} + +func TestProjectSignalMatchesExplicitToken(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "README.md", "This repository contains an agent runtime.") + + findings, err := New().Detect(context.Background(), detect.Scope{ + Org: "acme", + Repo: "platform-service", + Root: root, + }, detect.Options{}) + if err != nil { + t.Fatalf("detect returned error: %v", err) + } + if len(findings) != 1 { + t.Fatalf("expected one project signal finding, got %d", len(findings)) + } + if findings[0].FindingType != "ai_project_signal" { + t.Fatalf("expected ai_project_signal finding, got %s", findings[0].FindingType) + } +} + +func writeFile(t *testing.T, root, rel, content string) { + t.Helper() + path := filepath.Join(root, filepath.FromSlash(rel)) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatalf("mkdir %s: %v", rel, err) + } + if err := os.WriteFile(path, []byte(content), 0o600); err != nil { + t.Fatalf("write %s: %v", rel, err) + } +}