Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 98 additions & 27 deletions core/detect/dependency/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"context"
"fmt"
"io/fs"
"os"
"path/filepath"
"sort"
Expand Down Expand Up @@ -68,32 +69,28 @@ var projectSignalKeywords = []string{
"gemini",
}

var ignoredPathFragments = []string{
"/.git/",
"/node_modules/",
"/vendor/",
"/dist/",
"/build/",
"/target/",
"/.venv/",
// ignoredDirectoryNames is the set of directory names that the manifest walk
// refuses to descend into. shouldSkipTraversal lower-cases each path segment
// before looking it up here, so the keys must stay lower-case.
var ignoredDirectoryNames = map[string]struct{}{
".git": {},
"node_modules": {},
"vendor": {},
"dist": {},
"build": {},
"target": {},
".venv": {},
}

func (Detector) Detect(_ context.Context, scope detect.Scope, _ detect.Options) ([]model.Finding, error) {
if err := detect.ValidateScopeRoot(scope.Root); err != nil {
return nil, err
}

files, err := detect.WalkFiles(scope.Root)
files, err := collectDependencyManifests(scope.Root)
if err != nil {
return nil, err
}

findings := make([]model.Finding, 0)
for _, rel := range files {
rel = filepath.ToSlash(rel)
if shouldSkipPath(rel) {
continue
}
base := strings.ToLower(filepath.Base(rel))
switch {
case base == "go.mod":
Expand Down Expand Up @@ -342,22 +339,69 @@ func normalizeDependencyToken(value string) string {
return normalized
}

func shouldSkipPath(rel string) bool {
path := "/" + strings.ToLower(strings.TrimSpace(filepath.ToSlash(rel)))
for _, fragment := range ignoredPathFragments {
if strings.Contains(path, fragment) {
// collectDependencyManifests walks the tree rooted at root and returns the
// slash-separated, root-relative paths of every non-directory entry that
// isDependencyManifest accepts, sorted with sort.Strings.
//
// Directories for which shouldSkipTraversal reports true are pruned without
// being entered. A walk error reported for such a path is dropped by pruning
// it as well; any other walk error stops the walk and is returned unchanged.
func collectDependencyManifests(root string) ([]string, error) {
	manifests := make([]string, 0)

	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		relPath, err := filepath.Rel(root, path)
		if err != nil {
			return err
		}
		relPath = filepath.ToSlash(relPath)
		if relPath == "." {
			// The root itself is represented as the empty relative path.
			relPath = ""
		}

		switch {
		case walkErr != nil:
			// Errors under an ignored path must not fail the scan.
			if shouldSkipTraversal(relPath) {
				return filepath.SkipDir
			}
			return walkErr
		case entry != nil && entry.IsDir():
			if shouldSkipTraversal(relPath) {
				return filepath.SkipDir
			}
		case isDependencyManifest(relPath):
			manifests = append(manifests, relPath)
		}
		return nil
	}

	if err := filepath.WalkDir(root, visit); err != nil {
		return nil, err
	}
	sort.Strings(manifests)
	return manifests, nil
}

// isDependencyManifest reports whether the final element of rel names a
// dependency manifest. The comparison is case-insensitive and accepts go.mod,
// package.json, pyproject.toml, cargo.toml, and any name that starts with
// "requirements" and ends with ".txt".
func isDependencyManifest(rel string) bool {
	name := strings.ToLower(filepath.Base(rel))
	switch name {
	case "go.mod", "package.json", "pyproject.toml", "cargo.toml":
		return true
	}
	return strings.HasPrefix(name, "requirements") && strings.HasSuffix(name, ".txt")
}

// shouldSkipTraversal reports whether any slash-separated segment of rel,
// compared in lower case, is a key of ignoredDirectoryNames. An empty or
// whitespace-only rel is never skipped.
func shouldSkipTraversal(rel string) bool {
	if strings.TrimSpace(rel) == "" {
		return false
	}
	remaining := strings.ToLower(filepath.ToSlash(rel))
	for {
		// Peel off one path segment at a time instead of materialising them all.
		segment, tail, more := strings.Cut(remaining, "/")
		if _, ignored := ignoredDirectoryNames[segment]; ignored {
			return true
		}
		if !more {
			return false
		}
		remaining = tail
	}
}

func projectSignal(scope detect.Scope, root string) (string, string, string, bool) {
repoToken := normalizeDependencyToken(scope.Repo)
for _, keyword := range projectSignalKeywords {
if strings.Contains(repoToken, keyword) {
return "__project_signal__/" + repoSignalSlug(scope.Repo), "repo_name", keyword, true
}
if keyword, ok := firstProjectSignalKeyword(scope.Repo); ok {
return "__project_signal__/" + repoSignalSlug(scope.Repo), "repo_name", keyword, true
}

for _, rel := range []string{"README.md", "readme.md", "README"} {
Expand All @@ -370,16 +414,43 @@ func projectSignal(scope detect.Scope, root string) (string, string, string, boo
if err != nil {
continue
}
normalized := normalizeDependencyToken(string(payload))
for _, keyword := range projectSignalKeywords {
if strings.Contains(normalized, keyword) {
return rel, "readme_text", keyword, true
}
if keyword, ok := firstProjectSignalKeyword(string(payload)); ok {
return rel, "readme_text", keyword, true
}
}
return "", "", "", false
}

// firstProjectSignalKeyword returns the first entry of projectSignalKeywords
// (in slice order) that appears as a whole token of value, as produced by
// tokenizeProjectSignal. Each keyword is trimmed and lower-cased before the
// lookup, but is returned exactly as listed. The boolean is false when no
// keyword matches.
//
// NOTE(review): keywords are compared against single tokens, so a keyword
// containing a separator character could never match — confirm the keyword
// list only holds single-token entries.
func firstProjectSignalKeyword(value string) (string, bool) {
	words := tokenizeProjectSignal(value)
	present := make(map[string]struct{}, len(words))
	for _, word := range words {
		present[word] = struct{}{}
	}
	for _, candidate := range projectSignalKeywords {
		needle := strings.ToLower(strings.TrimSpace(candidate))
		if _, found := present[needle]; found {
			return candidate, true
		}
	}
	return "", false
}

// tokenizeProjectSignal lower-cases value and splits it into maximal runs of
// ASCII letters and digits. Every other rune acts as a separator, so the
// result never contains empty tokens.
func tokenizeProjectSignal(value string) []string {
	isSeparator := func(r rune) bool {
		switch {
		case 'a' <= r && r <= 'z', '0' <= r && r <= '9':
			return false
		}
		return true
	}
	return strings.FieldsFunc(strings.ToLower(value), isSeparator)
}

func repoSignalSlug(value string) string {
slug := strings.ToLower(strings.TrimSpace(value))
slug = strings.ReplaceAll(slug, "/", "-")
Expand Down
93 changes: 93 additions & 0 deletions core/detect/dependency/detector_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package dependency

import (
"context"
"os"
"path/filepath"
"runtime"
"testing"

"github.com/Clyra-AI/wrkr/core/detect"
)

// TestDetectSkipsIgnoredUnreadableDirectory checks that Detect succeeds and
// still reports findings from the root go.mod when an ignored directory
// (node_modules) has had all of its permission bits removed.
func TestDetectSkipsIgnoredUnreadableDirectory(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("permission semantics differ on windows")
	}

	repoRoot := t.TempDir()
	writeFile(t, repoRoot, "go.mod", "module example.com/repo\n\ngo 1.25.7\nrequire github.com/openai/openai-go v0.1.0\n")

	lockedDir := filepath.Join(repoRoot, "node_modules")
	if err := os.MkdirAll(filepath.Join(lockedDir, "pkg"), 0o755); err != nil {
		t.Fatalf("mkdir ignored dir: %v", err)
	}
	writeFile(t, repoRoot, "node_modules/pkg/package.json", "{")

	if err := os.Chmod(lockedDir, 0o000); err != nil {
		t.Fatalf("chmod ignored dir: %v", err)
	}
	// Put the permissions back once the test ends; the error is deliberately
	// ignored because this is best-effort cleanup.
	t.Cleanup(func() {
		_ = os.Chmod(lockedDir, 0o755)
	})

	scope := detect.Scope{
		Org:  "acme",
		Repo: "repo",
		Root: repoRoot,
	}
	findings, err := New().Detect(context.Background(), scope, detect.Options{})
	if err != nil {
		t.Fatalf("detect returned error: %v", err)
	}
	if len(findings) == 0 {
		t.Fatal("expected at least one finding from go.mod")
	}
}

// TestProjectSignalUsesTokenBoundaries checks that a repository named
// "storage-service" with a README about storage yields no findings.
func TestProjectSignalUsesTokenBoundaries(t *testing.T) {
	repoRoot := t.TempDir()
	writeFile(t, repoRoot, "README.md", "Storage management utilities.")

	scope := detect.Scope{
		Org:  "acme",
		Repo: "storage-service",
		Root: repoRoot,
	}
	findings, err := New().Detect(context.Background(), scope, detect.Options{})
	if err != nil {
		t.Fatalf("detect returned error: %v", err)
	}
	if count := len(findings); count != 0 {
		t.Fatalf("expected no project signal findings, got %d", count)
	}
}

// TestProjectSignalMatchesExplicitToken checks that a README containing the
// standalone word "agent" produces exactly one finding, of type
// ai_project_signal.
func TestProjectSignalMatchesExplicitToken(t *testing.T) {
	repoRoot := t.TempDir()
	writeFile(t, repoRoot, "README.md", "This repository contains an agent runtime.")

	scope := detect.Scope{
		Org:  "acme",
		Repo: "platform-service",
		Root: repoRoot,
	}
	findings, err := New().Detect(context.Background(), scope, detect.Options{})
	if err != nil {
		t.Fatalf("detect returned error: %v", err)
	}
	if count := len(findings); count != 1 {
		t.Fatalf("expected one project signal finding, got %d", count)
	}
	if got := findings[0].FindingType; got != "ai_project_signal" {
		t.Fatalf("expected ai_project_signal finding, got %s", got)
	}
}

// writeFile writes content to the slash-separated path rel beneath root,
// creating missing parent directories (mode 0o755) first. The file is written
// with mode 0o600. Any failure aborts the calling test via t.Fatalf.
func writeFile(t *testing.T, root, rel, content string) {
	t.Helper()

	target := filepath.Join(root, filepath.FromSlash(rel))
	parent := filepath.Dir(target)
	if err := os.MkdirAll(parent, 0o755); err != nil {
		t.Fatalf("mkdir %s: %v", rel, err)
	}

	err := os.WriteFile(target, []byte(content), 0o600)
	if err != nil {
		t.Fatalf("write %s: %v", rel, err)
	}
}