@@ -7,10 +7,17 @@ import (
7
7
"path/filepath"
8
8
"regexp"
9
9
"strings"
10
+ "sync"
10
11
11
12
"github.com/IGLOU-EU/go-wildcard/v2"
12
13
)
13
14
15
// Package-level caches shared by every parser in this package.
//
// mavenCoordCache memoizes Maven coordinate parsing: keys are the raw
// Bazel-style coordinate strings and values are the parsed results (both
// strings). The zero value of sync.Map is ready to use and safe for
// concurrent access, so no explicit initializer is needed.
var mavenCoordCache sync.Map
20
+
14
21
// bazelParser handles parsing of Bazel build files to extract dependencies
15
22
type bazelParser struct {
16
23
fsys fs.FS
@@ -76,6 +83,11 @@ func (b *bazelParser) GetGoDeps() []Dependency {
76
83
return b .deps ["go" ]
77
84
}
78
85
86
+ // GetJSDeps returns JavaScript dependencies found in Bazel files
87
+ func (b * bazelParser ) GetJSDeps () []Dependency {
88
+ return b .deps ["js" ]
89
+ }
90
+
79
91
// GetWorkspaceDeps returns WORKSPACE dependencies found in Bazel files
80
92
func (b * bazelParser ) GetWorkspaceDeps () []Dependency {
81
93
return b .deps ["workspace" ]
@@ -138,6 +150,9 @@ var (
138
150
// Match Go rules
139
151
goRulePattern = regexp .MustCompile (`(go_library|go_binary|go_test)\s*\(` )
140
152
153
+ // Match JavaScript/Node.js rules
154
+ jsRulePattern = regexp .MustCompile (`(js_library|js_binary|js_test|nodejs_binary|nodejs_test)\s*\(` )
155
+
141
156
// Match external Maven dependencies
142
157
mavenDepPattern = regexp .MustCompile (`@maven//:(.+)` )
143
158
@@ -147,6 +162,9 @@ var (
147
162
// Match external Go dependencies
148
163
goDepPattern = regexp .MustCompile (`@([^/]+)//.*` )
149
164
165
+ // Match external npm dependencies
166
+ npmDepPattern = regexp .MustCompile (`@npm//(.+)` )
167
+
150
168
// Match bazel_dep declarations in MODULE.bazel
151
169
bazelDepPattern = regexp .MustCompile (`bazel_dep\s*\(\s*name\s*=\s*"([^"]+)"\s*,\s*version\s*=\s*"([^"]+)"` )
152
170
@@ -166,12 +184,19 @@ var (
166
184
func (b * bazelParser ) parseBuildFiles () error {
167
185
buildFiles := []string {"BUILD" , "BUILD.bazel" }
168
186
187
+ // Optimize by checking file existence first to avoid unnecessary I/O
188
+ var existingFiles []string
169
189
for _ , filename := range buildFiles {
190
+ if _ , err := b .fsys .Open (filepath .Join (b .path , filename )); err == nil {
191
+ existingFiles = append (existingFiles , filename )
192
+ } else if ! errors .Is (err , fs .ErrNotExist ) {
193
+ return err
194
+ }
195
+ }
196
+
197
+ // Parse only existing files
198
+ for _ , filename := range existingFiles {
170
199
if err := b .parseBuildFile (filename ); err != nil {
171
- // If file doesn't exist, continue to next file
172
- if errors .Is (err , fs .ErrNotExist ) {
173
- continue
174
- }
175
200
return err
176
201
}
177
202
}
@@ -214,6 +239,10 @@ func (b *bazelParser) parseBuildFile(filename string) error {
214
239
currentRule = "go"
215
240
inRule = true
216
241
ruleContent .Reset ()
242
+ case jsRulePattern .MatchString (line ):
243
+ currentRule = "js"
244
+ inRule = true
245
+ ruleContent .Reset ()
217
246
}
218
247
219
248
if inRule {
@@ -243,6 +272,10 @@ func (b *bazelParser) extractDepsFromRule(ruleContent, language string) []Depend
243
272
244
273
// Extract individual dependency strings
245
274
depStrings := depStringPattern .FindAllStringSubmatch (depsMatches [1 ], - 1 )
275
+
276
+ // Pre-allocate slice for better performance
277
+ deps = make ([]Dependency , 0 , len (depStrings ))
278
+
246
279
for _ , match := range depStrings {
247
280
if len (match ) < 2 {
248
281
continue
@@ -265,50 +298,12 @@ func (b *bazelParser) parseDependencyTarget(target, language string) Dependency
265
298
// Handle Maven dependencies
266
299
if mavenMatches := mavenDepPattern .FindStringSubmatch (target ); len (mavenMatches ) > 1 {
267
300
mavenCoord := mavenMatches [1 ]
268
- // Convert maven coordinate format (com_google_guava_guava) to standard format
269
- // The format is typically groupId_groupId_..._artifactId or just groupId_artifactId
270
- parts := strings .Split (mavenCoord , "_" )
271
- if len (parts ) >= 2 {
272
- // For coordinates like org_slf4j_slf4j_api, we need to be smarter about parsing
273
- // Common patterns:
274
- // - com_google_guava_guava -> com.google.guava:guava
275
- // - junit_junit -> junit:junit
276
- // - org_slf4j_slf4j_api -> org.slf4j:slf4j-api
277
-
278
- // Heuristic: if the last part looks like a repeated group name, treat it differently
279
- lastPart := parts [len (parts )- 1 ]
280
-
281
- // Check if this follows the pattern where artifact name is constructed from multiple parts
282
- var groupId , artifactId string
283
- if len (parts ) == 2 {
284
- // Simple case: group_artifact
285
- groupId = parts [0 ]
286
- artifactId = parts [1 ]
287
- } else if len (parts ) >= 3 {
288
- // Complex case: try to determine where group ends and artifact begins
289
- // Look for repeated patterns or common separators
290
-
291
- // Strategy 1: If last two parts are similar to first parts, it might be group_group_artifact
292
- switch {
293
- case len (parts ) == 4 && parts [0 ] == parts [1 ] && parts [1 ] == parts [2 ]:
294
- // Pattern like com_google_guava_guava
295
- groupId = strings .Join (parts [:len (parts )- 1 ], "." )
296
- artifactId = lastPart
297
- case len (parts ) == 4 && parts [1 ] == parts [2 ]:
298
- // Pattern like org_slf4j_slf4j_api
299
- groupId = strings .Join (parts [:2 ], "." )
300
- artifactId = strings .Join (parts [2 :], "-" )
301
- default :
302
- // Default: assume last part is artifact, rest is group
303
- groupId = strings .Join (parts [:len (parts )- 1 ], "." )
304
- artifactId = lastPart
305
- }
301
+ dep .Name = b .parseMavenCoordinate (mavenCoord )
302
+ if dep .Name != "" {
303
+ // Extract vendor from coordinate if possible
304
+ if colonIdx := strings .Index (dep .Name , ":" ); colonIdx > 0 {
305
+ dep .Vendor = dep .Name [:colonIdx ]
306
306
}
307
-
308
- dep .Vendor = groupId
309
- dep .Name = groupId + ":" + artifactId
310
- } else {
311
- dep .Name = mavenCoord
312
307
}
313
308
return dep
314
309
}
@@ -322,6 +317,21 @@ func (b *bazelParser) parseDependencyTarget(target, language string) Dependency
322
317
return dep
323
318
}
324
319
320
+ // Handle npm dependencies
321
+ if npmMatches := npmDepPattern .FindStringSubmatch (target ); len (npmMatches ) > 1 {
322
+ npmPackage := npmMatches [1 ]
323
+ // Convert npm package format to standard package name
324
+ // Common patterns: @npm//package_name, @npm//@scope/package_name
325
+ if strings .HasPrefix (npmPackage , "@" ) {
326
+ // Handle scoped packages like @npm//@angular/core -> @angular/core
327
+ dep .Name = npmPackage
328
+ } else {
329
+ // Handle regular packages like @npm//lodash -> lodash
330
+ dep .Name = strings .ReplaceAll (npmPackage , "_" , "-" )
331
+ }
332
+ return dep
333
+ }
334
+
325
335
// Handle Go dependencies
326
336
if language == "go" {
327
337
if goMatches := goDepPattern .FindStringSubmatch (target ); len (goMatches ) > 1 {
@@ -515,3 +525,217 @@ func (b *bazelParser) parseWorkspaceDeclaration(content, declarationType string)
515
525
516
526
return dep
517
527
}
528
+
529
+ // parseMavenCoordinate converts Bazel Maven coordinate format to standard Maven coordinate
530
+ // with sophisticated heuristics for various patterns
531
+ func (b * bazelParser ) parseMavenCoordinate (mavenCoord string ) string {
532
+ // Check cache first for performance
533
+ if cached , ok := mavenCoordCache .Load (mavenCoord ); ok {
534
+ if result , ok := cached .(string ); ok {
535
+ return result
536
+ }
537
+ }
538
+
539
+ result := b .parseMavenCoordinateUncached (mavenCoord )
540
+
541
+ // Cache the result for future use
542
+ mavenCoordCache .Store (mavenCoord , result )
543
+
544
+ return result
545
+ }
546
+
547
+ // parseMavenCoordinateUncached performs the actual parsing without caching
548
+ func (b * bazelParser ) parseMavenCoordinateUncached (mavenCoord string ) string {
549
+ // Handle empty or invalid coordinates
550
+ if mavenCoord == "" {
551
+ return ""
552
+ }
553
+
554
+ // Split by underscore - this is the standard Bazel convention
555
+ parts := strings .Split (mavenCoord , "_" )
556
+ if len (parts ) < 2 {
557
+ return mavenCoord // Return as-is if we can't parse it
558
+ }
559
+
560
+ // Enhanced pattern recognition for Maven coordinates
561
+ // Common patterns in real-world usage:
562
+ // 1. Simple: group_artifact (junit_junit)
563
+ // 2. Multi-part group: org_springframework_spring_core
564
+ // 3. Repeated components: com_google_guava_guava
565
+ // 4. Complex artifacts: org_slf4j_slf4j_api, io_grpc_grpc_netty_shaded
566
+ // 5. Deep hierarchies: org_apache_commons_commons_lang3
567
+
568
+ var groupId , artifactId string
569
+
570
+ switch len (parts ) {
571
+ case 2 :
572
+ // Simple case: group_artifact
573
+ groupId = parts [0 ]
574
+ artifactId = parts [1 ]
575
+
576
+ case 3 :
577
+ // Three parts - need to determine the split
578
+ // Common patterns:
579
+ // - org_junit_jupiter -> org.junit:jupiter
580
+ // - com_fasterxml_jackson -> com.fasterxml:jackson
581
+ groupId = strings .Join (parts [:2 ], "." )
582
+ artifactId = parts [2 ]
583
+
584
+ case 4 :
585
+ // Four parts - most complex cases
586
+ switch {
587
+ case parts [0 ] == parts [1 ] && parts [1 ] == parts [2 ]:
588
+ // Pattern: com_google_guava_guava -> com.google.guava:guava
589
+ groupId = strings .Join (parts [:3 ], "." )
590
+ artifactId = parts [3 ]
591
+ case parts [1 ] == parts [2 ]:
592
+ // Pattern: org_slf4j_slf4j_api -> org.slf4j:slf4j-api
593
+ groupId = strings .Join (parts [:2 ], "." )
594
+ artifactId = strings .Join (parts [2 :], "-" )
595
+ case b .isKnownGroupPattern (parts ):
596
+ // Use known patterns for common libraries
597
+ groupId , artifactId = b .parseKnownPattern (parts )
598
+ default :
599
+ // Default: assume first 3 parts are group, last is artifact
600
+ groupId = strings .Join (parts [:3 ], "." )
601
+ artifactId = parts [3 ]
602
+ }
603
+
604
+ case 5 :
605
+ // Five parts - very complex hierarchies
606
+ switch {
607
+ case b .isKnownGroupPattern (parts ):
608
+ groupId , artifactId = b .parseKnownPattern (parts )
609
+ case parts [2 ] == parts [3 ]:
610
+ // Pattern like: io_grpc_grpc_netty_shaded -> io.grpc:grpc-netty-shaded
611
+ groupId = strings .Join (parts [:2 ], "." )
612
+ artifactId = strings .Join (parts [2 :], "-" )
613
+ default :
614
+ // Default: assume first 4 parts are group, last is artifact
615
+ groupId = strings .Join (parts [:4 ], "." )
616
+ artifactId = parts [4 ]
617
+ }
618
+
619
+ default :
620
+ // Six or more parts - handle known patterns or default strategy
621
+ if len (parts ) >= 6 && b .isKnownGroupPattern (parts ) {
622
+ groupId , artifactId = b .parseKnownPattern (parts )
623
+ } else {
624
+ // Conservative default: assume last part is artifact, rest is group
625
+ groupId = strings .Join (parts [:len (parts )- 1 ], "." )
626
+ artifactId = parts [len (parts )- 1 ]
627
+ }
628
+ }
629
+
630
+ // Post-processing: normalize common naming conventions
631
+ artifactId = b .normalizeArtifactId (artifactId , groupId )
632
+
633
+ return groupId + ":" + artifactId
634
+ }
635
+
636
+ // isKnownGroupPattern checks if the coordinate matches known library patterns
637
+ func (b * bazelParser ) isKnownGroupPattern (parts []string ) bool {
638
+ if len (parts ) < 3 {
639
+ return false
640
+ }
641
+
642
+ // Check for well-known library patterns
643
+ coordinate := strings .Join (parts , "_" )
644
+
645
+ // Spring Framework patterns
646
+ if strings .HasPrefix (coordinate , "org_springframework_" ) {
647
+ return true
648
+ }
649
+
650
+ // Apache Commons patterns
651
+ if strings .HasPrefix (coordinate , "org_apache_commons_" ) {
652
+ return true
653
+ }
654
+
655
+ // Jackson patterns
656
+ if strings .HasPrefix (coordinate , "com_fasterxml_jackson_" ) {
657
+ return true
658
+ }
659
+
660
+ // gRPC patterns
661
+ if strings .HasPrefix (coordinate , "io_grpc_" ) {
662
+ return true
663
+ }
664
+
665
+ // Netty patterns
666
+ if strings .HasPrefix (coordinate , "io_netty_" ) {
667
+ return true
668
+ }
669
+
670
+ return false
671
+ }
672
+
673
+ // parseKnownPattern handles specific known library patterns
674
+ func (b * bazelParser ) parseKnownPattern (parts []string ) (string , string ) {
675
+ coordinate := strings .Join (parts , "_" )
676
+
677
+ // Spring Framework: org_springframework_spring_* -> org.springframework:spring-*
678
+ if strings .HasPrefix (coordinate , "org_springframework_spring_" ) {
679
+ return "org.springframework" , strings .Join (parts [2 :], "-" )
680
+ }
681
+
682
+ // Apache Commons: org_apache_commons_commons_* -> org.apache.commons:commons-*
683
+ if strings .HasPrefix (coordinate , "org_apache_commons_commons_" ) {
684
+ return "org.apache.commons" , strings .Join (parts [3 :], "-" )
685
+ }
686
+
687
+ // Jackson: com_fasterxml_jackson_* -> com.fasterxml.jackson.*:jackson-*
688
+ if strings .HasPrefix (coordinate , "com_fasterxml_jackson_" ) {
689
+ if len (parts ) >= 4 {
690
+ groupId := strings .Join (parts [:4 ], "." )
691
+ artifactId := strings .Join (parts [2 :], "-" )
692
+ return groupId , artifactId
693
+ }
694
+ }
695
+
696
+ // gRPC: io_grpc_grpc_* -> io.grpc:grpc-*
697
+ if strings .HasPrefix (coordinate , "io_grpc_grpc_" ) {
698
+ return "io.grpc" , strings .Join (parts [2 :], "-" )
699
+ }
700
+
701
+ // Netty: io_netty_netty_* -> io.netty:netty-*
702
+ if strings .HasPrefix (coordinate , "io_netty_netty_" ) {
703
+ return "io.netty" , strings .Join (parts [2 :], "-" )
704
+ }
705
+
706
+ // Default fallback
707
+ return strings .Join (parts [:len (parts )- 1 ], "." ), parts [len (parts )- 1 ]
708
+ }
709
+
710
+ // normalizeArtifactId applies common normalization rules to artifact IDs
711
+ func (b * bazelParser ) normalizeArtifactId (artifactId , groupId string ) string {
712
+ // No changes needed for most cases, but could add rules here
713
+ // For example, converting underscores to hyphens in artifact names
714
+ // when they're clearly meant to be hyphens
715
+
716
+ // Some artifacts use underscores where hyphens are more standard
717
+ // But we need to be conservative to avoid breaking valid cases
718
+
719
+ return artifactId
720
+ }
721
+
722
+ // ClearBazelCaches clears all Bazel-related caches to free memory
723
+ // This can be called periodically in long-running applications
724
+ func ClearBazelCaches () {
725
+ mavenCoordCache = sync.Map {}
726
+ }
727
+
728
+ // GetBazelCacheStats returns statistics about cache usage for monitoring
729
+ func GetBazelCacheStats () map [string ]int {
730
+ stats := make (map [string ]int )
731
+
732
+ // Count Maven coordinate cache entries
733
+ mavenCount := 0
734
+ mavenCoordCache .Range (func (_ , _ any ) bool {
735
+ mavenCount ++
736
+ return true
737
+ })
738
+ stats ["maven_coordinates" ] = mavenCount
739
+
740
+ return stats
741
+ }
0 commit comments