Skip to content

Commit a5e2dba

Browse files
security: harden remote URL validation at config parse time
Add strict security validation for remote URLs and remote names to prevent injection attacks when multi-remote support lands. Changes: - Add ValidateRemoteURL() with control character rejection, CLI flag injection prevention, scheme allowlist, and per-scheme structural validation (host/path requirements) - Add ValidateRemoteName() aligned with existing peer-name policy (letter-start, alphanumeric + hyphen/underscore, max 64 chars) - Add MatchesRemotePattern() and ValidateRemoteURLWithPatterns() for enterprise lockdown via federation.allowed-remote-patterns config - Move config validation from simple IsRemoteURL() classifier to strict ValidateRemoteURL() security boundary - Add defense-in-depth validation in AddCLIRemote/RemoveCLIRemote - Add validation at clone entry points (cache.Ensure, bootstrap) - Tighten SCP-style URL regex to exclude control characters in path - Add comprehensive test coverage for null bytes, control chars, newline injection, CLI flag injection, scheme validation, structural validation, remote name edge cases, and pattern matching Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4e0a654 commit a5e2dba

File tree

8 files changed

+474
-8
lines changed

8 files changed

+474
-8
lines changed

cmd/bd/config.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -515,20 +515,31 @@ func validateSyncConfig(repoPath string) []string {
515515
issues = append(issues, "federation.remote: required for Dolt sync")
516516
}
517517

518-
// Validate remote URL format
518+
// Strict security validation of remote URL
519519
if federationRemote != "" {
520-
if !isValidRemoteURL(federationRemote) {
521-
issues = append(issues, fmt.Sprintf("federation.remote: %q is not a valid remote URL (expected dolthub://, gs://, s3://, az://, file://, or standard git URL)", federationRemote))
520+
if err := remotecache.ValidateRemoteURL(federationRemote); err != nil {
521+
issues = append(issues, fmt.Sprintf("federation.remote: %s", err))
522+
}
523+
}
524+
525+
// Validate against allowed-remote-patterns if configured
526+
if federationRemote != "" {
527+
patterns := v.GetStringSlice("federation.allowed-remote-patterns")
528+
if len(patterns) > 0 {
529+
if err := remotecache.ValidateRemoteURLWithPatterns(federationRemote, patterns); err != nil {
530+
issues = append(issues, fmt.Sprintf("federation.remote: %s", err))
531+
}
522532
}
523533
}
524534

525535
return issues
526536
}
527537

528538
// isValidRemoteURL validates remote URL formats for sync configuration.
529-
// Delegates to remotecache.IsRemoteURL for consistent URL classification.
530-
func isValidRemoteURL(url string) bool {
531-
return remotecache.IsRemoteURL(url)
539+
// Uses strict security validation that checks structural correctness,
540+
// rejects control characters, and validates per-scheme requirements.
541+
func isValidRemoteURL(rawURL string) bool {
542+
return remotecache.ValidateRemoteURL(rawURL) == nil
532543
}
533544

534545
// findBeadsRepoRoot walks up from the given path to find the repo root (containing .beads)

cmd/bd/config_test.go

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ federation:
391391
issues := validateSyncConfig(tmpDir)
392392
found := false
393393
for _, issue := range issues {
394-
if strings.Contains(issue, "federation.remote") && strings.Contains(issue, "not a valid remote URL") {
394+
if strings.Contains(issue, "federation.remote") && (strings.Contains(issue, "not a valid remote URL") || strings.Contains(issue, "no scheme") || strings.Contains(issue, "not allowed")) {
395395
found = true
396396
break
397397
}
@@ -420,6 +420,66 @@ federation:
420420
t.Errorf("Expected no issues for valid config, got: %v", issues)
421421
}
422422
})
423+
424+
t.Run("remote URL with null byte", func(t *testing.T) {
425+
configContent := "prefix: test\nfederation:\n remote: \"dolthub://org/repo\\x00evil\"\n"
426+
if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil {
427+
t.Fatalf("Failed to write config.yaml: %v", err)
428+
}
429+
430+
issues := validateSyncConfig(tmpDir)
431+
found := false
432+
for _, issue := range issues {
433+
if strings.Contains(issue, "federation.remote") {
434+
found = true
435+
break
436+
}
437+
}
438+
if !found {
439+
t.Errorf("Expected issue about invalid remote URL with null byte, got: %v", issues)
440+
}
441+
})
442+
443+
t.Run("allowed-remote-patterns enforcement", func(t *testing.T) {
444+
configContent := `prefix: test
445+
federation:
446+
remote: "https://github.com/user/repo"
447+
allowed-remote-patterns:
448+
- "dolthub://myorg/*"
449+
`
450+
if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil {
451+
t.Fatalf("Failed to write config.yaml: %v", err)
452+
}
453+
454+
issues := validateSyncConfig(tmpDir)
455+
found := false
456+
for _, issue := range issues {
457+
if strings.Contains(issue, "does not match") {
458+
found = true
459+
break
460+
}
461+
}
462+
if !found {
463+
t.Errorf("Expected issue about remote not matching allowed patterns, got: %v", issues)
464+
}
465+
})
466+
467+
t.Run("allowed-remote-patterns passes when matching", func(t *testing.T) {
468+
configContent := `prefix: test
469+
federation:
470+
remote: "dolthub://myorg/myrepo"
471+
allowed-remote-patterns:
472+
- "dolthub://myorg/*"
473+
`
474+
if err := os.WriteFile(filepath.Join(beadsDir, "config.yaml"), []byte(configContent), 0644); err != nil {
475+
t.Fatalf("Failed to write config.yaml: %v", err)
476+
}
477+
478+
issues := validateSyncConfig(tmpDir)
479+
if len(issues) != 0 {
480+
t.Errorf("Expected no issues when remote matches allowed pattern, got: %v", issues)
481+
}
482+
})
423483
}
424484

425485
func TestResolvedConfigRepoRoot(t *testing.T) {

internal/config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ func Initialize() error {
146146
// Federation configuration (optional Dolt remote)
147147
v.SetDefault("federation.remote", "") // e.g., dolthub://org/beads, gs://bucket/beads, s3://bucket/beads, az://account.blob.core.windows.net/container/beads
148148
v.SetDefault("federation.sovereignty", "") // T1 | T2 | T3 | T4 (empty = no restriction)
149+
v.SetDefault("federation.allowed-remote-patterns", []string{}) // glob patterns restricting allowed remote URLs (enterprise lockdown)
149150

150151
// Push configuration defaults
151152
v.SetDefault("no-push", false)

internal/remotecache/cache.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ func (c *Cache) lockPath(remoteURL string) string {
8080
// DOLT_REMOTE_USER, DOLT_REMOTE_PASSWORD, or DoltHub credentials
8181
// configured via `dolt creds`.
8282
func (c *Cache) Ensure(ctx context.Context, remoteURL string) (string, error) {
83+
if err := ValidateRemoteURL(remoteURL); err != nil {
84+
return "", fmt.Errorf("invalid remote URL: %w", err)
85+
}
8386
if _, err := exec.LookPath("dolt"); err != nil {
8487
return "", fmt.Errorf("dolt CLI not found (required for remote cache): %w", err)
8588
}

internal/remotecache/url.go

Lines changed: 186 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package remotecache
33
import (
44
"crypto/sha256"
55
"fmt"
6+
"net/url"
7+
"path"
68
"regexp"
79
"strings"
810
)
@@ -21,8 +23,28 @@ var remoteSchemes = []string{
2123
"git+https://",
2224
}
2325

26+
// allowedSchemes is the set of recognized URL schemes for validation.
27+
var allowedSchemes = map[string]bool{
28+
"dolthub": true,
29+
"gs": true,
30+
"s3": true,
31+
"az": true,
32+
"file": true,
33+
"https": true,
34+
"http": true,
35+
"ssh": true,
36+
"git+ssh": true,
37+
"git+https": true,
38+
}
39+
2440
// gitSSHPattern matches SCP-style git remote URLs (user@host:path).
25-
var gitSSHPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+@[a-zA-Z0-9][a-zA-Z0-9._-]*:.+$`)
41+
// The path portion excludes control characters (0x00-0x1f, 0x7f).
42+
var gitSSHPattern = regexp.MustCompile(`^[a-zA-Z0-9._-]+@[a-zA-Z0-9][a-zA-Z0-9._-]*:[^\x00-\x1f\x7f]+$`)
43+
44+
// validRemoteNameRegex matches valid remote names: starts with a letter,
45+
// contains only alphanumeric characters, hyphens, and underscores.
46+
// Aligned with peer-name validation in credentials.go.
47+
var validRemoteNameRegex = regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9_-]*$`)
2648

2749
// IsRemoteURL returns true if s looks like a dolt remote URL rather than
2850
// a local filesystem path. Recognized schemes: dolthub://, https://, http://,
@@ -37,6 +59,169 @@ func IsRemoteURL(s string) bool {
3759
return gitSSHPattern.MatchString(s)
3860
}
3961

62+
// ValidateRemoteURL performs strict security validation on a remote URL.
63+
// It rejects URLs containing control characters (including null bytes),
64+
// validates structural correctness per scheme, and rejects leading dashes
65+
// that could be interpreted as CLI flags.
66+
//
67+
// This is a security boundary — all remote URLs should pass through this
68+
// before reaching exec.Command arguments or SQL parameters.
69+
func ValidateRemoteURL(rawURL string) error {
70+
if rawURL == "" {
71+
return fmt.Errorf("remote URL cannot be empty")
72+
}
73+
74+
// Reject control characters (null bytes, newlines, tabs, etc.)
75+
for i, c := range rawURL {
76+
if c < 0x20 || c == 0x7f {
77+
return fmt.Errorf("remote URL contains control character at position %d (0x%02x)", i, c)
78+
}
79+
}
80+
81+
// Reject leading dash (CLI flag injection via exec.Command arguments)
82+
if strings.HasPrefix(rawURL, "-") {
83+
return fmt.Errorf("remote URL must not start with a dash")
84+
}
85+
86+
// SCP-style URLs (user@host:path) are validated separately
87+
if gitSSHPattern.MatchString(rawURL) {
88+
return validateSCPURL(rawURL)
89+
}
90+
91+
// Parse as standard URL
92+
return validateSchemeURL(rawURL)
93+
}
94+
95+
// validateSchemeURL validates a scheme-based URL (https://, dolthub://, etc.)
96+
func validateSchemeURL(rawURL string) error {
97+
// net/url doesn't understand git+ssh:// etc., so we normalize first
98+
normalizedURL := rawURL
99+
scheme := ""
100+
if idx := strings.Index(rawURL, "://"); idx > 0 {
101+
scheme = rawURL[:idx]
102+
// For net/url parsing, replace git+ssh with a parseable scheme
103+
if strings.HasPrefix(scheme, "git+") {
104+
normalizedURL = rawURL[len(scheme)+3:] // strip scheme://
105+
normalizedURL = "placeholder://" + normalizedURL
106+
}
107+
}
108+
109+
if scheme == "" {
110+
return fmt.Errorf("remote URL has no scheme (expected one of: %s)", strings.Join(sortedSchemes(), ", "))
111+
}
112+
113+
if !allowedSchemes[scheme] {
114+
return fmt.Errorf("remote URL scheme %q is not allowed (expected one of: %s)", scheme, strings.Join(sortedSchemes(), ", "))
115+
}
116+
117+
parsed, err := url.Parse(normalizedURL)
118+
if err != nil {
119+
return fmt.Errorf("remote URL is malformed: %w", err)
120+
}
121+
122+
// Scheme-specific structural validation
123+
switch scheme {
124+
case "dolthub":
125+
// dolthub://org/repo — requires org and repo
126+
p := strings.TrimPrefix(parsed.Path, "/")
127+
host := parsed.Host
128+
combined := host
129+
if p != "" {
130+
combined = host + "/" + p
131+
}
132+
parts := strings.Split(combined, "/")
133+
if len(parts) < 2 || parts[0] == "" || parts[1] == "" {
134+
return fmt.Errorf("dolthub:// URL must have org/repo format (e.g., dolthub://myorg/myrepo)")
135+
}
136+
case "https", "http", "git+https":
137+
if parsed.Host == "" {
138+
return fmt.Errorf("%s:// URL must include a hostname", scheme)
139+
}
140+
case "ssh", "git+ssh":
141+
if parsed.Host == "" {
142+
return fmt.Errorf("%s:// URL must include a hostname", scheme)
143+
}
144+
case "s3", "gs":
145+
// s3://bucket/path, gs://bucket/path — host is the bucket
146+
if parsed.Host == "" {
147+
return fmt.Errorf("%s:// URL must include a bucket name", scheme)
148+
}
149+
case "az":
150+
// az://account.blob.core.windows.net/container/path
151+
if parsed.Host == "" {
152+
return fmt.Errorf("az:// URL must include a storage account hostname")
153+
}
154+
case "file":
155+
// file:// is allowed with any path
156+
}
157+
158+
return nil
159+
}
160+
161+
// validateSCPURL validates an SCP-style URL (user@host:path)
162+
func validateSCPURL(rawURL string) error {
163+
// Already matched gitSSHPattern, so structure is valid.
164+
// Extract host and verify no control chars (already checked above).
165+
atIdx := strings.Index(rawURL, "@")
166+
colonIdx := strings.Index(rawURL[atIdx:], ":")
167+
if atIdx < 0 || colonIdx < 0 {
168+
return fmt.Errorf("SCP-style URL must be in user@host:path format")
169+
}
170+
return nil
171+
}
172+
173+
// ValidateRemoteName checks that a remote name is safe for use as a Dolt
174+
// remote identifier. Names must start with a letter and contain only
175+
// alphanumeric characters, hyphens, and underscores. Max 64 characters.
176+
func ValidateRemoteName(name string) error {
177+
if name == "" {
178+
return fmt.Errorf("remote name cannot be empty")
179+
}
180+
if len(name) > 64 {
181+
return fmt.Errorf("remote name too long (max 64 characters)")
182+
}
183+
if strings.HasPrefix(name, "-") {
184+
return fmt.Errorf("remote name must not start with a dash")
185+
}
186+
if !validRemoteNameRegex.MatchString(name) {
187+
return fmt.Errorf("remote name must start with a letter and contain only alphanumeric characters, hyphens, and underscores")
188+
}
189+
return nil
190+
}
191+
192+
// MatchesRemotePattern checks whether a URL matches a glob-style pattern.
193+
// Patterns use path.Match semantics (e.g., "dolthub://myorg/*").
194+
func MatchesRemotePattern(rawURL, pattern string) bool {
195+
matched, err := path.Match(pattern, rawURL)
196+
if err != nil {
197+
return false
198+
}
199+
return matched
200+
}
201+
202+
// ValidateRemoteURLWithPatterns validates a URL and optionally checks it
203+
// against an allowlist of glob patterns. If patterns is empty, only
204+
// structural validation is performed.
205+
func ValidateRemoteURLWithPatterns(rawURL string, patterns []string) error {
206+
if err := ValidateRemoteURL(rawURL); err != nil {
207+
return err
208+
}
209+
if len(patterns) == 0 {
210+
return nil
211+
}
212+
for _, p := range patterns {
213+
if MatchesRemotePattern(rawURL, p) {
214+
return nil
215+
}
216+
}
217+
return fmt.Errorf("remote URL %q does not match any allowed pattern", rawURL)
218+
}
219+
220+
func sortedSchemes() []string {
221+
// Return in a consistent display order
222+
return []string{"dolthub", "https", "http", "ssh", "git+ssh", "git+https", "s3", "gs", "az", "file"}
223+
}
224+
40225
// CacheKey returns a filesystem-safe identifier for a remote URL.
41226
// It uses the first 16 hex characters (64 bits) of the SHA-256 hash.
42227
// Birthday-bound collision risk is negligible for a local cache: 50% at

0 commit comments

Comments
 (0)