Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,7 @@ __marimo__/

# Streamlit
.streamlit/secrets.toml

# Claude
PLAN.md
CLAUDE.md
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,62 @@ It-Depends can automatically try to match packages against the [OSV vulnerabilit
`--audit` option. This is best-effort matching because it is based on package names, which might not always be consistent.
Any discovered vulnerabilities are added to the JSON output.

### Checking Package Maintenance Status

It-Depends can check the maintenance status of GitHub-hosted packages using the `--check-maintenance` option. This feature
queries the GitHub API to determine when each package was last updated, helping identify stale or unmaintained dependencies
in your software supply chain.

```shell
# Check maintenance status with default threshold (365 days)
it-depends pip:requests --check-maintenance

# Use custom staleness threshold (180 days)
it-depends npm:lodash --check-maintenance --stale-threshold 180

# Provide GitHub token for higher rate limits
export GITHUB_TOKEN=your_token_here
it-depends pip:requests --check-maintenance

# Or pass token directly
it-depends pip:requests --check-maintenance --github-token your_token_here
```

The maintenance check adds a `maintenance` field to each package in the JSON output:

```json
{
"pip:requests": {
"2.31.0": {
"name": "requests",
"version": "2.31.0",
"source": "pip",
"maintenance": {
"repository_url": "https://github.com/psf/requests",
"last_commit_date": "2023-05-22T14:30:00Z",
"is_stale": false,
"days_since_update": 120,
"error": null
}
}
}
}
```

**GitHub API Rate Limits:**
- Unauthenticated requests: 60 requests/hour
- Authenticated requests (with token): 5,000 requests/hour
- Maintenance data is cached for 24 hours by default (configurable via `--maintenance-cache-ttl`)

**Supported Package Managers:**
The maintenance check currently works with packages that have GitHub repositories:
- **npm**: Queries repository field from package.json
- **pip**: Checks project URLs from PyPI metadata
- **cargo**: Uses repository field from Crates.io
- **go**: Extracts from import paths (e.g., github.com/user/repo)

Packages not hosted on GitHub will have an error message in the maintenance field but will not prevent the analysis from completing.

It-Depends attempts to parallelize as much of its effort as possible. To limit the maximum number of parallel tasks, use
the `--max-workers` option.

Expand Down
11 changes: 11 additions & 0 deletions src/it_depends/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,17 @@ def main() -> None: # noqa: C901, PLR0912, PLR0915
if settings.audit:
package_list = vulnerabilities(package_list)

if settings.check_maintenance:
from .maintenance import check_maintenance_status

package_list = check_maintenance_status(
package_list,
stale_threshold_days=settings.stale_threshold,
github_token=settings.github_token or os.getenv("GITHUB_TOKEN"),
cache=cache,
cache_ttl=settings.maintenance_cache_ttl,
)

if to_compare is not None:
to_compare_list = resolve(
to_compare,
Expand Down
25 changes: 25 additions & 0 deletions src/it_depends/cargo.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from pathlib import Path
from typing import TYPE_CHECKING

import requests

if TYPE_CHECKING:
from collections.abc import Iterator

Expand Down Expand Up @@ -184,3 +186,26 @@ def resolve(self, dependency: Dependency) -> Iterator[Package]:
cache.set_resolved(dependency)
# TODO(@evandowning): propagate up any other info we have in this cache # noqa: TD003, FIX002
return cache.match(dependency)

@staticmethod
def get_repository_url(package: Package) -> str | None:
    """Look up the source repository URL that crates.io lists for a crate.

    Queries ``https://crates.io/api/v1/crates/<name>`` and returns the
    ``crate.repository`` field of the JSON response. The value is returned
    as published; it is not checked to be a GitHub URL.

    The lookup is best-effort: a network failure, a non-200 status, a body
    that is not valid JSON, or a payload with an unexpected shape all
    yield ``None`` instead of raising.

    Args:
        package: Package whose ``name`` is used as the crate name

    Returns:
        Repository URL string, or None if it could not be determined

    """
    try:
        response = requests.get(
            f"https://crates.io/api/v1/crates/{package.name}",
            timeout=5,
        )
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError: on some requests versions a JSON decode failure is a
        # plain ValueError rather than a RequestException subclass.
        return None

    # The payload may be a non-object, or "crate" may be null/missing;
    # treat every such shape as "no repository known".
    crate = data.get("crate") if isinstance(data, dict) else None
    if not isinstance(crate, dict):
        return None

    repository = crate.get("repository")
    return repository if isinstance(repository, str) else None
24 changes: 24 additions & 0 deletions src/it_depends/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,30 @@ class Settings(BaseSettings):
default=False,
description="""Audit packages for known vulnerabilities using Google OSV.""",
)
check_maintenance: CliImplicitFlag[bool] = Field(
alias="check-maintenance",
default=False,
description="""Check maintenance status of GitHub-hosted packages.
Queries GitHub API for last commit date and flags stale packages.""",
)
stale_threshold: int = Field(
alias="stale-threshold",
default=365,
description="""Days since last commit to consider a package stale.
Default: 365 (1 year). Requires --check-maintenance.""",
)
github_token: str | None = Field(
alias="github-token",
default=None,
description="""GitHub personal access token for API requests.
If not provided, uses GITHUB_TOKEN environment variable.
Authenticated: 5000 requests/hour. Unauthenticated: 60 requests/hour.""",
)
maintenance_cache_ttl: int = Field(
alias="maintenance-cache-ttl",
default=86400,
description="""Cache TTL for maintenance data in seconds. Default: 86400 (24 hours).""",
)
database: Path = Field(
default=DEFAULT_DB_PATH,
description="""Alternative path to load/store the database, or
Expand Down
13 changes: 13 additions & 0 deletions src/it_depends/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,19 @@ class Updated(Base):
__table_args__ = (UniqueConstraint("package", "version", "source", "resolver", name="updated_unique_constraint"),)


class GitHubMetadataCache(Base):
    """Cache for GitHub repository metadata.

    One row per repository, stored in the ``github_metadata_cache`` table and
    keyed by the composite primary key ``(owner, repo)``. Both timestamps are
    stored as strings rather than native datetime columns.
    """

    __tablename__ = "github_metadata_cache"

    # Composite primary key: both columns are flagged primary_key=True.
    owner = Column(String, nullable=False, primary_key=True)
    repo = Column(String, nullable=False, primary_key=True)
    # Nullable, so a row can exist without a push timestamp.
    # Presumably mirrors GitHub's `pushed_at` repository field -- TODO confirm against the writer.
    pushed_at = Column(String, nullable=True)  # ISO 8601
    # Required on every row.
    # Presumably the time the row was fetched, used for cache-TTL expiry -- verify against the reader.
    fetched_at = Column(String, nullable=False)  # ISO 8601

    # NOTE(review): (owner, repo) is already the primary key, so this named
    # unique constraint looks redundant -- confirm nothing depends on the
    # constraint name before removing it.
    __table_args__ = (UniqueConstraint("owner", "repo", name="github_cache_unique"),)


class DBDependency(Base, Dependency):
"""Database model for dependencies."""

Expand Down
23 changes: 23 additions & 0 deletions src/it_depends/go.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,3 +494,26 @@
for package, version in module.dependencies
],
)

@staticmethod
def get_repository_url(package: Package) -> str | None:
"""Get GitHub repository URL for Go package.

For Go packages, the package name often IS the repository path.
For example: github.com/user/repo

Args:
package: Package to get repository URL for

Returns:
Repository URL or None if not a GitHub package

"""
if package.name.startswith("github.com/"):

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization High

The string
github.com/
may be at an arbitrary position in the sanitized URL.

Copilot Autofix

AI 5 months ago

The best way to fix this issue is to reliably parse the fully-qualified repository URL/host using standard parsing utilities and check that the host is exactly (or ends with) "github.com" before proceeding.
Steps:

  • Parse package.name as a URL (using urllib.parse.urlparse).
  • Check that the hostname component of the parsed URL is "github.com".
  • If so, proceed to extract the owner and repo from the path.
  • If not, return None.

You only need to edit the get_repository_url function in file src/it_depends/go.py, and add the required import (from urllib.parse import urlparse) if not present (the file currently imports request only from urllib).


Suggested changeset 1
src/it_depends/go.py

Autofix patch

Autofix patch
Run the following command in your local git repository to apply this patch
cat << 'EOF' | git apply
diff --git a/src/it_depends/go.py b/src/it_depends/go.py
--- a/src/it_depends/go.py
+++ b/src/it_depends/go.py
@@ -16,6 +16,7 @@
 from subprocess import DEVNULL, CalledProcessError, check_call, check_output
 from tempfile import TemporaryDirectory
 from urllib import request
+from urllib.parse import urlparse
 from urllib.error import HTTPError, URLError
 
 if TYPE_CHECKING:
@@ -509,11 +510,14 @@
             Repository URL or None if not a GitHub package
 
         """
-        if package.name.startswith("github.com/"):
-            # Extract owner/repo from path like github.com/owner/repo/subpath
-            parts = package.name.split("/")
-            if len(parts) >= 3:
-                owner = parts[1]
-                repo = parts[2]
+        # Parse as URL or fallback to direct path check
+        name = package.name
+        parsed = urlparse(name if name.startswith("http") else f"https://{name}")
+        if parsed.hostname == "github.com":
+            # Extract owner/repo from parsed path (format: /owner/repo[/...])
+            parts = parsed.path.strip("/").split("/")
+            if len(parts) >= 2:
+                owner = parts[0]
+                repo = parts[1]
                 return f"https://github.com/{owner}/{repo}"
         return None
EOF
@@ -16,6 +16,7 @@
from subprocess import DEVNULL, CalledProcessError, check_call, check_output
from tempfile import TemporaryDirectory
from urllib import request
from urllib.parse import urlparse
from urllib.error import HTTPError, URLError

if TYPE_CHECKING:
@@ -509,11 +510,14 @@
Repository URL or None if not a GitHub package

"""
if package.name.startswith("github.com/"):
# Extract owner/repo from path like github.com/owner/repo/subpath
parts = package.name.split("/")
if len(parts) >= 3:
owner = parts[1]
repo = parts[2]
# Parse as URL or fallback to direct path check
name = package.name
parsed = urlparse(name if name.startswith("http") else f"https://{name}")
if parsed.hostname == "github.com":
# Extract owner/repo from parsed path (format: /owner/repo[/...])
parts = parsed.path.strip("/").split("/")
if len(parts) >= 2:
owner = parts[0]
repo = parts[1]
return f"https://github.com/{owner}/{repo}"
return None
Copilot is powered by AI and may make mistakes. Always verify output.
# Extract owner/repo from path like github.com/owner/repo/subpath
parts = package.name.split("/")
if len(parts) >= 3:
owner = parts[1]
repo = parts[2]
return f"https://github.com/{owner}/{repo}"
return None
Loading
Loading