Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
name: archival-identifier
description: A GitHub Action to identify repositories that may be candidates for archival based on inactivity, usage, repository contents, and criticality score

inputs:
ORG:
description: 'GitHub organization to analyze'
required: true

THRESHOLD_TIME:
description: 'Number of days of inactivity to consider for archival'
required: false
default: '180'

GITHUB_ORG_TOKEN:
description: 'GitHub token for org access'
required: true
default: ${{ github.token }}

runs:
using: 'composite'
steps:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install dependencies
shell: bash
run: |
pip install -r ${{ github.action_path }}/requirements.txt

# - name: Run superchangelog to retrieve development activity data
# id: run-superchangelog
# uses: DSACMS/superchangelog@main
# with:
# ORG: ${{ inputs.ORG }}
# THRESHOLD_TIME: ${{ inputs.THRESHOLD_TIME }}
# GITHUB_ORG_TOKEN: ${{ inputs.GITHUB_ORG_TOKEN }}

- name: Run inactivity script
id: identify-archival-candidates
shell: bash
env:
ORG: ${{ inputs.ORG }}
THRESHOLD_TIME: ${{ inputs.THRESHOLD_TIME }}
GITHUB_ORG_TOKEN: ${{ inputs.GITHUB_ORG_TOKEN }}
run: |
python ${{ github.action_path }}/main.py data.json

- name: Create issue with archival-identifier results on the repository
uses: JasonEtco/create-an-issue@v2
env:
GITHUB_ORG_TOKEN: ${{ inputs.GITHUB_ORG_TOKEN }}
with:
filename: ./archival_candidates_report.md
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
PyGitHub
criticality-score==1.0.7
pytest
pylint
190 changes: 190 additions & 0 deletions scripts/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import json
import sys
import subprocess
import os
from datetime import datetime, timedelta
from github import Github

def generate_markdown_table(stats_list):
"""
Generate a markdown table from issue statistics.

Args:
stats_list (list): List of statistics dictionaries from GitHubIssuesAnalyzer

Returns:
str: Markdown formatted table
"""
markdown = "# GitHub Issues Report\n\n"
markdown += "| Repository | Open Issues | Closed Issues | Open PRs | Merged PRs | Closed PRs | Releases | Commits | Dependants | Criticality Score | Forks | Active Forks | Status |\n"
markdown += "|---|---|---|---|---|---|---|---|---|---|---|---|---|\n"

for repo, stats in stats_list.items():
markdown += f"| {repo} | {stats['issues_open_count']} | {stats['issues_closed_count']} | {stats['pr_open_count']} | {stats['pr_merged_count']} | {stats['pr_closed_count']} | {stats['release_count']} | {stats['commit_count']} | {stats['dependents_count']} | {stats['criticality_score']} | {stats['forks_count']} | {stats['active_forks_count']} | {stats['status']} |\n"

return markdown

def write_markdown_file(markdown_content, output_path="archival_candidates_report.md"):
"""
Write markdown content to a file.

Args:
markdown_content (str): The markdown content to write
output_path (str): Path where the markdown file should be saved
"""
with open(output_path, 'w') as f:
f.write(markdown_content)
print(f"Markdown report written to {output_path}")

def get_criticality_score(org_name, repo_name, api_token):
"""See https://github.com/ossf/criticality_score for more details on the OpenSSF Criticality Score.

org_name (str)
repo_name (str)
api_token (str)

dependents_count (str)
criticality_score (str): This value ranges from 0 to 1 (like a float) with lower scores indicating less critical projects.

"""

os.environ['GITHUB_AUTH_TOKEN'] = api_token

cmd_str = 'criticality_score --repo github.com/' + org_name + '/' + repo_name + ' --format csv'

try:
proc = subprocess.Popen(cmd_str, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
out, err = proc.communicate()

if not err:
csv_str = out.decode("utf-8")
items = csv_str.split(',')
dependents_count = items[25]
criticality_score = items[26].rstrip()
else:
dependents_count = None
criticality_score = None
except:
dependents_count = None
criticality_score = None

return dependents_count, criticality_score

def define_status_determination(stats):
"""
Determine the status of a repository based on its statistics.

stats (dict): A dictionary containing development activity statistics for a repository

str: The determined status of the repository (e.g., "Active", "Dormant")
"""
# Placeholder logic for status determination
if all(stats.get(field, 0) == 0 for field in [
"issues_open_count",
"issues_closed_count",
"pr_open_count",
"pr_merged_count",
"pr_closed_count",
"release_count",
"commit_count",
"active_forks_count"
]):
return "Dormant"
else:
return "Active"

def analyze_fork_activity(repo):
"""
Analyze the activity of forks for a given repository.

repo_obj: A PyGithub Repository object

dict: A dictionary containing statistics about the forks (e.g., number of forks, recent activity)
"""

g = Github(os.getenv("GITHUB_AUTH_TOKEN"))
org_name = "DSACMS"

cutoff_date = datetime.utcnow() - timedelta(days=180)
cutoff_date_str = cutoff_date.strftime("%Y-%m-%dT%H:%M:%SZ")

repo_obj = g.get_repo(f"{org_name}/{repo}")
forks_count = len(list(repo_obj.get_forks()))
active_forks = []

for fork in repo_obj.get_forks():
print(f"Analyzing fork: {fork.full_name}...") # Debug statement

# First check for recent activity in fork
if fork.pushed_at < datetime.now(fork.pushed_at.tzinfo) - timedelta(days=180):
print(f"{fork.full_name} has no recent activity.")
continue

# Then check if fork has diverged / has unique commits
try:
# Compare the fork's default branch with the parent's default branch
comparison = repo_obj.compare(repo_obj.default_branch, fork.default_branch)

if comparison.ahead_by > 0:
# The fork has unique commits! It's active.
active_forks.append((fork, comparison.ahead_by))
else:
# The fork is not ahead, so it has no new commits on its default branch
print(f"{fork.full_name} has no unique commits.")
continue

except Exception as e:
# Fork has an empty default branch or encountered other issues. Classify as inactive.
if e.status == 404:
print("Error")
else:
print(f"\nCould not compare {fork.full_name}: {e}")

return forks_count, len(active_forks)


def main():
development_activity_file = sys.argv[1]

with open(development_activity_file, 'r') as f:
data = json.load(f)

stats= {}

for repo in data["repos"]:
print(f"Analyzing repository's development activity: {repo['name']}...") # Debug statement

# Calculate statistics for the repository
stats[repo["name"]] = {
"issues_open_count": len([issue for issue in repo["issues"] if issue["state"] == "open"]),
"issues_closed_count": len([issue for issue in repo["issues"] if issue["state"] == "closed"]),
# TODO: for pulls, add the author
# TODO: filter pulls by author and mentions of "code.json" in the title
"pr_open_count": len([pr for pr in repo["pulls"] if pr["state"] == "open"]),
"pr_merged_count": len([pr for pr in repo["pulls"] if pr["state"] == "closed" and pr["merged"] == True]),
"pr_closed_count": len([pr for pr in repo["pulls"] if pr["state"] == "closed" and pr["merged"] == False]),
"release_count": "N/A",
"commit_count": len(repo["commits"])
}

# Run OpenSSF Criticality Score
dependents_count, criticality_score = get_criticality_score("DSACMS", repo["name"], os.getenv("GITHUB_AUTH_TOKEN"))
stats[repo["name"]]["dependents_count"] = dependents_count # dependents_count
stats[repo["name"]]["criticality_score"] = criticality_score # criticality_score

# TODO: Analyze fork activity
forks_count, active_forks_count = analyze_fork_activity(repo["name"])
stats[repo["name"]]["forks_count"] = forks_count
stats[repo["name"]]["active_forks_count"] = active_forks_count

# Determine the status of the repository
stats[repo["name"]]["status"] = define_status_determination(stats[repo["name"]])

# print(stats)

# Generate and write markdown file
markdown_content = generate_markdown_table(stats)
write_markdown_file(markdown_content)

if __name__ == "__main__":
main()
Loading