Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 63 additions & 15 deletions phabricator_etl/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
"differential.revision.reopen",
"differential.revision.request",
"differential.revision.resign",
"differential.revision.reviewers",
"differential.revision.status",
"differential.revision.void",
"differential.revision.wrong",
Expand Down Expand Up @@ -282,15 +283,35 @@ def get_stack_size(
return len(stack)


def get_user_name(author_phid: str, sessions: Sessions) -> Optional[str]:
    """Return the ``userName`` of the user whose PHID is ``author_phid``.

    Queries ``UserDb.User`` through ``sessions.users`` and expects exactly one
    matching row.

    Returns:
        The user's ``userName``, or ``None`` when no row matches the PHID.
    """
    try:
        user = sessions.users.query(UserDb.User).filter_by(phid=author_phid).one()
    except NoResultFound:
        # An unknown PHID is reported as "no name" rather than as an error.
        return None
    return user.userName
# Module-level lookup caches mapping a PHID to its resolved name (None records
# a PHID that could not be resolved). Keys may be str or bytes.
#
# The annotations are quoted on purpose: an unquoted ``str | bytes`` in a
# module-level annotation is evaluated at import time and raises TypeError on
# Python versions older than 3.10. As a string it is never evaluated at
# runtime, while type checkers still read it as the same type.
_user_name_cache: "dict[str | bytes, Optional[str]]" = {}
_project_name_cache: "dict[str | bytes, Optional[str]]" = {}


def get_user_name(author_phid: "str | bytes", sessions: Sessions) -> Optional[str]:
    """Return the ``userName`` for a user PHID, caching the result.

    The first lookup for a given PHID queries ``UserDb.User`` through
    ``sessions.users``; the outcome (including a miss, stored as ``None``) is
    kept in the module-level ``_user_name_cache`` and reused by later calls.

    The cache is keyed on ``author_phid`` only, so ``sessions`` is not
    consulted on a cache hit, and a ``str`` PHID and its ``bytes`` form are
    cached as separate entries.

    The ``str | bytes`` annotation is quoted so that it is not evaluated at
    definition time, which would raise ``TypeError`` on Python < 3.10.

    Returns:
        The user's ``userName``, or ``None`` when no row matches the PHID.
    """
    if author_phid not in _user_name_cache:
        try:
            user = sessions.users.query(UserDb.User).filter_by(phid=author_phid).one()
        except NoResultFound:
            # Remember the miss too, so an unknown PHID is only queried once.
            _user_name_cache[author_phid] = None
        else:
            _user_name_cache[author_phid] = user.userName
    return _user_name_cache[author_phid]


def get_project_name(project_phid: "str | bytes", sessions: Sessions) -> Optional[str]:
    """Return the ``name`` for a project PHID, caching the result.

    The first lookup for a given PHID queries ``ProjectDb.Project`` through
    ``sessions.projects``; the outcome (including a miss, stored as ``None``)
    is kept in the module-level ``_project_name_cache`` and reused by later
    calls.

    The cache is keyed on ``project_phid`` only, so ``sessions`` is not
    consulted on a cache hit, and a ``str`` PHID and its ``bytes`` form are
    cached as separate entries.

    The ``str | bytes`` annotation is quoted so that it is not evaluated at
    definition time, which would raise ``TypeError`` on Python < 3.10.

    Returns:
        The project's ``name``, or ``None`` when no row matches the PHID.
    """
    if project_phid not in _project_name_cache:
        try:
            project = (
                sessions.projects.query(ProjectDb.Project)
                .filter_by(phid=project_phid)
                .one()
            )
        except NoResultFound:
            # Remember the miss too, so an unknown PHID is only queried once.
            _project_name_cache[project_phid] = None
        else:
            _project_name_cache[project_phid] = project.name
    return _project_name_cache[project_phid]

def get_user_email(author_phid: str, sessions: Sessions) -> Optional[str]:

def get_user_email(author_phid: str | bytes, sessions: Sessions) -> Optional[str]:
Comment on lines +286 to +314
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

str | bytes type unions require Python 3.10+, but this repo targets Python 3.9 (see .python-version and pyproject.toml requires-python). Please replace these annotations with typing.Union[str, bytes] (and update the cache type hints similarly), or bump the project’s minimum Python version to 3.10+ if that’s intended.

Copilot uses AI. Check for mistakes.
try:
user_email = (
sessions.users.query(UserDb.UserEmail)
Expand All @@ -314,12 +335,7 @@ def get_review_requests(
):
is_reviewer_group = review.reviewerPHID.startswith(b"PHID-PROJ-")
if is_reviewer_group:
reviewer = (
sessions.projects.query(ProjectDb.Project)
.filter_by(phid=review.reviewerPHID)
.one()
)
reviewer_username = reviewer.name
reviewer_username = get_project_name(review.reviewerPHID, sessions)
reviewer_email = None
else:
reviewer_username = get_user_name(review.reviewerPHID, sessions)
Expand Down Expand Up @@ -469,10 +485,19 @@ def get_transactions(revision: DiffDb.Revision, sessions: Sessions) -> list[dict
"author_email": get_user_email(transaction.authorPHID, sessions),
"author_username": get_user_name(transaction.authorPHID, sessions),
"date_created": transaction.dateCreated,
"old_value": convert_value_to_string(transaction.oldValue),
"new_value": convert_value_to_string(transaction.newValue),
}

if transaction.transactionType == "differential.revision.reviewers":
transaction_obj["old_value"] = convert_json_to_string(
transaction.oldValue, sessions
)
transaction_obj["new_value"] = convert_json_to_string(
transaction.newValue, sessions
)
else:
transaction_obj["old_value"] = convert_value_to_string(transaction.oldValue)
transaction_obj["new_value"] = convert_value_to_string(transaction.newValue)

transactions.append(transaction_obj)

return transactions
Expand Down Expand Up @@ -576,6 +601,29 @@ def convert_value_to_string(value):
return str(value)


def convert_json_to_string(value: Any, sessions: Sessions) -> str:
    """Convert a JSON-encoded PHID map to a comma-separated string of names.

    Handles the "differential.revision.reviewers" transaction type, where
    old/new values are dicts mapping reviewer PHIDs to their status. Only the
    keys are used; the values are ignored.

    Keys starting with "PHID-PROJ-" are resolved with ``get_project_name``,
    all other keys with ``get_user_name``. Each key is UTF-8 encoded to bytes
    before the lookup. PHIDs that resolve to ``None`` are omitted from the
    result, so an empty or fully unresolved map yields an empty string.

    Falls back to ``convert_value_to_string(value)`` when ``value`` cannot be
    parsed as JSON or does not decode to a dict.
    """
    try:
        phid_map = json.loads(value)
        if not isinstance(phid_map, dict):
            return convert_value_to_string(value)

        names = [
            get_project_name(phid.encode("utf-8"), sessions)
            if phid.startswith("PHID-PROJ-")
            else get_user_name(phid.encode("utf-8"), sessions)
            for phid in phid_map
        ]
        return ", ".join(name for name in names if name is not None)
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError and also the
        # UnicodeDecodeError that json.loads raises for undecodable bytes;
        # TypeError covers input that is not str/bytes/bytearray (e.g. None).
        return convert_value_to_string(value)


def get_last_run_timestamp(bq_client: bigquery.Client) -> Optional[datetime]:
"""Get the timestamp of the most recently added entry in BigQuery.

Expand Down
Loading