From 3d640057d47e9f45abe590134204e3bbd7caaa85 Mon Sep 17 00:00:00 2001 From: Azax4 Date: Mon, 9 Jun 2025 17:20:04 +0100 Subject: [PATCH 1/8] Added script to update author page issues --- bin/update_author-page_issues.py | 77 ++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 bin/update_author-page_issues.py diff --git a/bin/update_author-page_issues.py b/bin/update_author-page_issues.py new file mode 100644 index 0000000000..71ab42cb87 --- /dev/null +++ b/bin/update_author-page_issues.py @@ -0,0 +1,77 @@ + +"""Usage: update_author-page_issues.py + +Updates all issues containing "Author page:" in the title to follow the latest template + +Set your OS environment variable "GITHUB_TOKEN" to your personal token or hardcode it in the code. Make sure to not reveal it to others! + +""" + +import os +import requests + +# Configuration +GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") #Can hardcode token here +REPO_OWNER = 'acl-org' +REPO_NAME = 'acl-anthology' + +# Base URL +BASE_URL = f'https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}' + +HEADERS = { + 'Authorization': f'token {GITHUB_TOKEN}', + 'Accept': 'application/vnd.github.v3+json' +} + +def get_issues_with_title(title): + issues_url = f'{BASE_URL}/issues' + params = {'state': 'open', 'per_page': 100} + issues = [] + + while issues_url: + response = requests.get(issues_url, headers=HEADERS, params=params) + response.raise_for_status() + data = response.json() + + for issue in data: + if title in issue.get('title', '') and 'pull_request' not in issue: + issues.append(issue) + + issues_url = response.links.get('next', {}).get('url') + + return issues + +def add_comment_to_issue(issue_number, comment): + url = f'{BASE_URL}/issues/{issue_number}/comments' + payload = {'body': comment} + response = requests.post(url, headers=HEADERS, json=payload) + response.raise_for_status() + print(f'Comment added to issue #{issue_number}') + +def edit_body_of_issue(issue_number, new_body): + url = f'https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/issues/{issue_number}' + payload = {'body': new_body} + response = requests.patch(url, headers=HEADERS, json=payload) + response.raise_for_status() + print(f'Edited body of issue (ID: {issue_number}) updated.') + +def main(): + print('🔎 Fetching issues...') + issues = get_issues_with_title("Author Page:") + + for issue in issues: + number = issue["number"] + print(f'---\nProcessing issue #{number}: {issue["title"]}') + + add_comment_to_issue(number, "Hi! We have just added a few new fields to help us manage our database of author pages better. You can see the new fields in the body of the issue. Please fill these out and let us know when done so that we can continue working on your issue. Thank you for your coperation!") + + issue_body = issue["body"] + if "### Author ORCID" not in issue_body: + issue_body_list = issue_body.split("### Type of Author Metadata Correction") + issue_body_list.insert(1, "### Author ORCID\n\n-Add ORCID here-\n\n### Institution of highest (anticipated) degree\n\n-Add insitution here-\n\n### Author Name (only if published in another script)\n\n -add author name here if needed-\n\n### Is the authors name read right to left? (only if published in another script)\n\n- [ ] Script is read right-to-left.\n\n### Type of Author Metadata Correction") + issue_body = "".join(issue_body_list) + edit_body_of_issue(number, issue_body) + + +if __name__ == '__main__': + main() From fdfca82850e5f62ffa3e483540588152142fb759 Mon Sep 17 00:00:00 2001 From: Azax4 Date: Mon, 9 Jun 2025 17:21:55 +0100 Subject: [PATCH 2/8] Updated author issue template --- .github/ISSUE_TEMPLATE/02-name-correction.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/02-name-correction.yml b/.github/ISSUE_TEMPLATE/02-name-correction.yml index e95b8fd1a5..bb82aaa6f9 100644 --- a/.github/ISSUE_TEMPLATE/02-name-correction.yml +++ b/.github/ISSUE_TEMPLATE/02-name-correction.yml @@ -25,7 +25,7 @@ body: validations: required: true - - type: textarea + - type: input id: author_orcid attributes: label: Author ORCID @@ -34,7 +34,7 @@ body: placeholder: ex. https://orcid.org/my-orcid?orcid=0009-0003-8868-7504 validations: required: true - - type: textarea + - type: input id: author_highest_degree_institution attributes: label: Institution of highest (anticipated) degree @@ -45,7 +45,7 @@ body: placeholder: ex. Johns Hopkins University (https://www.jhu.edu/) validations: required: true - - type: textarea + - type: input id: author_name_script_variant attributes: label: Author Name (only if published in another script) From a342362613508d635518c18df829cf7837a6f79d Mon Sep 17 00:00:00 2001 From: Azax4 Date: Mon, 9 Jun 2025 17:25:33 +0100 Subject: [PATCH 3/8] Minor change No need to add comment if body does not need to be updated --- bin/update_author-page_issues.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/update_author-page_issues.py b/bin/update_author-page_issues.py index 71ab42cb87..ff807c38cf 100644 --- a/bin/update_author-page_issues.py +++ b/bin/update_author-page_issues.py @@ -63,7 +63,7 @@ def main(): number = issue["number"] print(f'---\nProcessing issue #{number}: {issue["title"]}') - add_comment_to_issue(number, "Hi! We have just added a few new fields to help us manage our database of author pages better. You can see the new fields in the body of the issue. Please fill these out and let us know when done so that we can continue working on your issue. Thank you for your coperation!") + issue_body = issue["body"] if "### Author ORCID" not in issue_body: @@ -71,6 +71,8 @@ def main(): issue_body_list.insert(1, "### Author ORCID\n\n-Add ORCID here-\n\n### Institution of highest (anticipated) degree\n\n-Add insitution here-\n\n### Author Name (only if published in another script)\n\n -add author name here if needed-\n\n### Is the authors name read right to left? (only if published in another script)\n\n- [ ] Script is read right-to-left.\n\n### Type of Author Metadata Correction") issue_body = "".join(issue_body_list) edit_body_of_issue(number, issue_body) + + add_comment_to_issue(number, "Hi! We have just added a few new fields to help us manage our database of author pages better. You can see the new fields in the body of the issue. Please fill these out and let us know when done so that we can continue working on your issue. Thank you for your coperation!") if __name__ == '__main__': From 6064ae0e788729d6accb3ed3ce324735ef202b75 Mon Sep 17 00:00:00 2001 From: Matt Post Date: Tue, 10 Jun 2025 17:01:53 -0400 Subject: [PATCH 4/8] minor changes --- bin/update_author-page_issues.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/update_author-page_issues.py b/bin/update_author-page_issues.py index ff807c38cf..253aa37446 100644 --- a/bin/update_author-page_issues.py +++ b/bin/update_author-page_issues.py @@ -68,11 +68,11 @@ def main(): issue_body = issue["body"] if "### Author ORCID" not in issue_body: issue_body_list = issue_body.split("### Type of Author Metadata Correction") - issue_body_list.insert(1, "### Author ORCID\n\n-Add ORCID here-\n\n### Institution of highest (anticipated) degree\n\n-Add insitution here-\n\n### Author Name (only if published in another script)\n\n -add author name here if needed-\n\n### Is the authors name read right to left? (only if published in another script)\n\n- [ ] Script is read right-to-left.\n\n### Type of Author Metadata Correction") + issue_body_list.insert(1, "### Author ORCID\n\n-Add ORCID here-\n\n### Institution of highest (anticipated) degree\n\n-Add insitution here-\n\n### Author Name (only if published in another script)\n\n -add author name here if needed-\n\n### Is the author's name read right to left? (only if published in another script)\n\n- [ ] Script is read right-to-left.\n\n### Type of Author Metadata Correction") issue_body = "".join(issue_body_list) edit_body_of_issue(number, issue_body) - add_comment_to_issue(number, "Hi! We have just added a few new fields to help us manage our database of author pages better. You can see the new fields in the body of the issue. Please fill these out and let us know when done so that we can continue working on your issue. Thank you for your coperation!") + add_comment_to_issue(number, "Hi! We have just added a few new fields to help us manage our author database and decrease ambiguity on future imports. Please fill out the fields that have been added to the body of this issue, and leave a comment when you are finished. We will then proceed with processing this request.") if __name__ == '__main__': From 8a8676ba57fa3c46ef5067332ea68ccd81512a2c Mon Sep 17 00:00:00 2001 From: Matt Post Date: Mon, 18 Aug 2025 18:10:22 -0400 Subject: [PATCH 5/8] Update script --- ...issues.py => update_author_page_issues.py} | 63 ++++++++++++++++--- 1 file changed, 53 insertions(+), 10 deletions(-) rename bin/{update_author-page_issues.py => update_author_page_issues.py} (50%) mode change 100644 => 100755 diff --git a/bin/update_author-page_issues.py b/bin/update_author_page_issues.py old mode 100644 new mode 100755 similarity index 50% rename from bin/update_author-page_issues.py rename to bin/update_author_page_issues.py index 253aa37446..d36b7fae30 --- a/bin/update_author-page_issues.py +++ b/bin/update_author_page_issues.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python3 """Usage: update_author-page_issues.py @@ -8,10 +9,11 @@ """ import os +import textwrap import requests # Configuration -GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") #Can hardcode token here +GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") # Can hardcode token here REPO_OWNER = 'acl-org' REPO_NAME = 'acl-anthology' @@ -20,9 +22,10 @@ HEADERS = { 'Authorization': f'token {GITHUB_TOKEN}', - 'Accept': 'application/vnd.github.v3+json' + 'Accept': 'application/vnd.github.v3+json', } + def get_issues_with_title(title): issues_url = f'{BASE_URL}/issues' params = {'state': 'open', 'per_page': 100} @@ -41,6 +44,7 @@ def get_issues_with_title(title): return issues + def add_comment_to_issue(issue_number, comment): url = f'{BASE_URL}/issues/{issue_number}/comments' payload = {'body': comment} @@ -48,6 +52,7 @@ def add_comment_to_issue(issue_number, comment): response.raise_for_status() print(f'Comment added to issue #{issue_number}') + def edit_body_of_issue(issue_number, new_body): url = f'https://api.github.com/repos/{REPO_OWNER}/{REPO_NAME}/issues/{issue_number}' payload = {'body': new_body} @@ -55,25 +60,63 @@ def edit_body_of_issue(issue_number, new_body): response.raise_for_status() print(f'Edited body of issue (ID: {issue_number}) updated.') -def main(): + +def main(issue_ids): print('🔎 Fetching issues...') - issues = get_issues_with_title("Author Page:") + issues = get_issues_with_title("Author Page:") + get_issues_with_title("Author Metadata:") + print(f"Found {len(issues)} issues.") for issue in issues: number = issue["number"] + + if issue_ids and number not in issue_ids: + # print(f"Skipping issue #{number}: {issue['title']}") + continue + print(f'---\nProcessing issue #{number}: {issue["title"]}') - - issue_body = issue["body"] if "### Author ORCID" not in issue_body: issue_body_list = issue_body.split("### Type of Author Metadata Correction") - issue_body_list.insert(1, "### Author ORCID\n\n-Add ORCID here-\n\n### Institution of highest (anticipated) degree\n\n-Add insitution here-\n\n### Author Name (only if published in another script)\n\n -add author name here if needed-\n\n### Is the author's name read right to left? (only if published in another script)\n\n- [ ] Script is read right-to-left.\n\n### Type of Author Metadata Correction") + issue_body_list.insert( + 1, + textwrap.dedent(""" + ### Author ORCID + + -Add ORCID here- + + ### Institution of highest (anticipated) degree + + -Add insitution here- + + ### Your papers (if required, see comment below) + + -Provide Anthology IDs or Anthology URLs here- + + ### Type of Author Metadata Correction + """), + ) issue_body = "".join(issue_body_list) edit_body_of_issue(number, issue_body) - - add_comment_to_issue(number, "Hi! We have just added a few new fields to help us manage our author database and decrease ambiguity on future imports. Please fill out the fields that have been added to the body of this issue, and leave a comment when you are finished. We will then proceed with processing this request.") + + add_comment_to_issue( + number, + textwrap.dedent(""" + Hello: we are attempting to close out a large backlog of author page requests. As part of these efforts, + we are collecting additional information which will help us assign papers to the correct author + in the future. Please modify the updated description above with the requested information. + + If you are requesting to split an author page (i.e., your page has some papers that are not yours), + please also provide a list of your papers, in the form of Anthology IDs or URLS + (e.g., 2023.wmt-1.13 or https://aclanthology.org/2023.wmt-1.13/). + """) + ) if __name__ == '__main__': - main() + import argparse + parser = argparse.ArgumentParser(description='Update author page issues') + parser.add_argument('issue_ids', nargs='+', type=int, help='List of issue IDs to update') + args = parser.parse_args() + + main(args.issue_ids) From 81f2a6b395a0296501816b8537347efc6b770a0f Mon Sep 17 00:00:00 2001 From: Matt Post Date: Mon, 18 Aug 2025 18:16:05 -0400 Subject: [PATCH 6/8] Small updates --- bin/update_author_page_issues.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bin/update_author_page_issues.py b/bin/update_author_page_issues.py index d36b7fae30..04bce1b224 100755 --- a/bin/update_author_page_issues.py +++ b/bin/update_author_page_issues.py @@ -63,7 +63,7 @@ def edit_body_of_issue(issue_number, new_body): def main(issue_ids): print('🔎 Fetching issues...') - issues = get_issues_with_title("Author Page:") + get_issues_with_title("Author Metadata:") + issues = get_issues_with_title("Author Metadata:") + get_issues_with_title("Author Page:") print(f"Found {len(issues)} issues.") for issue in issues: @@ -103,8 +103,9 @@ def main(issue_ids): number, textwrap.dedent(""" Hello: we are attempting to close out a large backlog of author page requests. As part of these efforts, - we are collecting additional information which will help us assign papers to the correct author - in the future. Please modify the updated description above with the requested information. + we are collecting additional information ([ORCID](https://orcid.org/) and degree institution) which will + help us assign papers to the correct author in the future. Please modify the updated description above + with the requested information. If you are requesting to split an author page (i.e., your page has some papers that are not yours), please also provide a list of your papers, in the form of Anthology IDs or URLS From 130ee5e2fa5d70403efd2b8cf6cc5c4b3975f1bc Mon Sep 17 00:00:00 2001 From: Matt Post Date: Mon, 18 Aug 2025 18:21:23 -0400 Subject: [PATCH 7/8] More fiddling with the message --- bin/update_author_page_issues.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/bin/update_author_page_issues.py b/bin/update_author_page_issues.py index 04bce1b224..37cbbc86d0 100755 --- a/bin/update_author_page_issues.py +++ b/bin/update_author_page_issues.py @@ -68,12 +68,13 @@ def main(issue_ids): for issue in issues: number = issue["number"] + title = issue["title"] if issue_ids and number not in issue_ids: # print(f"Skipping issue #{number}: {issue['title']}") continue - print(f'---\nProcessing issue #{number}: {issue["title"]}') + print(f'---\nProcessing issue #{number}: {title}') issue_body = issue["body"] if "### Author ORCID" not in issue_body: @@ -102,14 +103,9 @@ def main(issue_ids): add_comment_to_issue( number, textwrap.dedent(""" - Hello: we are attempting to close out a large backlog of author page requests. As part of these efforts, - we are collecting additional information ([ORCID](https://orcid.org/) and degree institution) which will - help us assign papers to the correct author in the future. Please modify the updated description above - with the requested information. - - If you are requesting to split an author page (i.e., your page has some papers that are not yours), - please also provide a list of your papers, in the form of Anthology IDs or URLS - (e.g., 2023.wmt-1.13 or https://aclanthology.org/2023.wmt-1.13/). + Hello: we are attempting to close out a large backlog of author page requests. As part of these efforts, we are collecting additional information ([ORCID](https://orcid.org/) and degree institution) which will help us assign papers to the correct author in the future. Please modify the updated description above with the requested information. + + If you are requesting to split an author page (i.e., your page has some papers that are not yours), please also provide a list of your papers, in the form of Anthology IDs or URLS (e.g., 2023.wmt-1.13 or https://aclanthology.org/2023.wmt-1.13/). We are unable to match papers to Google or Semantic Scholar profiles. """) ) @@ -117,7 +113,7 @@ def main(issue_ids): if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Update author page issues') - parser.add_argument('issue_ids', nargs='+', type=int, help='List of issue IDs to update') + parser.add_argument('issue_ids', nargs='*', type=int, help='List of issue IDs to update') args = parser.parse_args() main(args.issue_ids) From 870a09c0028e6fa81bfae4e047a73507bbf32a86 Mon Sep 17 00:00:00 2001 From: Matt Post Date: Mon, 18 Aug 2025 18:21:49 -0400 Subject: [PATCH 8/8] black --- bin/update_author_page_issues.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/bin/update_author_page_issues.py b/bin/update_author_page_issues.py index 37cbbc86d0..ff8cd1208b 100755 --- a/bin/update_author_page_issues.py +++ b/bin/update_author_page_issues.py @@ -63,7 +63,9 @@ def edit_body_of_issue(issue_number, new_body): def main(issue_ids): print('🔎 Fetching issues...') - issues = get_issues_with_title("Author Metadata:") + get_issues_with_title("Author Page:") + issues = get_issues_with_title("Author Metadata:") + get_issues_with_title( + "Author Page:" + ) print(f"Found {len(issues)} issues.") for issue in issues: @@ -81,7 +83,8 @@ def main(issue_ids): issue_body_list = issue_body.split("### Type of Author Metadata Correction") issue_body_list.insert( 1, - textwrap.dedent(""" + textwrap.dedent( + """ ### Author ORCID -Add ORCID here- @@ -95,25 +98,31 @@ def main(issue_ids): -Provide Anthology IDs or Anthology URLs here- ### Type of Author Metadata Correction - """), + """ + ), ) issue_body = "".join(issue_body_list) edit_body_of_issue(number, issue_body) add_comment_to_issue( number, - textwrap.dedent(""" + textwrap.dedent( + """ Hello: we are attempting to close out a large backlog of author page requests. As part of these efforts, we are collecting additional information ([ORCID](https://orcid.org/) and degree institution) which will help us assign papers to the correct author in the future. Please modify the updated description above with the requested information. If you are requesting to split an author page (i.e., your page has some papers that are not yours), please also provide a list of your papers, in the form of Anthology IDs or URLS (e.g., 2023.wmt-1.13 or https://aclanthology.org/2023.wmt-1.13/). We are unable to match papers to Google or Semantic Scholar profiles. - """) + """ + ), ) if __name__ == '__main__': import argparse + parser = argparse.ArgumentParser(description='Update author page issues') - parser.add_argument('issue_ids', nargs='*', type=int, help='List of issue IDs to update') + parser.add_argument( + 'issue_ids', nargs='*', type=int, help='List of issue IDs to update' + ) args = parser.parse_args() main(args.issue_ids)