diff --git a/bin/add_author_id.py b/bin/add_author_id.py
index d8e166bc8b..0961b450cc 100755
--- a/bin/add_author_id.py
+++ b/bin/add_author_id.py
@@ -1,83 +1,138 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8  -*-
-#
-# Copyright 2022 Matt Post <post@cs.jhu.edu>
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# -*- coding: utf-8 -*-
+"""Add an author ID to NameSpecification entries using the acl_anthology module.
 
-"""
-Adds an ID tag to all instances of an author in all XML files where there is no ID tag.
-
-First use case was the Bill Byrne separation of July 2022.
-
-2020.gebnlp-1.4 E14-1026 E14-1028 W16-2324 2021.acl-long.55 2021.eancs-1.2 W15-0116 D19-1125 D19-1331 D19-1459 P14-3000 2022.naacl-main.136 W18-1821 W18-5420 W18-6427 2020.nlp4call-1.2 N19-1406 2021.emnlp-main.620 2021.emnlp-main.666 N18-2081 N18-3013 W17-3531 2020.wmt-1.94 D15-1273 2022.nlp4convai-1.7 P16-2049 C14-1195 P19-1022 W19-4417 W19-4424 W19-5340 W19-5421 2020.wat-1.21 E17-2058 2022.ecnlp-1.13 J14-3008 N15-1041 N15-1105 P18-2051 D17-1208 D17-1220 D17-2005 2020.acl-main.690 2020.acl-main.693 N16-1100 2022.findings-acl.223 2022.findings-acl.301
+This script adds the given author ID to all papers matching the first and last
+name. By default, it uses the module to find the list of papers to edit;
+alternatively, you can provide the list of papers yourself via --paper-ids.
 
 Usage:
-
-    ./add_author_id.py bill-byrne --last-name Byrne --first-name Bill
+    ./add_author_id.py <id> "Last name[, First name]" [--paper-ids 2028.acl-main.74 ...]
 """
 
-import argparse
-import os
+from __future__ import annotations
 
-from pathlib import Path
-from anthology.utils import indent
+import argparse
+from collections import defaultdict
 from itertools import chain
+from pathlib import Path
+
+from acl_anthology.anthology import Anthology
 
+# old library since we're still editing XML files
+from anthology.utils import indent
 import lxml.etree as ET
 
 
-def main(args):
-    for xml_file in Path(args.data_dir).glob("**/*.xml"):
-        changed_one = False
+def main(args: argparse.Namespace) -> None:
+    """Add ``args.id`` to matching author/editor entries that lack an ID.
+
+    Papers are taken from ``args.paper_ids`` when given, otherwise from the
+    person matching ``args.name`` whose ``is_explicit`` flag is false; each
+    affected ``<data_dir>/xml/<collection>.xml`` file is rewritten in place.
+    """
+    last_name, first_name = (
+        args.name.split(", ", 1) if ", " in args.name else (args.name, None)
+    )
+
+    anthology = Anthology(args.data_dir, verbose=True)
+
+    # Build a collection of the set of papers to modify within each XML file
+    collection_to_paper_map = defaultdict(list)
+
+    if args.paper_ids:
+        for paper_id in args.paper_ids:
+            paper = anthology.get_paper(paper_id)
+            if paper:
+                collection_to_paper_map[paper.collection_id].append(paper.full_id_tuple)
+            else:
+                print(f"No paper found with ID {paper_id}, skipping")
+    else:
+        people = anthology.find_people(args.name)
+        if not people:
+            print(f"No person found matching name {args.name}")
+            return
+
+        # find the person with the non-explicit ID (None if every match has one)
+        person = next((p for p in people if not p.is_explicit), None)
+        if person is None:
+            print(f"No person found matching name {args.name} without an explicit ID")
+            return
+
+        for paper in person.papers():
+            collection_to_paper_map[paper.collection_id].append(paper.full_id_tuple)
+
+    if collection_to_paper_map:
+        print("Will edit the following paper IDs:")
+        for paper_id_tuples in collection_to_paper_map.values():
+            for paper_id in paper_id_tuples:
+                print(f" - {paper_id}")
+
+    # Now iterate over those files and the papers within them
+    for collection_id, paper_id_tuples in collection_to_paper_map.items():
+        xml_file = Path(args.data_dir) / "xml" / f"{collection_id}.xml"
 
         tree = ET.parse(xml_file)
-        for paper_xml in chain(
-            tree.getroot().findall(".//paper"), tree.getroot().findall(".//meta")
-        ):
+        for paper_tuple in paper_id_tuples:
+            _, volume_id, paper_id = paper_tuple
+            # Get the paper; the lookup returns None if there is no such <paper> element
+            paper_xml = tree.getroot().find(
+                f"./volume[@id='{volume_id}']/paper[@id='{paper_id}']"
+            )
+            if paper_xml is None:
+                print(f"Could not find paper {paper_tuple} in {xml_file}, skipping")
+                continue
+
             for author_xml in chain(
                 paper_xml.findall("./author"), paper_xml.findall("./editor")
             ):
                 if "id" in author_xml.attrib:
                     continue
-                last_name = author_xml.find("./last").text
                 try:
-                    first_name = author_xml.find("./first").text
+                    author_first_name = author_xml.find("./first").text
                 except AttributeError:
-                    first_name = ""
-                if last_name == args.last_name and first_name == args.first_name:
+                    author_first_name = None
+                author_last_name = author_xml.find("./last").text
+
+                if author_last_name == last_name and author_first_name == first_name:
-                    paper_id = (
-                        paper_xml.attrib["id"] if paper_xml.text == "paper" else "0"
-                    )
-                    anth_id = f"{xml_file}/{paper_id}"
-                    print(f"Adding {args.id} to {anth_id}...")
+                    anth_id = anthology.get_paper(paper_tuple).full_id
+                    print(
+                        f"Adding {args.id} to {author_first_name} {author_last_name} on paper {anth_id}..."
+                    )
                     author_xml.attrib["id"] = args.id
-                    changed_one = True
 
-        if changed_one:
-            indent(tree.getroot())
-            tree.write(xml_file, encoding="UTF-8", xml_declaration=True)
+        indent(tree.getroot())
+        tree.write(xml_file, encoding="UTF-8", xml_declaration=True)
+
+    # TODO: once the acl_anthology module is published, use it to write the
+    # changed XML files instead of the above (sketch below; not working yet).
+    # for paper in person.papers():
+    #     print("PAPER", paper.full_id)
+    #     authors = paper.get_editors() if paper.is_frontmatter else paper.authors
+    #     for author in authors:
+    #         if author.name in person.names:
+    #             print("-> Found", author)
+    #             author.id = args.id
+    #     # collection_paper_map[paper.collection_id].append(paper.full_id)
+
+    # # save the anthology (doesn't currently work)
+    # anthology.save_all()
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(description="Add an author ID to all of an author's papers")
     parser.add_argument("id", help="Author ID to add")
-    parser.add_argument("--last-name", help="Author's last name")
-    parser.add_argument("--first-name", help="Author's first name")
-    parser.add_argument("--confirm", action="store_true", help="Confirm each instance")
+    parser.add_argument("name", help="Author's name (last[, first])")
+    parser.add_argument("--paper-ids", nargs="*", help="List of paper IDs to modify")
     parser.add_argument(
-        "--data-dir", default=os.path.join(os.path.dirname(__file__), "..", "data", "xml")
+        "--data-dir",
+        default=None,
+        help="Path to anthology data directory (default: ../data relative to this script's directory)",
     )
     args = parser.parse_args()
+    # Fall back to the data/ directory next to this script's parent directory when
+    # --data-dir is not given; a user-supplied path is passed through unchanged
+    if args.data_dir is None:
+        args.data_dir = str(Path(__file__).parent.parent / "data")
 
     main(args)
diff --git a/data/xml/2020.acl.xml b/data/xml/2020.acl.xml
index f8a2f88a6a..d3b24510db 100644
--- a/data/xml/2020.acl.xml
+++ b/data/xml/2020.acl.xml
@@ -3245,7 +3245,7 @@
     <paper id="241">
       <title>Orthogonal Relation Transforms with Graph Context Modeling for Knowledge Graph Embedding</title>
       <author><first>Yun</first><last>Tang</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
diff --git a/data/xml/2021.naacl.xml b/data/xml/2021.naacl.xml
index 0a1c84b4ff..81af1414c0 100644
--- a/data/xml/2021.naacl.xml
+++ b/data/xml/2021.naacl.xml
@@ -2814,7 +2814,7 @@
       <author><first>Kevin</first><last>Huang</last></author>
       <author><first>Tengyu</first><last>Ma</last></author>
       <author><first>Quanquan</first><last>Gu</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <pages>2609–2615</pages>
       <abstract>First-order meta-learning algorithms have been widely used in practice to learn initial model parameters that can be quickly adapted to new tasks due to their efficiency and effectiveness. However, existing studies find that meta-learner can overfit to some specific adaptation when we have heterogeneous tasks, leading to significantly degraded performance. In Natural Language Processing (NLP) applications, datasets are often diverse and each task has its unique characteristics. Therefore, to address the overfitting issue when applying first-order meta-learning to NLP applications, we propose to reduce the variance of the gradient estimator used in task adaptation. To this end, we develop a variance-reduced first-order meta-learning algorithm. The core of our algorithm is to introduce a novel variance reduction term to the gradient estimation when performing the task adaptation. Experiments on two NLP applications: few-shot text classification and multi-domain dialog state tracking demonstrate the superior performance of our proposed method.</abstract>
       <url hash="93468fde">2021.naacl-main.206</url>
@@ -3111,7 +3111,7 @@
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Rex</first><last>Ying</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>2884–2894</pages>
diff --git a/data/xml/2021.repl4nlp.xml b/data/xml/2021.repl4nlp.xml
index bff5cf5223..41ce5ea356 100644
--- a/data/xml/2021.repl4nlp.xml
+++ b/data/xml/2021.repl4nlp.xml
@@ -377,7 +377,7 @@
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Tengyu</first><last>Ma</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <pages>307–315</pages>
       <abstract>Document-level relation extraction is a challenging task, requiring reasoning over multiple sentences to predict a set of relations in a document. In this paper, we propose a novel framework E2GRE (Entity and Evidence Guided Relation Extraction) that jointly extracts relations and the underlying evidence sentences by using large pretrained language model (LM) as input encoder. First, we propose to guide the pretrained LM’s attention mechanism to focus on relevant context by using attention probabilities as additional features for evidence prediction. Furthermore, instead of feeding the whole document into pretrained LMs to obtain entity representation, we concatenate document text with head entities to help LMs concentrate on parts of the document that are more related to the head entity. Our E2GRE jointly learns relation extraction and evidence prediction effectively, showing large gains on both these tasks, which we find are highly correlated.</abstract>
       <url hash="d034db75">2021.repl4nlp-1.30</url>
diff --git a/data/xml/2021.sustainlp.xml b/data/xml/2021.sustainlp.xml
index 3887dbe39d..3d3b21443f 100644
--- a/data/xml/2021.sustainlp.xml
+++ b/data/xml/2021.sustainlp.xml
@@ -143,7 +143,7 @@
       <author><first>Xiaochen</first><last>Hou</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
       <author><first>Kathleen</first><last>McKeown</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <pages>79–85</pages>
       <abstract>Large pre-trained language models (PLMs) have led to great success on various commonsense question answering (QA) tasks in an end-to-end fashion. However, little attention has been paid to what commonsense knowledge is needed to deeply characterize these QA tasks. In this work, we proposed to categorize the semantics needed for these tasks using the SocialIQA as an example. Building upon our labeled social knowledge categories dataset on top of SocialIQA, we further train neural QA models to incorporate such social knowledge categories and relation information from a knowledge base. Unlike previous work, we observe our models with semantic categorizations of social knowledge can achieve comparable performance with a relatively simple model and smaller size compared to other complex approaches.</abstract>
       <url hash="499d3240">2021.sustainlp-1.10</url>
diff --git a/data/xml/2021.textgraphs.xml b/data/xml/2021.textgraphs.xml
index 69b552a4db..06f343542a 100644
--- a/data/xml/2021.textgraphs.xml
+++ b/data/xml/2021.textgraphs.xml
@@ -107,7 +107,7 @@
     <paper id="8">
       <title>Selective Attention Based Graph Convolutional Networks for Aspect-Level Sentiment Classification</title>
       <author><first>Xiaochen</first><last>Hou</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Peng</first><last>Qi</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
diff --git a/data/xml/2022.acl.xml b/data/xml/2022.acl.xml
index 4c2c168b49..e032fd2eed 100644
--- a/data/xml/2022.acl.xml
+++ b/data/xml/2022.acl.xml
@@ -7546,7 +7546,7 @@ in the Case of Unambiguous Gender</title>
       <author><first>Chao</first><last>Shang</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
       <author><first>Peng</first><last>Qi</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <pages>8017-8026</pages>
       <abstract>Question answering over temporal knowledge graphs (KGs) efficiently uses facts contained in a temporal KG, which records entity relations and when they occur in time, to answer natural language questions (e.g., “Who was the president of the US before Obama?”). These questions often involve three time-related challenges that previous work fail to adequately address: 1) questions often do not specify exact timestamps of interest (e.g., “Obama” instead of 2000); 2) subtle lexical differences in time relations (e.g., “before” vs “after”); 3) off-the-shelf temporal KG embeddings that previous work builds on ignore the temporal order of timestamps, which is crucial for answering temporal-order related questions. In this paper, we propose a time-sensitive question answering (TSQA) framework to tackle these problems. TSQA features a timestamp estimation module to infer the unwritten timestamp from the question. We also employ a time-sensitive KG encoder to inject ordering information into the temporal KG embeddings that TSQA is based on. With the help of techniques to reduce the search space for potential answers, TSQA significantly outperforms the previous state of the art on a new benchmark for question answering over temporal KGs, especially achieving a 32% (absolute) error reduction on complex questions that require multiple steps of reasoning over facts in the temporal KG.</abstract>
       <url hash="2642c44d">2022.acl-long.552</url>
diff --git a/data/xml/2022.emnlp.xml b/data/xml/2022.emnlp.xml
index 8009e4b1fa..cde05fd78a 100644
--- a/data/xml/2022.emnlp.xml
+++ b/data/xml/2022.emnlp.xml
@@ -4236,7 +4236,7 @@
       <author><first>Shereen</first><last>Oraby</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Alessandra</first><last>Cervone</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>4590-4605</pages>
@@ -4264,7 +4264,7 @@
       <author><first>Shereen</first><last>Oraby</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Shuyang</first><last>Gao</last><affiliation>Amazon.com, Inc.</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <author id="yang-liu"><first>Yang</first><last>Liu</last><affiliation>Amazon</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>4635-4648</pages>
diff --git a/data/xml/2023.acl.xml b/data/xml/2023.acl.xml
index 2be983d392..64a607eea2 100644
--- a/data/xml/2023.acl.xml
+++ b/data/xml/2023.acl.xml
@@ -7341,7 +7341,7 @@
       <author><first>Wenbo</first><last>Zhao</last><affiliation>Amazon</affiliation></author>
       <author><first>Yiwen</first><last>Chen</last><affiliation>University of Cambridge</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon Alexa AI</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>9235-9254</pages>
       <abstract>Automatic melody-to-lyric generation is a task in which song lyrics are generated to go with a given melody. It is of significant practical interest and more challenging than unconstrained lyric generation as the music imposes additional constraints onto the lyrics. The training data is limited as most songs are copyrighted, resulting in models that underfit the complicated cross-modal relationship between melody and lyrics. In this work, we propose a method for generating high-quality lyrics without training on any aligned melody-lyric data. Specifically, we design a hierarchical lyric generation framework that first generates a song outline and second the complete lyrics. The framework enables disentanglement of training (based purely on text) from inference (melody-guided text generation) to circumvent the shortage of parallel data. We leverage the segmentation and rhythm alignment between melody and lyrics to compile the given melody into decoding constraints as guidance during inference. The two-step hierarchical design also enables content control via the lyric outline, a much-desired feature for democratizing collaborative song creation. Experimental results show that our model can generate high-quality lyrics that are more on-topic, singable, intelligible, and coherent than strong baselines, for example SongMASS, a SOTA model trained on a parallel dataset, with a 24% relative overall quality improvement based on human ratings. Our code is available at <url>https://github.com/amazon-science/unsupervised-melody-to-lyrics-generation</url>.</abstract>
diff --git a/data/xml/2023.blackboxnlp.xml b/data/xml/2023.blackboxnlp.xml
index b60ef836e2..23a4f61420 100644
--- a/data/xml/2023.blackboxnlp.xml
+++ b/data/xml/2023.blackboxnlp.xml
@@ -272,7 +272,7 @@
     </paper>
     <paper id="24">
       <title>Rigorously Assessing Natural Language Explanations of Neurons</title>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last></author>
       <author><first>Atticus</first><last>Geiger</last></author>
       <author><first>Karel</first><last>D’Oosterlinck</last></author>
       <author><first>Zhengxuan</first><last>Wu</last></author>
diff --git a/data/xml/2023.findings.xml b/data/xml/2023.findings.xml
index 6f27f17909..7ff7f139a3 100644
--- a/data/xml/2023.findings.xml
+++ b/data/xml/2023.findings.xml
@@ -6935,7 +6935,7 @@
       <author><first>Avik</first><last>Ray</last><affiliation>Amazon</affiliation></author>
       <author><first>Shubham</first><last>Garg</last><affiliation>Amazon.com</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <pages>5137-5151</pages>
       <abstract>Existing efforts on text synthesis for code-switching mostly require training on code-switched texts in the target language pairs, limiting the deployment of the models to cases lacking code-switched data. In this work, we study the problem of synthesizing code-switched texts for language pairs absent from the training data. We introduce GLOSS, a model built on top of a pre-trained multilingual machine translation model (PMMTM) with an additional code-switching module. This module, either an adapter or extra prefixes, learns code-switching patterns from code-switched data during training, while the primary component of GLOSS, i.e., the PMMTM, is frozen. The design of only adjusting the code-switching module prevents our model from overfitting to the constrained training data for code-switching. Hence, GLOSS exhibits the ability to generalize and synthesize code-switched texts across a broader spectrum of language pairs. Additionally, we develop a self-training algorithm on target language pairs further to enhance the reliability of GLOSS. Automatic evaluations on four language pairs show that GLOSS achieves at least 55% relative BLEU and METEOR scores improvements compared to strong baselines. Human evaluations on two language pairs further validate the success of GLOSS.</abstract>
       <url hash="d138bc12">2023.findings-acl.318</url>
@@ -7811,7 +7811,7 @@
       <author><first>Peng</first><last>Qi</last><affiliation>AWS AI Labs</affiliation></author>
       <author><first>Nina</first><last>Du</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Manning</last><affiliation>Stanford University</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <pages>6175-6191</pages>
       <abstract>Pragmatic reasoning about another speaker’s unspoken intent and state of mind is crucial to efficient and effective human communication. It is virtually omnipresent in conversations between humans, e.g., when someone asks “do you have a minute?”, instead of interpreting it literally as a query about your schedule, you understand that the speaker might have requests that take time, and respond accordingly. In this paper, we present PragmatiCQA, the first large-scale open-domain question answering (QA) dataset featuring 6873 QA pairs that explores pragmatic reasoning in conversations over a diverse set of topics. We designed innovative crowdsourcing mechanisms for interest-based and task-driven data collection to address the common issue of incentive misalignment between crowdworkers and potential users. To compare computational models’ capability at pragmatic reasoning, we also propose several quantitative metrics to evaluate question answering systems on PragmatiCQA. We find that state-of-the-art systems still struggle to perform human-like pragmatic reasoning, and highlight their limitations for future research.</abstract>
       <url hash="9bda0e38">2023.findings-acl.385</url>
@@ -12865,7 +12865,7 @@
     </paper>
     <paper id="770">
       <title>Inducing Character-level Structure in Subword-based Language Models with Type-level Interchange Intervention Training</title>
-      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Zhengxuan</first><last>Wu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Kyle</first><last>Mahowald</last><affiliation>University of Texas at Austin</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
@@ -21260,7 +21260,7 @@
     </paper>
     <paper id="509">
       <title>Culturally Aware Natural Language Inference</title>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last></author>
       <author><first>Diyi</first><last>Yang</last></author>
       <pages>7591-7609</pages>
       <abstract>Humans produce and consume language in a particular cultural context, which includes knowledge about specific norms and practices. A listener’s awareness of the cultural context is critical for interpreting the speaker’s meaning. A simple expression like *I didn’t leave a tip* implies a strong sense of dissatisfaction when tipping is assumed to be the norm. As NLP systems reach users from different cultures, achieving culturally aware language understanding becomes increasingly important. However, current research has focused on building cultural knowledge bases without studying how such knowledge leads to contextualized interpretations of texts. In this work, we operationalize cultural variations in language understanding through a natural language inference (NLI) task that surfaces cultural variations as label disagreement between annotators from different cultural groups. We introduce the first Culturally Aware Natural Language Inference (CALI) dataset with 2.7K premise-hypothesis pairs annotated by two cultural groups located in the U.S. and India. With CALI, we categorize how cultural norms affect language understanding and present an evaluation framework to assess at which levels large language models are culturally aware. Our dataset is available at https://github.com/SALT-NLP/CulturallyAwareNLI.</abstract>
diff --git a/data/xml/2023.repl4nlp.xml b/data/xml/2023.repl4nlp.xml
index 8585cd82f4..c96604ac2e 100644
--- a/data/xml/2023.repl4nlp.xml
+++ b/data/xml/2023.repl4nlp.xml
@@ -139,7 +139,7 @@
       <author><first>Wenbo</first><last>Zhao</last><affiliation>Amazon</affiliation></author>
       <author><first>Arpit</first><last>Gupta</last><affiliation>Amazon</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
       <pages>118-130</pages>
       <abstract>Recent advances in prompt tuning have proven effective as a new language modeling paradigm for various natural language understanding tasks. However, it is challenging to adapt the soft prompt embeddings to different domains or generalize to low-data settings when learning soft prompts itself is unstable, task-specific, and bias-prone. This paper proposes a principled learning framework—soft prompt construction (SPC)—to facilitate learning domain-adaptable soft prompts. Derived from the SPC framework is a simple loss that can plug into various models and tuning approaches to improve their cross-domain performance. We show SPC can improve upon SOTA for contextual query rewriting, summarization, and paraphrase detection by up to 5%, 19%, and 16%, respectively.</abstract>
       <url hash="80cff754">2023.repl4nlp-1.10</url>
diff --git a/data/xml/2024.acl.xml b/data/xml/2024.acl.xml
index aab538721a..3b922bcc61 100644
--- a/data/xml/2024.acl.xml
+++ b/data/xml/2024.acl.xml
@@ -6504,7 +6504,7 @@
     </paper>
     <paper id="470">
       <title><fixed-case>RAVEL</fixed-case>: Evaluating Interpretability Methods on Disentangling Language Model Representations</title>
-      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Zhengxuan</first><last>Wu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <author><first>Mor</first><last>Geva</last><affiliation>Tel Aviv University and Google Research</affiliation></author>
diff --git a/data/xml/2024.emnlp.xml b/data/xml/2024.emnlp.xml
index 42c03c0b37..15135a2a68 100644
--- a/data/xml/2024.emnlp.xml
+++ b/data/xml/2024.emnlp.xml
@@ -8379,7 +8379,7 @@
     </paper>
     <paper id="598">
       <title>Demystifying Verbatim Memorization in Large Language Models</title>
-      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Diyi</first><last>Yang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Potts</last><affiliation>Stanford University</affiliation></author>
       <pages>10711-10732</pages>
diff --git a/data/xml/2024.findings.xml b/data/xml/2024.findings.xml
index 25f13ff510..0f92b1c6da 100644
--- a/data/xml/2024.findings.xml
+++ b/data/xml/2024.findings.xml
@@ -4431,7 +4431,7 @@
       <author><first>Jiun-Yu</first><last>Kao</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Emre</first><last>Barut</last><affiliation>Amazon</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Mohit</first><last>Bansal</last><affiliation>University of North Carolina at Chapel Hill</affiliation></author>
       <pages>2793-2807</pages>
       <abstract>Referring Expression Generation (REG) is the task of generating a description that unambiguously identifies a given target in the scene. Different from Image Captioning (IC), REG requires learning fine-grained characteristics of not only the scene objects but also their surrounding context. Referring expressions are usually not singular; an object can often be uniquely referenced in numerous ways, for instance, by color, by location, or by relationship with other objects. Most prior works, however, have not explored this ‘aspect-based multiplicity’ of referring expressions. Hence, in this work, we focus on the Aspect-Controlled REG task, which requires generating a referring expression conditioned on the input aspect(s), where an aspect captures a style of reference. By changing the input aspect such as color, location, action etc., one can generate multiple distinct expressions per target region. To solve this new task, we first modify BLIP for aligning image-regions and text-expressions. We achieve this through a novel approach for feeding the input by drawing a bounding box around the target image-region and prompting the model to generate the referring expression. Our base REG model already beats all prior works in CIDEr score. To tackle Aspect-Controlled REG, we append ‘aspect tokens’ to the prompt and show that distinct expressions can be generated by just changing the prompt. Finally, to prove the high-quality and diversity of the data generated by our proposed aspect-controlled REG model, we also perform data-augmentation-based evaluation on the downstream Referring Expression Comprehension (REC) task. With just half of the real data augmented with the generated synthetic data, we achieve performance comparable to training with 100% of real data, using a SOTA REC model.</abstract>
diff --git a/data/xml/2024.naacl.xml b/data/xml/2024.naacl.xml
index 2b06ed1614..f8888ebba2 100644
--- a/data/xml/2024.naacl.xml
+++ b/data/xml/2024.naacl.xml
@@ -8197,7 +8197,7 @@
       <author><first>Zhengxuan</first><last>Wu</last><affiliation>Stanford University</affiliation></author>
       <author><first>Atticus</first><last>Geiger</last><affiliation>Pr(Ai)²R Group</affiliation></author>
       <author><first>Aryaman</first><last>Arora</last></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
+      <author id="jing-huang-stanford"><first>Jing</first><last>Huang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Zheng</first><last>Wang</last><affiliation>Stanford University</affiliation></author>
       <author><first>Noah</first><last>Goodman</last><affiliation>Stanford University</affiliation></author>
       <author><first>Christopher</first><last>Manning</last><affiliation>Computer Science Department, Stanford University</affiliation></author>
diff --git a/data/xml/2024.nlp4pi.xml b/data/xml/2024.nlp4pi.xml
index c0fd700381..92158b2815 100644
--- a/data/xml/2024.nlp4pi.xml
+++ b/data/xml/2024.nlp4pi.xml
@@ -91,7 +91,7 @@
       <author><first>Spandana</first><last>Gella</last><affiliation>Amazon</affiliation></author>
       <author><first>Apurv</first><last>Verma</last><affiliation>Bloomberg</affiliation></author>
       <author><first>Tagyoung</first><last>Chung</last><affiliation>Amazon</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon Alexa AI</affiliation></author>
       <author><first>Nanyun</first><last>Peng</last><affiliation>University of California, Los Angeles</affiliation></author>
       <pages>78-97</pages>
       <abstract>Creating children’s stories through text generation is a creative task that requires stories to be both entertaining and suitable for young audiences. However, since current story generation systems often rely on pre-trained language models fine-tuned with limited story data, they may not always prioritize child-friendliness. This can lead to the unintended generation of stories containing problematic elements such as violence, profanity, and biases. Regrettably, despite the significance of these concerns, there is a lack of clear guidelines and benchmark datasets for ensuring content safety for children. In this paper, we introduce a taxonomy specifically tailored to assess content safety in text, with a strong emphasis on children’s well-being. We present PG-Story, a dataset that includes detailed annotations for both sentence-level and discourse-level safety. We demonstrate the potential of identifying unsafe content through self-diagnosis and employing controllable generation techniques during the decoding phase to minimize unsafe elements in generated stories.</abstract>
diff --git a/data/xml/2025.findings.xml b/data/xml/2025.findings.xml
index 46093971f6..6576224cae 100644
--- a/data/xml/2025.findings.xml
+++ b/data/xml/2025.findings.xml
@@ -19720,7 +19720,7 @@
       <author><first>Zhenwei</first><last>Dai</last><affiliation>Amazon</affiliation></author>
       <author><first>Yan</first><last>Han</last><affiliation>Amazon</affiliation></author>
       <author><first>Chen</first><last>Luo</last><affiliation>Amazon</affiliation></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Amazon</affiliation></author>
       <author><first>Zhen</first><last>Li</last><affiliation>Amazon</affiliation></author>
       <author><first>Suhang</first><last>Wang</last><affiliation>Pennsylvania State University</affiliation></author>
       <author><first>Yue</first><last>Xing</last><affiliation>Michigan State University</affiliation></author>
diff --git a/data/xml/2025.naacl.xml b/data/xml/2025.naacl.xml
index ebe1d21a8d..0699794ca5 100644
--- a/data/xml/2025.naacl.xml
+++ b/data/xml/2025.naacl.xml
@@ -6526,7 +6526,7 @@
       <author><first>Bingzheng</first><last>Gan</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Yufan</first><last>Zhao</last><affiliation>Huawei International Pte. Ltd.</affiliation></author>
       <author><first>Tianyi</first><last>Zhang</last></author>
-      <author><first>Jing</first><last>Huang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last><affiliation>Huawei Technologies Ltd.</affiliation></author>
       <author><first>Li</first><last>Yusu</last></author>
       <author><first>Shu Xian</first><last>Teo</last></author>
       <author><first>Changwang</first><last>Zhang</last><affiliation>CCF Theoretical Computer Science Technical Committee and OPPO Research Institute</affiliation></author>
diff --git a/data/xml/K19.xml b/data/xml/K19.xml
index 9c3c08dab4..ceaa076ffb 100644
--- a/data/xml/K19.xml
+++ b/data/xml/K19.xml
@@ -873,7 +873,7 @@
       <title>Relation Module for Non-Answerable Predictions on Reading Comprehension</title>
       <author><first>Kevin</first><last>Huang</last></author>
       <author><first>Yun</first><last>Tang</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
       <pages>747–756</pages>
diff --git a/data/xml/P01.xml b/data/xml/P01.xml
index 5cfcbe9f72..c4be77b3e9 100644
--- a/data/xml/P01.xml
+++ b/data/xml/P01.xml
@@ -391,7 +391,7 @@
     </paper>
     <paper id="39">
       <title>Information Extraction from Voicemail</title>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <author><first>Geoffrey</first><last>Zweig</last></author>
       <author><first>Mukund</first><last>Padmanabhan</last></author>
       <doi>10.3115/1073012.1073051</doi>
diff --git a/data/xml/P19.xml b/data/xml/P19.xml
index 88f7600045..49ac222356 100644
--- a/data/xml/P19.xml
+++ b/data/xml/P19.xml
@@ -3327,7 +3327,7 @@
       <title>Multi-hop Reading Comprehension across Multiple Documents by Reasoning over Heterogeneous Graphs</title>
       <author><first>Ming</first><last>Tu</last></author>
       <author><first>Guangtao</first><last>Wang</last></author>
-      <author><first>Jing</first><last>Huang</last></author>
+      <author id="jing-huang"><first>Jing</first><last>Huang</last></author>
       <author><first>Yun</first><last>Tang</last></author>
       <author><first>Xiaodong</first><last>He</last></author>
       <author><first>Bowen</first><last>Zhou</last></author>
diff --git a/data/yaml/name_variants.yaml b/data/yaml/name_variants.yaml
index f3fb10817d..3b160e8154 100644
--- a/data/yaml/name_variants.yaml
+++ b/data/yaml/name_variants.yaml
@@ -4064,6 +4064,13 @@
   - {first: Xuanjing, last: Huang}
 - canonical: {first: Xuedong, last: Huang}
   id: xuedong-huang
+- canonical: {first: Jing, last: Huang}
+  id: jing-huang-stanford
+  orcid: 0000-0001-9301-9410
+  comment: May refer to many people
+- canonical: {first: Jing, last: Huang}
+  id: jing-huang
+  comment: May refer to many people
 - canonical: {first: Richard A., last: Hudson}
   variants:
   - {first: Richard, last: Hudson}