2025-04-28-factual-validation-simplification #192

Open · wants to merge 6 commits into main
588 changes: 588 additions & 0 deletions _posts/2025-04-28-factual-validation-simplification.md

Large diffs are not rendered by default.

253 changes: 253 additions & 0 deletions assets/bibliography/2025-04-28-factual-validation-simplification.bib
@@ -0,0 +1,253 @@
@article{brown2020,
title={Language Models Are Few-Shot Learners},
author={Brown, Tom B. and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and Agarwal, Sandhini and Herbert-Voss, Ariel and Krueger, Gretchen and Henighan, Tom and Child, Rewon and Ramesh, Aditya and Ziegler, Daniel M. and Wu, Jeffrey and Winter, Clemens and Hesse, Christopher and Chen, Mark and Sigler, Eric and Litwin, Mateusz and Gray, Scott and Chess, Benjamin and Clark, Jack and Berner, Christopher and McCandlish, Sam and Radford, Alec and Sutskever, Ilya and Amodei, Dario},
journal={arXiv preprint arXiv:2005.14165},
year={2020}
}

@article{ji2022,
title={Survey of Hallucination in Natural Language Generation},
author={Ji, Ziwei and Lee, Nayeon and Frieske, Rita and Yu, Tiezheng and Su, Dan and Xu, Yan and Ishii, Etsuko and Bang, Yejin and Madotto, Andrea and Fung, Pascale},
journal={arXiv preprint arXiv:2202.03629},
year={2022}
}

@article{maynez2020,
title={On Faithfulness and Factuality in Abstractive Summarization},
author={Maynez, Joshua and Narayan, Shashi and Bohnet, Bernd and McDonald, Ryan},
journal={arXiv preprint arXiv:2005.00661},
year={2020}
}

@article{holtzman2019,
title={The Curious Case of Neural Text Degeneration},
author={Holtzman, Ari and Buys, Jan and Du, Li and Forbes, Maxwell and Choi, Yejin},
journal={arXiv preprint arXiv:1904.09751},
year={2019}
}

@article{esteva2019,
title={A Guide to Deep Learning in Healthcare},
author={Esteva, Andre and Robicquet, Alexandre and Ramsundar, Bharath and Kuleshov, Volodymyr and DePristo, Mark and Chou, Katherine and Cui, Claire and Corrado, Greg and Thrun, Sebastian and Dean, Jeff},
journal={Nature Medicine},
volume={25},
pages={24--29},
year={2019}
}

@article{ullah2024,
title={Challenges and Barriers of Using Large Language Models (LLM) Such as ChatGPT for Diagnostic Medicine with a Focus on Digital Pathology – A Recent Scoping Review},
author={Ullah, Ehsan and Parwani, Anil and Baig, Mirza Mansoor and Singh, Rajendra},
journal={Diagnostic Pathology},
volume={19},
year={2024}
}

@article{guu2020,
title={REALM: Retrieval-Augmented Language Model Pre-Training},
author={Guu, Kelvin and Lee, Kenton and Tung, Zora and Pasupat, Panupong and Chang, Ming-Wei},
journal={arXiv preprint arXiv:2002.08909},
year={2020}
}

@inproceedings{ester1996,
title={A Density-Based Algorithm for Discovering Clusters in Large Spatial Databases with Noise},
author={Ester, Martin and Kriegel, Hans-Peter and Sander, J{\"o}rg and Xu, Xiaowei},
booktitle={Proceedings of the Second International Conference on Knowledge Discovery and Data Mining (KDD-96)},
pages={226--231},
year={1996}
}

@article{jin2019,
title={PubMedQA: A Dataset for Biomedical Research Question Answering},
author={Jin, Qiao and Dhingra, Bhuwan and Liu, Zhengping and Cohen, William W. and Lu, Xinghua},
journal={arXiv preprint arXiv:1909.06146},
year={2019}
}

@article{radford2019,
title={Language Models are Unsupervised Multitask Learners},
author={Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
journal={OpenAI Blog},
year={2019}
}

@article{mousavi2024,
title={Is Your LLM Outdated? Benchmarking LLMs \& Alignment Algorithms for Time-Sensitive Knowledge},
author={Mousavi, Seyed Mahed and Alghisi, Simone and Riccardi, Giuseppe},
journal={arXiv preprint arXiv:2404.08700},
year={2024}
}

@inproceedings{bender2021,
title={On the Dangers of Stochastic Parrots: Can Language Models Be Too Big?},
author={Bender, Emily and McMillan-Major, Angelina and Shmitchell, Shmargaret and Gebru, Timnit},
booktitle={FAccT '21: Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency},
pages={610--623},
year={2021}
}

@article{wei2022,
title={Chain of Thought Prompting Elicits Reasoning in Large Language Models},
author={Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed and Le, Quoc and Zhou, Denny},
journal={arXiv preprint arXiv:2201.11903},
year={2022}
}

@article{bishop1992,
title={Exact Calculation of the Hessian Matrix for the Multilayer Perceptron},
author={Bishop, Christopher M.},
journal={Neural Computation},
volume={4},
number={4},
pages={494--501},
publisher={MIT Press},
year={1992}
}

@article{howard2018,
title={Universal Language Model Fine-tuning for Text Classification},
author={Howard, Jeremy and Ruder, Sebastian},
journal={arXiv preprint arXiv:1801.06146},
year={2018}
}

@article{zhang2022,
title={Fine-Tuning Pre-Trained Language Models Effectively by Optimizing Subnetworks Adaptively},
author={Zhang, Haojie and Li, Ge and Li, Jia and Zhang, Zhongjin and Zhu, Yuchuan and Jin, Zhi},
journal={arXiv preprint arXiv:2211.01642},
year={2022}
}

@article{bommasani2021,
title={On the Opportunities and Risks of Foundation Models},
author={Bommasani, Rishi and Hudson, Drew A. and Adeli, Ehsan and Altman, Russ and Arora, Simran and von Arx, Sydney and Bernstein, Michael S. and Bohg, Jeannette and Bosselut, Antoine and Brunskill, Emma and Brynjolfsson, Erik and Buch, Shyamal and Card, Dallas and Castellon, Rodrigo and Chatterji, Niladri and Chen, Annie and Creel, Kathleen and Davis, Jared Quincy and Demszky, Dora and Donahue, Chris and Doumbouya, Moussa and Durmus, Esin and Ermon, Stefano and Etchemendy, John and Ethayarajh, Kawin and Fei-Fei, Li and Finn, Chelsea and Gale, Trevor and Gillespie, Lauren and Goel, Karan and Goodman, Noah and Grossman, Shelby and Guha, Neel and Hashimoto, Tatsunori and Henderson, Peter and Hewitt, John and Ho, Daniel E. and Hong, Jenny and Hsu, Kyle and Huang, Jing and Icard, Thomas and Jain, Saahil and Jurafsky, Dan and Kalluri, Pratyusha and Karamcheti, Siddharth and Keeling, Geoff and Khani, Fereshte and Khattab, Omar and Koh, Pang Wei and Krass, Mark and Krishna, Ranjay and Kuditipudi, Rohith and Kumar, Ananya and Ladhak, Faisal and Lee, Mina and Lee, Tony and Leskovec, Jure and Levent, Isabelle and Li, Xiang Lisa and Li, Xuechen and Ma, Tengyu and Malik, Ali and Manning, Christopher D. and Mirchandani, Suvir and Mitchell, Eric and Munyikwa, Zanele and Nair, Suraj and Narayan, Avanika and Narayanan, Deepak and Newman, Ben and Nie, Allen and Niebles, Juan Carlos and Nilforoshan, Hamed and Nyarko, Julian and Ogut, Giray and Orr, Laurel and Papadimitriou, Isabel and Park, Joon Sung and Piech, Chris and Portelance, Eva and Potts, Christopher and Raghunathan, Aditi and Reich, Rob and Ren, Hongyu and Rong, Frieda and Roohani, Yusuf and Ruiz, Camilo and Ryan, Jack and Ré, Christopher and Sadigh, Dorsa and Sagawa, Shiori and Santhanam, Keshav and Shih, Andy and Srinivasan, Krishnan and Tamkin, Alex and Taori, Rohan and Thomas, Armin W. and Tramèr, Florian and Wang, Rose E. and Wang, William and Wu, Bohan and Wu, Jiajun and Wu, Yuhuai and Xie, Sang Michael and Yasunaga, Michihiro and You, Jiaxuan and Zaharia, Matei and Zhang, Michael and Zhang, Tianyi and Zhang, Xikun and Zhang, Yuhui and Zheng, Lucia and Zhou, Kaitlyn and Liang, Percy},
journal={arXiv preprint arXiv:2108.07258},
year={2021}
}

@inproceedings{ruder2019,
title={Transfer Learning in Natural Language Processing},
author={Ruder, Sebastian and Peters, Matthew E. and Swayamdipta, Swabha and Wolf, Thomas},
booktitle={Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Tutorials},
pages={15--18},
year={2019}
}

@misc{ibm2023,
title={What is Retrieval-Augmented Generation?},
author={Martineau, Kim},
howpublished={IBM Research Blog},
year={2023}
}

@article{lewis2020,
title={Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks},
author={Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and Karpukhin, Vladimir and Goyal, Naman and Küttler, Heinrich and Lewis, Mike and Yih, Wen-tau and Rocktäschel, Tim and Riedel, Sebastian and Kiela, Douwe},
journal={arXiv preprint arXiv:2005.11401},
year={2020}
}

@article{karpukhin2020,
title={Dense Passage Retrieval for Open-Domain Question Answering},
author={Karpukhin, Vladimir and Oğuz, Barlas and Min, Sewon and Lewis, Patrick and Wu, Ledell and Edunov, Sergey and Chen, Danqi and Yih, Wen-tau},
journal={arXiv preprint arXiv:2004.04906},
year={2020}
}

@article{laban2024,
title={Summary of a Haystack: A Challenge to Long-Context LLMs and RAG Systems},
author={Laban, Philippe and Fabbri, Alexander R and Xiong, Caiming and Wu, Chien-Sheng},
journal={arXiv preprint arXiv:2407.01370},
year={2024}
}

@article{zhong2024,
title={Harnessing Large Language Models as Post-hoc Correctors},
author={Zhong, Zhiqiang and Zhou, Kuangyu and Mottin, Davide},
journal={arXiv preprint arXiv:2402.13414},
year={2024}
}

@article{li2024,
title={Towards a Unified Language Model for Knowledge-Intensive Tasks Utilizing External Corpus},
author={Li, Xiaoxi and Dou, Zhicheng and Zhou, Yujia and Liu, Fangchao},
journal={arXiv preprint arXiv:2402.01176},
year={2024}
}

@article{johnson2017,
title={Billion-scale Similarity Search with GPUs},
author={Johnson, Jeff and Douze, Matthijs and Jégou, Hervé},
journal={arXiv preprint arXiv:1702.08734},
year={2017}
}

@article{reimers2019,
title={Sentence-BERT: Sentence Embeddings Using Siamese BERT-Networks},
author={Reimers, Nils and Gurevych, Iryna},
journal={arXiv preprint arXiv:1908.10084},
year={2019}
}

@misc{gpt4omini,
title={GPT-4o Mini: Advancing Cost-Efficient Intelligence},
author={OpenAI},
howpublished={OpenAI Research},
year={2024}
}

@misc{openaidocs,
title={OpenAI Platform},
author={OpenAI},
howpublished={OpenAI Documentation},
year={2024}
}

@misc{pinecone,
title={Pinecone Documentation: Getting Started},
author={Pinecone},
howpublished={Pinecone Documentation},
year={2024}
}

@article{kryscinski2019,
title={Evaluating the Factual Consistency of Abstractive Text Summarization},
author={Kryściński, Wojciech and McCann, Bryan and Xiong, Caiming and Socher, Richard},
journal={arXiv preprint arXiv:1910.12840},
year={2019}
}

@inproceedings{taunk2019,
title={A Brief Review of Nearest Neighbor Algorithm for Learning and Classification},
author={Taunk, K. and De, S. and Verma, S. and Swetapadma, A.},
booktitle={2019 International Conference on Intelligent Computing and Control Systems (ICCS)},
pages={1255--1260},
year={2019}
}

@article{cunningham2020,
title={k-Nearest Neighbour Classifiers: 2nd Edition (with Python Examples)},
author={Cunningham, Padraig and Delany, Sarah Jane},
journal={arXiv preprint arXiv:2004.04523},
year={2020}
}

@article{yao2024,
title={VELO: A Vector Database-Assisted Cloud-Edge Collaborative LLM QoS Optimization Framework},
author={Yao, Zhi and Tang, Zhiqing and Lou, Jiong and Shen, Ping and Jia, Weijia},
journal={arXiv preprint arXiv:2406.13399},
year={2024}
}

@misc{simpleqa,
title={Introducing SimpleQA},
author={OpenAI},
howpublished={OpenAI Research},
year={2024}
}

@article{mikolov2013,
title={Efficient Estimation of Word Representations in Vector Space},
author={Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
journal={arXiv preprint arXiv:1301.3781},
year={2013}
}