From 7aa7fda64ca5640418427defa47aeb43c7ce7d6f Mon Sep 17 00:00:00 2001 From: Geemi Wellawatte <49410838+geemi725@users.noreply.github.com> Date: Sun, 15 Sep 2024 18:54:54 -0700 Subject: [PATCH 1/2] `DocMetadataClient` can now take instantiated providers and processors (#414) - [x] updated clients to work with instantiated providers and processors - [x] retraction test now looks at a stub csv file instead of downloading - [x] update RetrationDataPostProcessor -> Retra**c**tionDataPostProcessor --- paperqa/clients/__init__.py | 31 +++++++++++++++++++++------- paperqa/clients/retractions.py | 2 +- tests/stub_data/stub_retractions.csv | 21 +++++++++++++++++++ tests/test_clients.py | 18 ++++++++++------ 4 files changed, 58 insertions(+), 14 deletions(-) create mode 100644 tests/stub_data/stub_retractions.csv diff --git a/paperqa/clients/__init__.py b/paperqa/clients/__init__.py index 5505eaf6..9e7eb18c 100644 --- a/paperqa/clients/__init__.py +++ b/paperqa/clients/__init__.py @@ -14,7 +14,7 @@ from .client_models import MetadataPostProcessor, MetadataProvider from .crossref import CrossrefProvider from .journal_quality import JournalQualityPostProcessor -from .retractions import RetrationDataPostProcessor +from .retractions import RetractionDataPostProcessor from .semantic_scholar import SemanticScholarProvider from .unpaywall import UnpaywallProvider @@ -29,7 +29,7 @@ ALL_CLIENTS: Collection[type[MetadataPostProcessor | MetadataProvider]] = { *DEFAULT_CLIENTS, UnpaywallProvider, - RetrationDataPostProcessor, + RetractionDataPostProcessor, } @@ -89,12 +89,19 @@ def __init__( # pylint: disable=dangerous-default-value self.tasks.append( DocMetadataTask( providers=[ - c() for c in sub_clients if issubclass(c, MetadataProvider) + c if isinstance(c, MetadataProvider) else c() + for c in sub_clients + if (isinstance(c, type) and issubclass(c, MetadataProvider)) + or isinstance(c, MetadataProvider) ], processors=[ - c() + c if isinstance(c, MetadataPostProcessor) else c() for c in sub_clients - if issubclass(c, MetadataPostProcessor) + if ( + isinstance(c, type) + and issubclass(c, MetadataPostProcessor) + ) + or isinstance(c, MetadataPostProcessor) ], ) ) @@ -102,9 +109,19 @@ def __init__( # pylint: disable=dangerous-default-value if not self.tasks and all(not isinstance(c, Collection) for c in clients): self.tasks.append( DocMetadataTask( - providers=[c() for c in clients if issubclass(c, MetadataProvider)], # type: ignore[operator, arg-type] + providers=[ + c if isinstance(c, MetadataProvider) else c() # type: ignore[redundant-expr] + for c in clients + if (isinstance(c, type) and issubclass(c, MetadataProvider)) + or isinstance(c, MetadataProvider) + ], processors=[ - c() for c in clients if issubclass(c, MetadataPostProcessor) # type: ignore[operator, arg-type] + c if isinstance(c, MetadataPostProcessor) else c() # type: ignore[redundant-expr] + for c in clients + if ( + isinstance(c, type) and issubclass(c, MetadataPostProcessor) + ) + or isinstance(c, MetadataPostProcessor) ], ) ) diff --git a/paperqa/clients/retractions.py b/paperqa/clients/retractions.py index 40163581..1ee8516a 100644 --- a/paperqa/clients/retractions.py +++ b/paperqa/clients/retractions.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) -class RetrationDataPostProcessor(MetadataPostProcessor[DOIQuery]): +class RetractionDataPostProcessor(MetadataPostProcessor[DOIQuery]): def __init__(self, retraction_data_path: os.PathLike | str | None = None) -> None: if retraction_data_path is None: diff --git a/tests/stub_data/stub_retractions.csv b/tests/stub_data/stub_retractions.csv new file mode 100644 index 00000000..66f12ad5 --- /dev/null +++ b/tests/stub_data/stub_retractions.csv @@ -0,0 +1,21 @@ +Record ID,Title,Subject,Institution,Journal,Publisher,Country,Author,URLS,ArticleType,RetractionDate,RetractionDOI,RetractionPubMedID,OriginalPaperDate,OriginalPaperDOI,OriginalPaperPubMedID,RetractionNature,Reason,Paywalled,Notes +56416,The Feature Recognition of Motor Noise Based on the Improved EEMD Model,(B/T) Computer Science;(PHY) Engineering - Mechanical;,"Intelligent Manufacturing College of Wuhan Guanggu Vocational College, Wuhan 430079, Hubei, China; Wuhan Technology and Business University, Wuhan 430065, Hubei, China;",Computational Intelligence and Neuroscience,Hindawi,China,Qing Liao;Long Li;Jiaqi Luo,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9756480,37476288.0,8/18/2022 0:00,10.1155/2022/9172719,36035816.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56415,Technical Evaluation of Commercial Sperm DFI Quality Control Products in SCSA Testing,(BLS) Biochemistry;(BLS) Genetics;(HSC) Medicine - Urology/Nephrology;,"NHC Key Laboratory of Male Reproduction and Genetics, Guangdong Provincial Reproductive Science Institute (Guangdong Provincial Fertility Hospital), Guangzhou 510600, Guangdong, China;",Journal of Healthcare Engineering,Hindawi,China,Tao Pang;Xinzong Zhang,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9812053,37476804.0,3/4/2022 0:00,10.1155/2022/9552123,35281543.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56414,Study on Syndrome Intervention of Nonalcoholic Fatty Liver Disease from the Perspective of Deficiency and Excess,(HSC) Medicine - Gastroenterology;(HSC) Public Health and Safety;,"Zhang Zhongjing School of TCM, Nanyang Institute of Technology, Nanyang 473004, Henan, China;",Journal of Healthcare Engineering,Hindawi,China,Sanhang Wang;Hu Jiu lüe;Jiao Zhao,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9863546,37476791.0,9/16/2021 0:00,10.1155/2021/9928160,34567490.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Informed/Patient Consent - None/Withdrawn;+Investigation by Journal/Publisher;+Investigation by Third Party;+Lack of IRB/IACUC Approval;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56413,Study of Deep Learning-Based Legal Judgment Prediction in Internet of Things Era,(B/T) Computer Science;(SOC) Law/Legal Issues;,"Hubei University of Science and Technology, Xianning, Hubei, China; Nanjing University of Information Science and Technology, Nanjing, China;",Computational Intelligence and Neuroscience,Hindawi,China,Min Zheng;Bo Liu;Le Sun,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9856834,37476266.0,8/8/2022 0:00,10.1155/2022/8490760,35978889.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56412,Research on Modular Management of Railway Bridge Technology Innovation in Complex and Difficult Mountainous Areas,(B/T) Computer Science;(B/T) Transportation;,"School of Civil Engineering, Central South University, Changsha 410083, Hunan, China; Guangzhou Municipal Engineering Design & Research Institute Co.,Ltd., Guangzhou 510000, China; China Academy of Railway Science Corporation Limited, Beijing 100081, China;",Computational Intelligence and Neuroscience,Hindawi,China,Huihua Chen;Qintao Cheng;Yingxue Xie;Chaoxun Cai;Xinlin Ban;Xiaodong Hu,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9789261,37476212.0,8/11/2022 0:00,10.1155/2022/8799586,35990112.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56411,Research and Implementation of Intelligent Evaluation System of Teaching Quality in Universities Based on Artificial Intelligence Neural Network Model,(B/T) Computer Science;(SOC) Education;,"Education Department, Shaanxi Normal University, Xi’an 710062, Shaanxi, China;",Mathematical Problems in Engineering,Hindawi,China,Bo Gao,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9860810,0.0,3/16/2022 0:00,10.1155/2022/8224184,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56410,Relationship between Long Noncoding RNA H19 Polymorphisms and Risk of Coronary Artery Disease in a Chinese Population: A Case-Control Study,(BLS) Genetics;(HSC) Biostatistics/Epidemiology;(HSC) Medicine - Cardiovascular;,"Department of Cardiology, The Fourth Affiliated Hospital of China Medical University, Shenyang, Liaoning 110034, China; Tumor Etiology and Screening Department of Cancer Institute and General Surgery, The First Affiliated Hospital of China Medical University, Key Laboratory of Cancer Etiology and Prevention (China Medical University), Liaoning Provincial Education Department, Shenyang, Liaoning 110001, China; China Medical University, China;",Disease Markers,Hindawi,China,Weina Hu;Hanxi Ding;Qian Xu;Xueying Zhang;Datong Yang;Yuanzhe Jin,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9761794,37476633.0,5/11/2020 0:00,10.1155/2020/9839612,32454910.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56409,Preliminary Evaluation of Artificial Intelligence-Based Anti-Hepatocellular Carcinoma Molecular Target Study in Hepatocellular Carcinoma Diagnosis Research,(B/T) Computer Science;(BLS) Biology - Molecular;(HSC) Medicine - Gastroenterology;(HSC) Medicine - Oncology;,"Infectious Disease Department, Hospital of Chengdu University of Traditional Chinese Medicine, Chengdu, Sichuan 610072, China; Qijiang Hospital of The First Affiliated Hospital of Chongqing Medical University, Chongqing 401420, China; Wenlong Hospital of Qijiang, Chongqing 401420, China; Taiyuan Hospital of Traditional Chinese Medicine, Taiyuan, Shanxi 030000, China;",BioMed Research International,Hindawi,China,Yuan Wang;Chao Wei;Xiangui Deng;Shudi Gao;Jing Chen,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,7/12/2023 0:00,10.1155/2023/9850584,37475788.0,9/19/2022 0:00,10.1155/2022/8365565,36193305.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Informed/Patient Consent - None/Withdrawn;+Investigation by Journal/Publisher;+Investigation by Third Party;+Lack of IRB/IACUC Approval;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No, +56406,The Identification and Dissemination of Creative Elements of New Media Original Film and Television Works Based on Review Text Mining and Machine Learning,(B/T) Computer Science;(B/T) Data Science;(B/T) Technology;(HUM) Arts - Film Studies;,"Deputy Director of Graduate Office, Shandong College of Arts, Jinan, Shandong 250300, China;",Mathematical Problems in Engineering,Hindawi,China,Xiaonan Yu,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,10/4/2023 0:00,10.1155/2023/9838340,0.0,7/16/2022 0:00,10.1155/2022/5856069,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/83D596FB181C1E3A7426780D29CB70 +56397,Teaching Innovation and Development of Ideological and Political Courses in Colleges and Universities: Based on the Background of Wireless Communication and Artificial Intelligence Decision Making,(B/T) Computer Science;(B/T) Technology;(SOC) Education;(SOC) Philosophy;(SOC) Political Science;,"School of Marxism, Huaiyin Institute of Technology, Huaian 223003, China;",Mathematical Problems in Engineering,Hindawi,China,Zhenpeng Xia;Juan Liu,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,10/4/2023 0:00,10.1155/2023/9784298,0.0,5/6/2022 0:00,10.1155/2022/3768224,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/DC5E7901F6266F9A6FA6F7B2C4BDFF +56396,TCM Physical Health Management Training and Nursing Effect Evaluation Based on Digital Twin,(B/T) Computer Science;(B/T) Technology;(BLS) Anatomy/Physiology;(HSC) Medicine - General;(HSC) Medicine - Nursing;,"Hospital of Chengdu University of Traditional Chinese Medicine, Chengdu 610072, Sichuan, China;",Scientific Programming,Hindawi,China,Jing Jiang;Qijia Li;Fei Yang,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,10/4/2023 0:00,10.1155/2023/9840830,0.0,9/27/2022 0:00,10.1155/2022/3907481,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Informed/Patient Consent - None/Withdrawn;+Investigation by Journal/Publisher;+Investigation by Third Party;+Lack of IRB/IACUC Approval;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/8F8D76C66942A80D70FA3563489298 +56380,Curriculum Reform and Adaptive Teaching of Computer Education Based on Online Education and B5G Model,(B/T) Computer Science;(B/T) Technology;(SOC) Education;,"School of Big Data and Automation Engineering, Chongqing Chemical Industry Vocational College, Chongqing 401228, China",Wireless Communications and Mobile Computing,Hindawi,China,Rongming Tian;Yan Tang,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9818904,0.0,8/25/2022 0:00,10.1155/2022/9636452,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/EDB42A43B4671D904792B70576E086 +56379,Application of Support Vector Machine Model Based on Machine Learning in Art Teaching,(B/T) Computer Science;(B/T) Technology;(HUM) Arts - General;,"Department of Fine Arts, Changji University, Changji, 831100 Xinjiang, China; Department of Computer Engineering, Changji University, Changji, 831100 Xinjiang, China; Department of Fine Arts, Chongqing Normal University, Chongqing 401331, China",Wireless Communications and Mobile Computing,Hindawi,China,YongMing Hua;Fang Li;Shuwen Yang,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9789170,0.0,6/20/2022 0:00,10.1155/2022/7954589,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/82357471108F95B6112C5ED9DEC534 +56378,Application of Emotional Education in College Physical Education Based on Mobile Internet of Things Model,(B/T) Technology;(SOC) Education;(SOC) Psychology;,"Department of Public Basic Education, Henan Vocational University of Science and Technology, Zhoukou, 466000 Henan, China; Office of International Cooperation and Exchange, Zhoukou Normal University, Zhoukou, 466000 Henan, China",Wireless Communications and Mobile Computing,Hindawi,China,Yanqun Huang;Kun Li,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9807147,0.0,11/19/2022 0:00,10.1155/2022/8728925,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/091C35C103E4A33487FED77346B68C +56363,Preliminary Study on the Evaluation of Multimodal Effects of Type 2 Diabetic Nephropathy,(HSC) Medicine - Diabetes;(HSC) Medicine - Urology/Nephrology;,"Henan University of Chinese Medicine, Zhengzhou 450000, China;",Scientific Programming,Hindawi,China,Huandong Zhao,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,10/4/2023 0:00,10.1155/2023/9820827,0.0,9/13/2021 0:00,10.1155/2021/2563477,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Informed/Patient Consent - None/Withdrawn;+Investigation by Journal/Publisher;+Investigation by Third Party;+Lack of IRB/IACUC Approval;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/A88D39F9DC3A72A2E62560C028738D +56362,The Dilemma and Countermeasures of Music Education under the Background of Big Data,(B/T) Data Science;(B/T) Technology;(HUM) Arts - Music;(SOC) Education;,"Pingdingshan Polytechnic College, PingDingShan, Henan 467001, China; National University of Life and Environmental Sciences of Ukraine, Kyiv 03041, Ukraine",Wireless Communications and Mobile Computing,Hindawi,China;Ukraine,Jiaye Han,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9753475,0.0,9/16/2022 0:00,10.1155/2022/8341966,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Informed/Patient Consent - None/Withdrawn;+Investigation by Journal/Publisher;+Investigation by Third Party;+Lack of IRB/IACUC Approval;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/09293BF5BE20A9DF8B659F90FD4D77 +56360,The Components of Smart Sports in Universities with the Assistance of Smart Devices,(B/T) Technology;(SOC) Education;(SOC) Sports and Recreation;,"Northeast Normal University, Changchun, 130000 Jilin, China",Wireless Communications and Mobile Computing,Hindawi,China,Zhengyang Shi,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9868041,0.0,9/19/2022 0:00,10.1155/2022/6469162,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/CA854C2400CA247C3AB481D9FFD7D0 +56358,Reform and Challenges of Ideological and Political Education for College Students Based on Wireless Communication and Virtual Reality Technology,(B/T) Technology;(SOC) Education;(SOC) Political Science;,"Business School, Chengdu University, Chengdu, Sichuan 610106, China",Wireless Communications and Mobile Computing,Hindawi,China,Xing Hang,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9797068,0.0,8/15/2021 0:00,10.1155/2021/6151249,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/C3CDE7309B893622FDF6ADB2337E14 +56357,Construction of Intelligent Textbook Courseware Management System Based on Artificial Intelligence Technology,(B/T) Technology;(SOC) Education;,"Educational Administration, Chengdu Normal University, Chengdu 611130, China",Wireless Communications and Mobile Computing,Hindawi,China,Qiu Zhao,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9826149,0.0,8/12/2022 0:00,10.1155/2022/9993183,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/CBCF6814AB30FC8EB062AE4105C1CF +56355,Construction of Ideological and Political Practice Teaching System of Documentary Creation Course Based on Deep Learning,(B/T) Computer Science;(B/T) Data Science;(B/T) Technology;(HUM) Arts - Film Studies;(SOC) Education;(SOC) Political Science;,"School of Culture and Media Huanghuai University, Zhumadian City, Henan Province 463000, China; School of Arts Cheongju University, Cheongju City, Republic of Korea",Wireless Communications and Mobile Computing,Hindawi,China;South Korea,Qiwei Ren,https://retractionwatch.com/2022/09/28/exclusive-hindawi-and-wiley-to-retract-over-500-papers-linked-to-peer-review-rings/;https://retractionwatch.com/2023/04/05/wiley-and-hindawi-to-retract-1200-more-papers-for-compromised-peer-review/;https://retractionwatch.com/2023/12/19/hindawi-reveals-process-for-retracting-more-than-8000-paper-mill-articles/,Research Article;,9/27/2023 0:00,10.1155/2023/9818015,0.0,8/28/2022 0:00,10.1155/2022/6299168,0.0,Retraction,+Concerns/Issues About Data;+Concerns/Issues About Results;+Concerns/Issues about Referencing/Attributions;+Concerns/Issues with Peer Review;+Investigation by Journal/Publisher;+Investigation by Third Party;+Paper Mill;+Randomly Generated Content;+Unreliable Results;,No,See also: https://pubpeer.com/publications/76B5A8D5923D6901935160551185DB diff --git a/tests/test_clients.py b/tests/test_clients.py index f389b802..8b35c6a4 100644 --- a/tests/test_clients.py +++ b/tests/test_clients.py @@ -17,7 +17,7 @@ ) from paperqa.clients.client_models import MetadataPostProcessor, MetadataProvider from paperqa.clients.journal_quality import JournalQualityPostProcessor -from paperqa.clients.retractions import RetrationDataPostProcessor +from paperqa.clients.retractions import RetractionDataPostProcessor @pytest.mark.vcr @@ -100,8 +100,9 @@ @pytest.mark.asyncio async def test_title_search(paper_attributes: dict[str, str]) -> None: async with aiohttp.ClientSession() as session: - client_list = list(ALL_CLIENTS) - client_list.remove(RetrationDataPostProcessor) + client_list = [ + client for client in ALL_CLIENTS if client != RetractionDataPostProcessor + ] client = DocMetadataClient( session, clients=cast( @@ -206,8 +207,9 @@ async def test_title_search(paper_attributes: dict[str, str]) -> None: @pytest.mark.asyncio async def test_doi_search(paper_attributes: dict[str, str]) -> None: async with aiohttp.ClientSession() as session: - client_list = list(ALL_CLIENTS) - client_list.remove(RetrationDataPostProcessor) + client_list = [ + client for client in ALL_CLIENTS if client != RetractionDataPostProcessor + ] client = DocMetadataClient( session, clients=cast( @@ -572,16 +574,20 @@ async def test_ensure_sequential_run_early_stop( assert record_indices["early_stop"] != -1, "We should stop early." +@pytest.mark.vcr @pytest.mark.asyncio async def test_crossref_retraction_status(): async with aiohttp.ClientSession() as session: + retract_processor = RetractionDataPostProcessor( + "stub_data/stub_retractions.csv" + ) crossref_client = DocMetadataClient( session, clients=cast( Collection[ type[MetadataPostProcessor[Any]] | type[MetadataProvider[Any]] ], - [CrossrefProvider, RetrationDataPostProcessor], + [CrossrefProvider, retract_processor], ), ) crossref_details = await crossref_client.query( From 846f136088b00cfee276f4d439a9fbda14c6cbc0 Mon Sep 17 00:00:00 2001 From: James Braza Date: Sun, 15 Sep 2024 21:37:26 -0700 Subject: [PATCH 2/2] Promoting agent factories to `Settings` (#407) --- paperqa/agents/main.py | 111 +++------------------------------------ paperqa/settings.py | 114 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 106 deletions(-) diff --git a/paperqa/agents/main.py b/paperqa/agents/main.py index 7443b15c..d1bd0483 100644 --- a/paperqa/agents/main.py +++ b/paperqa/agents/main.py @@ -2,8 +2,7 @@ import logging import os from collections.abc import Awaitable, Callable -from pydoc import locate -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any from aviary.message import MalformedMessageError, Message from aviary.tools import ( @@ -13,7 +12,8 @@ ToolSelector, ToolSelectorLedger, ) -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel +from rich.console import Console from tenacity import ( Retrying, before_sleep_log, @@ -21,28 +21,7 @@ stop_after_attempt, ) -try: - from ldp.agent import ( - Agent, - HTTPAgentClient, - MemoryAgent, - ReActAgent, - SimpleAgent, - SimpleAgentState, - ) - from ldp.graph.memory import Memory, UIndexMemoryModel - from ldp.graph.op_utils import set_training_mode - from ldp.llms import EmbeddingModel - - _Memories = TypeAdapter(dict[int, Memory] | list[Memory]) # type: ignore[var-annotated] - - HAS_LDP_INSTALLED = True -except ImportError: - HAS_LDP_INSTALLED = False -from rich.console import Console - from paperqa.docs import Docs -from paperqa.settings import Settings from paperqa.types import Answer from paperqa.utils import pqa_directory @@ -53,6 +32,7 @@ from .tools import EnvironmentState, GatherEvidence, GenerateAnswer, PaperSearch if TYPE_CHECKING: + from ldp.agent import Agent, SimpleAgentState from ldp.graph.ops import OpResult logger = logging.getLogger(__name__) @@ -96,85 +76,6 @@ async def agent_query( return response -def to_aviary_tool_selector( - agent_type: str | type, settings: Settings -) -> ToolSelector | None: - """Attempt to convert the agent type to an aviary ToolSelector.""" - if agent_type is ToolSelector or ( - isinstance(agent_type, str) - and ( - agent_type == ToolSelector.__name__ - or ( - agent_type.startswith(ToolSelector.__module__.split(".", maxsplit=1)[0]) - and locate(agent_type) is ToolSelector - ) - ) - ): - return ToolSelector( - model_name=settings.agent.agent_llm, - acompletion=settings.get_agent_llm().router.acompletion, - **(settings.agent.agent_config or {}), - ) - return None - - -async def to_ldp_agent( - agent_type: str | type, settings: Settings -) -> "Agent[SimpleAgentState] | None": - """Attempt to convert the agent type to an ldp Agent.""" - if not isinstance(agent_type, str): # Convert to fully qualified name - agent_type = f"{agent_type.__module__}.{agent_type.__name__}" - if not agent_type.startswith("ldp"): - return None - if not HAS_LDP_INSTALLED: - raise ImportError( - "ldp agents requires the 'ldp' extra for 'ldp'. Please:" - " `pip install paper-qa[ldp]`." - ) - - # TODO: support general agents - agent_cls = cast(type[Agent], locate(agent_type)) - agent_settings = settings.agent - agent_llm, config = agent_settings.agent_llm, agent_settings.agent_config or {} - if issubclass(agent_cls, ReActAgent | MemoryAgent): - if ( - issubclass(agent_cls, MemoryAgent) - and "memory_model" in config - and "memories" in config - ): - if "embedding_model" in config["memory_model"]: - # Work around EmbeddingModel not yet supporting deserialization - config["memory_model"]["embedding_model"] = EmbeddingModel.from_name( - embedding=config["memory_model"].pop("embedding_model")["name"] - ) - config["memory_model"] = UIndexMemoryModel(**config["memory_model"]) - memories = _Memories.validate_python(config.pop("memories")) - await asyncio.gather( - *( - config["memory_model"].add_memory(memory) - for memory in ( - memories.values() if isinstance(memories, dict) else memories - ) - ) - ) - return agent_cls( - llm_model={"model": agent_llm, "temperature": settings.temperature}, - **config, - ) - if issubclass(agent_cls, SimpleAgent): - return agent_cls( - llm_model={"model": agent_llm, "temperature": settings.temperature}, - sys_prompt=agent_settings.agent_system_prompt, - **config, - ) - if issubclass(agent_cls, HTTPAgentClient): - set_training_mode(False) - return HTTPAgentClient[SimpleAgentState]( - agent_state_type=SimpleAgentState, **config - ) - raise NotImplementedError(f"Didn't yet handle agent type {agent_type}.") - - async def run_agent( docs: Docs, query: QueryRequest, @@ -205,11 +106,11 @@ async def run_agent( if agent_type == "fake": answer, agent_status = await run_fake_agent(query, docs, **runner_kwargs) - elif tool_selector_or_none := to_aviary_tool_selector(agent_type, query.settings): + elif tool_selector_or_none := query.settings.make_aviary_tool_selector(agent_type): answer, agent_status = await run_aviary_agent( query, docs, tool_selector_or_none, **runner_kwargs ) - elif ldp_agent_or_none := await to_ldp_agent(agent_type, query.settings): + elif ldp_agent_or_none := await query.settings.make_ldp_agent(agent_type): answer, agent_status = await run_ldp_agent( query, docs, ldp_agent_or_none, **runner_kwargs ) diff --git a/paperqa/settings.py b/paperqa/settings.py index c058c7ec..0720abcc 100644 --- a/paperqa/settings.py +++ b/paperqa/settings.py @@ -1,12 +1,41 @@ +import asyncio import importlib.resources import os from enum import StrEnum from pathlib import Path +from pydoc import locate from typing import Any, ClassVar, assert_never, cast -from pydantic import BaseModel, ConfigDict, Field, computed_field, field_validator +from aviary.tools import ToolSelector +from pydantic import ( + BaseModel, + ConfigDict, + Field, + TypeAdapter, + computed_field, + field_validator, +) from pydantic_settings import BaseSettings, CliSettingsSource, SettingsConfigDict +try: + from ldp.agent import ( + Agent, + HTTPAgentClient, + MemoryAgent, + ReActAgent, + SimpleAgent, + SimpleAgentState, + ) + from ldp.graph.memory import Memory, UIndexMemoryModel + from ldp.graph.op_utils import set_training_mode + from ldp.llms import EmbeddingModel as LDPEmbeddingModel + + _Memories = TypeAdapter(dict[int, Memory] | list[Memory]) # type: ignore[var-annotated] + + HAS_LDP_INSTALLED = True +except ImportError: + HAS_LDP_INSTALLED = False + from paperqa.llms import EmbeddingModel, LiteLLMModel, embedding_model_factory from paperqa.prompts import ( citation_prompt, @@ -520,6 +549,89 @@ def get_agent_llm(self) -> LiteLLMModel: def get_embedding_model(self) -> EmbeddingModel: return embedding_model_factory(self.embedding, **(self.embedding_config or {})) + def make_aviary_tool_selector(self, agent_type: str | type) -> ToolSelector | None: + """Attempt to convert the input agent type to an aviary ToolSelector.""" + if agent_type is ToolSelector or ( + isinstance(agent_type, str) + and ( + agent_type == ToolSelector.__name__ + or ( + agent_type.startswith( + ToolSelector.__module__.split(".", maxsplit=1)[0] + ) + and locate(agent_type) is ToolSelector + ) + ) + ): + return ToolSelector( + model_name=self.agent.agent_llm, + acompletion=self.get_agent_llm().router.acompletion, + **(self.agent.agent_config or {}), + ) + return None + + async def make_ldp_agent( + self, agent_type: str | type + ) -> "Agent[SimpleAgentState] | None": + """Attempt to convert the input agent type to an ldp Agent.""" + if not isinstance(agent_type, str): # Convert to fully qualified name + agent_type = f"{agent_type.__module__}.{agent_type.__name__}" + if not agent_type.startswith("ldp"): + return None + if not HAS_LDP_INSTALLED: + raise ImportError( + "ldp agents requires the 'ldp' extra for 'ldp'. Please:" + " `pip install paper-qa[ldp]`." + ) + + # TODO: support general agents + agent_cls = cast(type[Agent], locate(agent_type)) + agent_settings = self.agent + agent_llm, config = agent_settings.agent_llm, agent_settings.agent_config or {} + if issubclass(agent_cls, ReActAgent | MemoryAgent): + if ( + issubclass(agent_cls, MemoryAgent) + and "memory_model" in config + and "memories" in config + ): + if "embedding_model" in config["memory_model"]: + # Work around LDPEmbeddingModel not yet supporting deserialization + config["memory_model"]["embedding_model"] = ( + LDPEmbeddingModel.from_name( + embedding=config["memory_model"].pop("embedding_model")[ + "name" + ] + ) + ) + config["memory_model"] = UIndexMemoryModel(**config["memory_model"]) + memories = _Memories.validate_python(config.pop("memories")) + await asyncio.gather( + *( + config["memory_model"].add_memory(memory) + for memory in ( + memories.values() + if isinstance(memories, dict) + else memories + ) + ) + ) + return agent_cls( + llm_model={"model": agent_llm, "temperature": self.temperature}, + **config, + ) + if issubclass(agent_cls, SimpleAgent): + return agent_cls( + llm_model={"model": agent_llm, "temperature": self.temperature}, + sys_prompt=agent_settings.agent_system_prompt, + **config, + ) + if issubclass(agent_cls, HTTPAgentClient): + set_training_mode(False) + return HTTPAgentClient[SimpleAgentState]( + agent_state_type=SimpleAgentState, **config + ) + raise NotImplementedError(f"Didn't yet handle agent type {agent_type}.") + MaybeSettings = Settings | str | None