forked from AstroKriel/arXivScraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rank_articles.py
executable file
·132 lines (118 loc) · 4.65 KB
/
rank_articles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env python3
## ###############################################################
## LOAD MODULES
## ###############################################################
import sys, time, re, os
from openai import OpenAI
from MyLibrary import HelperFuncs
## ###############################################################
## SEARCH PARAMETERS
## ###############################################################
## Directory that main() scans for ".md" article files and passes to
## HelperFuncs.saveArticle when a rated article is written back.
## NOTE(review): absolute, user-specific path -- adjust per machine.
OUTPUT_DIRECTORY = "/Users/necoturb/Library/CloudStorage/OneDrive-Personal/Obsidian/arXiv-articles"
## Directory from which main() reads "ai_rules.txt" and "ai_criteria.txt".
CONFIG_DIRECTORY = "./configs"
## NOTE(review): not referenced by any code visible in this file; presumably
## selects a config/model profile elsewhere -- confirm before relying on it.
CONFIG_MODELNAME = "plasma"
## ###############################################################
## HELPER FUNCTIONS
## ###############################################################
def readTextFile(filepath):
    """Load a text file through HelperFuncs.readFile.

    The call is made with ``expected_file_extension=".txt"``; whatever
    HelperFuncs.readFile returns for `filepath` is handed back unchanged.
    """
    file_contents = HelperFuncs.readFile(
        filepath,
        expected_file_extension=".txt",
    )
    return file_contents
## ###############################################################
## OPERATOR FUNCTION
## ###############################################################
def evaluateArticle(dict_article_info, prompt_rules, prompt_criteria):
    """Ask the LLM to rate one article and parse its reply.

    Parameters
    ----------
    dict_article_info : dict
        Article metadata; only the "title" and "abstract" entries are read.
    prompt_rules : str
        Text sent as the system message.
    prompt_criteria : str
        Text placed ahead of the title and abstract in the user message.

    Returns
    -------
    dict
        Always contains "status", "ai_rating" and "ai_reason". On success
        "status" is "success", "ai_rating" is a float and "ai_reason" is the
        justification text. On any failure "status" describes the problem and
        both values are None. When a reply was requested but could not be
        parsed, "ai_message" additionally holds the raw reply (or None if the
        reply text itself could not be extracted).

    API and parsing errors are reported through "status" rather than raised.
    """
    article_title = dict_article_info.get("title", "")
    article_abstract = dict_article_info.get("abstract", "")
    if not article_title or not article_abstract:
        return {
            "status": "Missing title or abstract",
            "ai_rating": None,
            "ai_reason": None,
        }
    prompt_input = f"{prompt_criteria} \n\nTITLE: {article_title}\n\nABSTRACT: {article_abstract}"
    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prompt_rules},
                {"role": "user", "content": prompt_input},
            ],
        )
    except Exception as e:
        return {
            "status": f"API call failed: {e}",
            "ai_rating": None,
            "ai_reason": None,
        }
    # Bound before the try so the error handler can always report it, even
    # when pulling the text out of the response object is what failed.
    response_text = None
    try:
        response_text = response.choices[0].message.content
        # DOTALL + non-greedy capture: the justification may span several
        # lines, and must stop at the first "RATING:" followed by a number.
        # The number pattern only accepts text that float() can parse.
        match = re.search(
            r"JUSTIFICATION:\s*(.*?)\s*RATING:\s*(\d*\.?\d+)",
            response_text or "",  # a reply without text simply fails to match
            flags=re.IGNORECASE | re.DOTALL,
        )
        if match is None:
            return {
                "status": "Failed to extract rating and justification",
                "ai_message": response_text,
                "ai_rating": None,
                "ai_reason": None,
            }
        ai_reason = match.group(1).strip()
        ai_rating = float(match.group(2))
    except Exception as e:
        return {
            "status": f"Parsing error occurred: {e}",
            "ai_message": response_text,
            "ai_rating": None,
            "ai_reason": None,
        }
    return {
        "status": "success",
        "ai_rating": ai_rating,
        "ai_reason": ai_reason,
    }
## ###############################################################
## MAIN PROGRAM
## ###############################################################
def main():
    """Rate every not-yet-rated Markdown article in OUTPUT_DIRECTORY.

    The two prompt files are read once from CONFIG_DIRECTORY. Then, for each
    ``.md`` file (in sorted filename order), the article is loaded with
    HelperFuncs.readMarkdownFile2Dict and skipped if it already carries an
    "ai_rating". Otherwise evaluateArticle() is called; on success the rating
    and reason are stored in the article dict and it is written back with
    HelperFuncs.saveArticle, on failure the status (and raw LLM reply, when
    available) is printed. Progress and per-article timing are reported
    through HelperFuncs.print2Terminal.
    """
    # NOTE(review): evaluateArticle() builds its own `OpenAI()` client, which
    # presumably reads OPENAI_API_KEY from the environment by itself, making
    # this class-attribute assignment redundant -- confirm before removing.
    OpenAI.api_key = os.getenv("OPENAI_API_KEY")
    # os.listdir() yields entries in arbitrary order; sort so that repeated
    # runs walk the articles in the same, predictable sequence.
    list_filenames = sorted(
        filename
        for filename in os.listdir(OUTPUT_DIRECTORY)
        if filename.endswith(".md")
    )
    num_articles = len(list_filenames)
    prompt_rules = readTextFile(os.path.join(CONFIG_DIRECTORY, "ai_rules.txt"))
    prompt_criteria = readTextFile(os.path.join(CONFIG_DIRECTORY, "ai_criteria.txt"))
    for article_number, filename in enumerate(list_filenames, start=1):
        filepath_file = os.path.join(OUTPUT_DIRECTORY, filename)
        HelperFuncs.print2Terminal(f"({article_number}/{num_articles})")
        HelperFuncs.print2Terminal(f"Looking at: {filepath_file}")
        dict_article_info = HelperFuncs.readMarkdownFile2Dict(filepath_file)
        if dict_article_info.get("ai_rating") is not None:
            HelperFuncs.print2Terminal("Skipping because the article has already been rated.\n")
            continue
        time_start = time.time()
        dict_ai_result = evaluateArticle(
            dict_article_info=dict_article_info,
            prompt_rules=prompt_rules,
            prompt_criteria=prompt_criteria,
        )
        time_elapsed = time.time() - time_start
        if dict_ai_result["status"].lower() != "success":
            HelperFuncs.print2Terminal(f"Error: {dict_ai_result['status']}")
            # The raw reply is only attached when parsing (not the API call) failed.
            if "ai_message" in dict_ai_result:
                HelperFuncs.print2Terminal(f"LLM response:\n{dict_ai_result['ai_message']}")
        else:
            HelperFuncs.print2Terminal(f"Rating: {dict_ai_result['ai_rating']}")
            dict_article_info["ai_rating"] = dict_ai_result["ai_rating"]
            dict_article_info["ai_reason"] = dict_ai_result["ai_reason"]
            HelperFuncs.saveArticle(OUTPUT_DIRECTORY, dict_article_info)
        # Timing covers only the evaluateArticle() call, success or not.
        HelperFuncs.print2Terminal(f"Elapsed time: {time_elapsed:.2f} seconds.\n")
## ###############################################################
## PROGRAM ENTRY POINT
## ###############################################################
if __name__ == "__main__":
    # Only run the ranking pass when executed as a script, then hand an
    # explicit success status (0) back to the calling shell.
    main()
    raise SystemExit(0)
## END OF PROGRAM