-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
117 lines (100 loc) · 5.44 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#author: Zesheng Xu
#date: 1-14-2021
from urllib.request import Request, urlopen
import re
# defining the scope of majors we are looking at
majors=["bmen","cs","ee","mech","se","ce"]
#a collection of all the course url
url_list = []
#intiniating the rules that we will be writing into the prolog file
rule =[]
#funtion to extract details from the input string, such as corequisite, prerequisite ect
def parse_course_info(info_string, course):
#breaking down each sentence for further analysis
sentence_list = info_string.split(". ")
#print(sentence_list)
for s in sentence_list:
if "Credit cannot be received for " in s or "Same as" in s:
#print(s)
temp_string = re.split(", | or | and | >\) ",s)
#finding course name to write corresponding rules to
for t_s in temp_string:
if "href=\"https://catalog.utdallas.edu/2020/undergraduate/courses" in t_s:
index = t_s.find("https://catalog.utdallas.edu/2020/undergraduate/courses")+1
length = len("https://catalog.utdallas.edu/2020/undergraduate/courses")
course_b = t_s[index: index + length + 10]
course_b = course_b[course_b.rfind("/")+1:course_b.rfind("\"")]
if course_b != course:
#stating that they are equivalent
rule.append("equivalent(%s,%s)." % (course,course_b))
rule.append("equivalent(%s,%s)." % (course_b,course))
if "Prerequisites or Corequisites" in s:
s = s[s.find(":"):]
temp_string = re.split(", | or | and ", s)
# finding course name to write corresponding rules to
for t_s in temp_string:
if "href=\"https://catalog.utdallas.edu/2020/undergraduate/courses" in t_s:
index = t_s.find("https://catalog.utdallas.edu/2020/undergraduate/courses") + 1
length = len("https://catalog.utdallas.edu/2020/undergraduate/courses")
course_b = t_s[index: index + length + 10]
course_b = course_b[course_b.rfind("/") + 1:course_b.rfind("\"")]
if course_b != course:
# establishing rule for prolog that course a is the next level of course B
rule.append("next_level(%s,%s)." % (course, course_b))
# meanwhile they can also be taken together, but with some rules
rule.append("corequisites(%s,%s):- not credit(%s)." % (course, course_b,course))
rule.append("corequisites(%s,%s):- not credit(%s)." % (course_b, course, course_b))
rule.append("corequisites(%s,%s):- not credit(%s)." % (course, course_b, course_b))
rule.append("corequisites(%s,%s):- not credit(%s)." % (course_b, course, course))
if "Prerequisite" in s and not "Corequisites" in s:
s = s[s.find(":"):]
temp_string = re.split(", | or | and ",s)
#finding course name to write corresponding rules to
for t_s in temp_string:
if "href=\"https://catalog.utdallas.edu/2020/undergraduate/courses" in t_s:
index = t_s.find("https://catalog.utdallas.edu/2020/undergraduate/courses") + 1
length = len("https://catalog.utdallas.edu/2020/undergraduate/courses")
course_b = t_s[index: index + length + 10]
course_b = course_b[course_b.rfind("/") + 1:course_b.rfind("\"")]
if course_b != course :
# establishing rule for prolog that course a is the next level of course B
rule.append("next_level(%s,%s)." % (course, course_b))
# print(temp_string)
def write_rules():
f = open("main.lp", "w")
for r in rule:
f.write(r +"\n")
f.close()
# function to parse each individual course and find useful information
def parse_course(url,major):
url = url
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read().decode("utf-8")
temp = webpage.split("\n")
#we find the string containing all the info we need
useful = max(temp, key=len)
#finding course name
course_name = url[url.rfind("/")+1:]
#adding this course to major() rules
rule.append("course(%s):- major(%s)."%(course_name,major))
#passing it down to further parsing and eventually turning into prolog facts
parse_course_info(useful,course_name)
print(major,course_name)
#requesting to access the website through url of each major within our scope
for major in majors:
url = "https://catalog.utdallas.edu/2020/undergraduate/courses/" + major
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
webpage = urlopen(req).read().decode("utf-8")
#parsing url to each individual course webpage from the majors webpage
temp = webpage.split(" ")
for s in temp:
#finding the link
if "href=\"https://catalog.utdallas.edu/2020/undergraduate/courses" in s:
#further eliminating useless links
if s[s.rfind(">")+1:].lower() == major:
url2 = (s[s.find("\"")+1:s.rfind("\"")])
#avoiding repeats
if url2 not in url_list:
url_list.append(url2)
parse_course(url2,major)
write_rules()