-
Notifications
You must be signed in to change notification settings - Fork 0
/
tmp.py
37 lines (28 loc) · 938 Bytes
/
tmp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from BeautifulSoup import BeautifulSoup
import re
#with open("destinationuppsaladump.html") as f:
# doc = "".join(f)
#
#soup = BeautifulSoup(doc, fromEncoding="utf-8")
#
#header = soup.find("font", "head1").find(text=True)
#description = soup.find("span", id=re.compile("Description$")).find(text=True)
#phone = soup.find("span", id=re.compile("Phone$"))
#print phone.text.replace("Tfn:", "")
# Scratch script: parse a saved HTML page (untguidendump.html) and print the
# heading, the text around it, and the contact section.
#
# NOTE: every print below uses the single-argument parenthesised form, which
# produces the same output under the Python 2 print statement and also parses
# as a Python 3 function call.
with open("untguidendump.html") as f:
    doc = f.read()

soup = BeautifulSoup(doc)

# First <h1> in the document, and the first text node inside it.
header = soup.find("h1")
h1 = header.find(text=True)

# Full text of the element that contains the heading (heading text included).
desc = header.parent.text

print(h1)
print(desc)

# Print the container text with the leading heading removed.  The heading is
# arbitrary page text, so it must be escaped before being used in a pattern;
# otherwise characters such as "(", "+" or "?" change the match or raise
# re.error.  With no heading text there is nothing to strip.
if h1:
    print(re.sub(r"^%s" % re.escape(h1), "", desc, count=1))
else:
    print(desc)

# Debug output kept from the original exploration: container text again, the
# first text node of the container, and the heading text again.
print(header.parent.text)
print(header.parent.find(text=True))
print(header.find(text=True))

# Locate the <strong> labels of the contact section by their exact text.
# web and email are looked up but not printed yet (see the commented-out
# line at the end).
web = soup.find("strong", text="Webb")
email = soup.find("strong", text="E-post")
phone = soup.find("strong", text="Kontakt")

# Text of the element two levels above the "Kontakt" match.
print(phone.parent.parent.text)
#print email.parent.findNextSibling("a").find(text=True)