Skip to content

Commit

Permalink
Bugfix for saving the data to local file, and some encoding related stuffs.
Browse files Browse the repository at this point in the history
  • Loading branch information
fivesheep committed Jul 31, 2009
1 parent d3b7fa0 commit 27904b4
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions UrlReader/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class UrlReader(callbacks.Plugin):

databases={}
URL_PATTERN=re.compile(r'https?:\/\/[^ ]+',re.I)
TITLE_PATTERN=re.compile(r'<title.*?>(.*?)<\/title>',re.I)
TITLE_PATTERN=re.compile(r'<title.*?>(.*?)(\n|<\/title>)',re.I)
CHAR_PATTERN=re.compile(r'charset=([a-z0-9_-]+)',re.I)

def doPrivmsg(self, irc, msg):
Expand Down Expand Up @@ -86,7 +86,8 @@ def _handleUrl(self,irc,channel,nick,url):
try:
title=self._getTitle(url)
if title != None:
urldb[url]=pickle.dumps([nick,title])
urldb[url]=pickle.dumps([nick.decode('utf-8'),title])
urldb.sync()
irc.reply(title.encode('utf-8'))
except:
traceback.print_exc()
Expand All @@ -98,8 +99,8 @@ def _getTitle(self,url):
req=urllib2.Request(url,None,{'User-agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)'})
handler=urllib2.urlopen(req)
if handler.headers['Content-Type'].find('html')>-1:
# read the first 4096 bytes
text=handler.read(4096)
# read the first 16kbytes
text=handler.read(16384)
handler.close()
tm=UrlReader.TITLE_PATTERN.search(text)
cm=UrlReader.CHAR_PATTERN.search(text)
Expand Down

0 comments on commit 27904b4

Please sign in to comment.