Skip to content

Commit

Permalink
Add a new plugin for reading the url's title for the channel
Browse files Browse the repository at this point in the history
  • Loading branch information
fivesheep committed Jul 31, 2009
1 parent b9fb584 commit d3b7fa0
Show file tree
Hide file tree
Showing 5 changed files with 277 additions and 0 deletions.
1 change: 1 addition & 0 deletions UrlReader/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Insert a description of your plugin here, with any notes, etc. about using it.
66 changes: 66 additions & 0 deletions UrlReader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
###
# Copyright (c) 2009, Young Ng
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

###

"""
Add a description of the plugin (to be presented to the user inside the wizard)
here. This should describe *what* the plugin does.
"""

import supybot
import supybot.world as world

# Use this for the version of this plugin. You may wish to put a CVS keyword
# in here if you're keeping the plugin in CVS or some similar system.
__version__ = ""

# XXX Replace this with an appropriate author or supybot.Author instance.
__author__ = supybot.authors.unknown

# This is a dictionary mapping supybot.Author instances to lists of
# contributions.
__contributors__ = {}

# This is a url where the most recent plugin package can be downloaded.
__url__ = '' # 'http://supybot.com/Members/yourname/UrlReader/download'

import config
import plugin
reload(plugin) # In case we're being reloaded.
# Add more reloads here if you add third-party modules and want them to be
# reloaded when this plugin is reloaded. Don't forget to import them as well!

if world.testing:
import test

Class = plugin.Class
configure = config.configure


# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
49 changes: 49 additions & 0 deletions UrlReader/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
###
# Copyright (c) 2009, Young Ng
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

###

import supybot.conf as conf
import supybot.registry as registry

def configure(advanced):
# This will be called by supybot to configure this module. advanced is
# a bool that specifies whether the user identified himself as an advanced
# user or not. You should effect your configuration by manipulating the
# registry as appropriate.
from supybot.questions import expect, anything, something, yn
conf.registerPlugin('UrlReader', True)


UrlReader = conf.registerPlugin('UrlReader')
# This is where your configuration variables (if any) should go. For example:
# conf.registerGlobalValue(UrlReader, 'someConfigVariableName',
# registry.Boolean(False, """Help for someConfigVariableName."""))


# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
124 changes: 124 additions & 0 deletions UrlReader/plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
###
# Copyright (c) 2009, Young Ng
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

###

import supybot.utils as utils
from supybot.commands import *
import supybot.plugins as plugins
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks

import supybot.ircmsgs as ircmsgs

import re
import urllib
import urllib2
import bsddb
import pickle
import socket

import traceback

socket.setdefaulttimeout(10)

class UrlReader(callbacks.Plugin):
"""Add the help for "@plugin help UrlReader" here
This should describe *how* to use this plugin."""
threaded = True

databases={}
URL_PATTERN=re.compile(r'https?:\/\/[^ ]+',re.I)
TITLE_PATTERN=re.compile(r'<title.*?>(.*?)<\/title>',re.I)
CHAR_PATTERN=re.compile(r'charset=([a-z0-9_-]+)',re.I)

def doPrivmsg(self, irc, msg):
if irc.isChannel(msg.args[0]):
nick=msg.nick
channel=msg.args[0]
message=msg.args[1]
urls=UrlReader.URL_PATTERN.findall(message)
for url in urls:
self._handleUrl(irc,channel,nick,url)

def _handleUrl(self,irc,channel,nick,url):
# 1 lookup from the db
scname='%s_%s'%(irc.network,channel)

if not UrlReader.databases.has_key(scname):
# new db
dbpath=plugins.makeChannelFilename('%s.db'%scname ,'urldata')
UrlReader.databases[scname]=bsddb.btopen(dbpath,'c')

urldb = UrlReader.databases[scname]

if urldb.has_key(url):
poster,title=pickle.loads(urldb[url])
msg='%s has already posted it: %s'%(poster,title)
irc.reply(msg.encode('utf-8'))
else:

try:
title=self._getTitle(url)
if title != None:
urldb[url]=pickle.dumps([nick,title])
irc.reply(title.encode('utf-8'))
except:
traceback.print_exc()
irc.reply('No Title')


def _getTitle(self,url):
title=None
req=urllib2.Request(url,None,{'User-agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)'})
handler=urllib2.urlopen(req)
if handler.headers['Content-Type'].find('html')>-1:
# read the first 4096 bytes
text=handler.read(4096)
handler.close()
tm=UrlReader.TITLE_PATTERN.search(text)
cm=UrlReader.CHAR_PATTERN.search(text)
if tm:
title=tm.groups()[0]
if cm:
encoding=cm.groups()[0]
else:
# use 'gbk' as default encoding
encoding='gbk'
title=title.decode(encoding)

return title

else:
return None


Class = UrlReader


# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:
37 changes: 37 additions & 0 deletions UrlReader/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
###
# Copyright (c) 2009, Young Ng
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the author of this software nor the name of
# contributors to this software may be used to endorse or promote products
# derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

###

from supybot.test import *

class UrlReaderTestCase(PluginTestCase):
plugins = ('UrlReader',)


# vim:set shiftwidth=4 tabstop=4 expandtab textwidth=79:

0 comments on commit d3b7fa0

Please sign in to comment.