|
| 1 | +#!/usr/bin/env python |
| 2 | +# |
| 3 | +# Copyright (C) 2010 by the Free Software Foundation, Inc. |
| 4 | +# |
| 5 | +# This program is free software; you can redistribute it and/or |
| 6 | +# modify it under the terms of the GNU General Public License |
| 7 | +# as published by the Free Software Foundation; either version 2 |
| 8 | +# of the License, or (at your option) any later version. |
| 9 | +# |
| 10 | +# This program is distributed in the hope that it will be useful, |
| 11 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | +# GNU General Public License for more details. |
| 14 | +# |
| 15 | +# You should have received a copy of the GNU General Public License |
| 16 | +# along with this program; if not, write to the Free Software |
| 17 | +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, |
| 18 | +# USA. |
| 19 | + |
| 20 | +"""This is a template for constructing an external archiver for situations |
| 21 | +where one wants to archive posts in Mailman's pipermail archive, but also |
| 22 | +wants to invoke some other process on the archived message after its URL |
| 23 | +and/or path are known. |
| 24 | +
|
| 25 | +It assumes this is invoked by mm_cfg.py settings like |
| 26 | +PUBLIC_EXTERNAL_ARCHIVER = '/path/to/Ext_Arch.py %(hostname)s %(listname)s' |
| 27 | +PRIVATE_EXTERNAL_ARCHIVER = '/path/to/Ext_Arch.py %(hostname)s %(listname)s' |
| 28 | +
|
| 29 | +The path in the sys.path.insert() below must be adjusted to the actual path |
| 30 | +to Mailman's bin/ directory, or you can simply put this script in Mailman's |
| 31 | +bin/ directory and it will work without the sys.path.insert() and of course, |
| 32 | +you must add the code you want to the ext_process function. |
| 33 | +""" |
| 34 | + |
| 35 | +import sys |
| 36 | +sys.path.insert(0, '/usr/local/mailman/bin') # path to your mailman dir |
| 37 | +import paths |
| 38 | + |
| 39 | +import os |
| 40 | +import email |
| 41 | +import time |
| 42 | + |
| 43 | +from cStringIO import StringIO |
| 44 | + |
| 45 | +from Mailman import Message |
| 46 | +from Mailman import MailList |
| 47 | +from Mailman.Archiver import HyperArch |
| 48 | +from Mailman.Logging.Syslog import syslog |
| 49 | +from Mailman.Logging.Utils import LogStdErr |
| 50 | + |
# For debugging, log stderr to Mailman's 'debug' log.  This runs at import
# time, before any of the functions below are called.
LogStdErr('debug', 'mailmanctl', manual_reprime=0)
| 53 | + |
def ext_process(listname, hostname, url, filepath, msg):
    """Index the just archived message in ElasticSearch via pyes.

    Arguments here are the list name, the host name, the URL to the just
    archived message, the file system path to the just archived message and
    the message object.

    The message is indexed as a document with four fields: 'url', 'title'
    (the Subject: header), 'date' (the time this function ran, not the
    message's Date: header) and 'text' (str(msg)).  If asking for the index
    status fails, the index is assumed to be missing and is created together
    with its field mappings before the document is indexed.

    Errors are logged to Mailman's 'error' log and are not re-raised, so an
    indexing problem is reported without making this function fail.  Nothing
    is returned.
    """
    from pyes import ES
    from pyes.exceptions import ClusterBlockException, NoServerAvailable
    import datetime

    # CHANGE these settings to reflect your configuration
    _ES_SERVERS = ['127.0.0.1:9500'] # I prefer thrift
    _indexname = "mailman"
    _doctype = "mail"
    date = datetime.datetime.today()

    try:
        iconn = ES(_ES_SERVERS)
        try:
            status = iconn.status(_indexname)
            # Log through syslog: there is no 'logger' object in this module,
            # and a NameError here would be mistaken for a missing index.
            syslog('debug', 'Indexer status:%s', status)
        except Exception:
            # The status request failed; assume the index does not exist yet
            # and create it along with the field mappings.
            iconn.create_index(_indexname)
            # Pause between the administrative calls, presumably to let the
            # cluster settle -- TODO confirm the sleeps are still required.
            time.sleep(1)
            iconn.status(_indexname)
            mappings = {
                u'text': {'boost': 1.0,
                          'index': 'analyzed',
                          'store': 'yes',
                          'type': u'string',
                          "term_vector": "with_positions_offsets"},
                u'url': {'boost': 1.0,
                         'index': 'not_analyzed',
                         'store': 'yes',
                         'type': u'string',
                         "term_vector": "no"},
                u'title': {'boost': 1.0,
                           'index': 'analyzed',
                           'store': 'yes',
                           'type': u'string',
                           "term_vector": "with_positions_offsets"},
                u'date': {'store': 'yes',
                          'type': u'date'},
            }
            time.sleep(1)
            iconn.put_mapping(_doctype, mappings, _indexname)

        data = dict(url=url,
                    title=msg.get('subject'),
                    date=date,
                    text=str(msg)
                    )
        iconn.index(data, _indexname, _doctype)

        syslog('debug', 'listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except ClusterBlockException:
        syslog('error', 'Cluster in recovery state: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except NoServerAvailable:
        syslog('error', 'No server available: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
               listname, hostname, url, filepath, msg)
    except Exception:
        # Last-resort handler: record the traceback instead of propagating.
        # 'except Exception' lets SystemExit and KeyboardInterrupt through.
        import traceback
        syslog('error', 'Unknown: listname: %s, hostname: %s, url: %s, path: %s, msg: %s\nstacktrace: %s',
               listname, hostname, url, filepath, msg, repr(traceback.format_exc()))

    return
| 125 | + |
def main():
    """Archive the message on stdin, then hand it to ext_process().

    The host name and the list name are taken from the first and second
    command line arguments.  The message read from standard input is added
    to the list's pipermail archive; once that is done its URL and file
    system path are known and are passed, with the message object, to
    ext_process().
    """

    hostname = sys.argv[1]
    listname = sys.argv[2]

    # The list is opened unlocked because ArchRunner already holds the list
    # lock.  That is safe since the list object is never modified here;
    # ArchRunner's lock and pipermail's archive lock guard against races.
    mlist = MailList.MailList(listname, lock=False)

    # processUnixMailbox() wants a seekable file, so buffer stdin in memory.
    mbox = StringIO(sys.stdin.read())

    # Parse the message for ext_process().  If a Message.Message instance
    # isn't needed, this and the email and Message imports can be dropped.
    msg = email.message_from_file(mbox, Message.Message)

    archiver = HyperArch.HyperArchive(mlist)
    # Remember the number the next message will get before adding it ...
    sequence = archiver.sequence
    # ... then add the message to the archive.
    archiver.processUnixMailbox(mbox)
    mbox.close()

    # Work out where the archived message ended up.
    archive = archiver.archive
    filename = '%06d.html' % sequence
    filepath = os.path.join(archiver.basedir, archive, filename)
    archiver.close()

    url = '%s%s/%s' % (mlist.GetBaseArchiveURL(), archive, filename)

    ext_process(listname, hostname, url, filepath, msg)
| 167 | + |
# Run the archiver only when this file is executed as a script.
if __name__ == '__main__':
    main()
0 commit comments