PaulSonOfLars · GaryBeez · May 25, 2018 · May 26, 2018 · Jun 6, 2018 · PaulSonOfLars
diff --git a/tg_bot/modules/locks.py b/tg_bot/modules/locks.py
@@ -1,12 +1,12 @@
 import html
 from typing import Optional, List
 
-from telegram import Message, Chat, Update, Bot, ParseMode, User, MessageEntity
+from telegram import Message, Chat, Update, Bot, ParseMode, User, MessageEntity, MAX_MESSAGE_LENGTH
 from telegram import TelegramError
 from telegram.error import BadRequest
 from telegram.ext import CommandHandler, MessageHandler, Filters
 from telegram.ext.dispatcher import run_async
-from telegram.utils.helpers import mention_html
+from telegram.utils.helpers import mention_html, escape_markdown
 
 import tg_bot.modules.sql.locks_sql as sql
 from tg_bot import dispatcher, SUDO_USERS, LOGGER
@@ -25,7 +25,10 @@
               'contact': Filters.contact,
               'photo': Filters.photo,
               'gif': Filters.document & CustomFilters.mime_type("video/mp4"),
-              'url': Filters.entity(MessageEntity.URL) | Filters.caption_entity(MessageEntity.URL),
+              'url': Filters.entity(MessageEntity.URL) |
+                     Filters.caption_entity(MessageEntity.URL) |
+                     Filters.entity(MessageEntity.TEXT_LINK) |
+                     Filters.caption_entity(MessageEntity.TEXT_LINK),
               'bots': Filters.status_update.new_chat_members,
               'forward': Filters.forwarded,
               'game': Filters.game
@@ -80,6 +83,59 @@ def locktypes(bot: Bot, update: Update):
     update.effective_message.reply_text("\n - ".join(["Locks: "] + list(LOCK_TYPES) + list(RESTRICTION_TYPES)))
 
 
+@user_admin
+def add_whitelist(bot: Bot, update: Update):
+    """Whitelist every URL entity found in the command message for this chat and report which were added."""
+    chat = update.effective_chat  # type: Optional[Chat]
+    message = update.effective_message  # type: Optional[Message]
+    # parse_entities is documented to take a list of entity types, so wrap the constant.
+    urls = message.parse_entities([MessageEntity.URL]).values()
+    # Keep only the URLs the SQL layer reports as stored.
+    added = [url for url in urls if sql.add_whitelist(chat.id, url)]
+    if added:
+        message.reply_text("Added {} to whitelist.".format(', '.join(added)))
+    else:
+        message.reply_text("No URLs were added to the whitelist")
+
+
+@user_admin
+def remove_whitelist(bot: Bot, update: Update):
+    """Remove every URL entity found in the command message from this chat's whitelist and report the result."""
+    chat = update.effective_chat  # type: Optional[Chat]
+    message = update.effective_message  # type: Optional[Message]
+    # parse_entities is documented to take a list of entity types, so wrap the constant.
+    urls = message.parse_entities([MessageEntity.URL]).values()
+    # Keep only the URLs the SQL layer reports as actually deleted.
+    removed = [url for url in urls if sql.remove_whitelist(chat.id, url)]
+    if removed:
+        message.reply_text("Removed `{}` from whitelist.".format('`, `'.join(escape_markdown(w) for w in removed)),
+                           parse_mode=ParseMode.MARKDOWN)
+    else:
+        message.reply_text("Could not remove URL from whitelist or URL not found.")
+
+def list_white(bot: Bot, update: Update):
+    """Reply with this chat's whitelisted URLs, starting a new message before one would pass MAX_MESSAGE_LENGTH."""
+    chat = update.effective_chat  # type: Optional[Chat]
+    message = update.effective_message  # type: Optional[Message]
+    whitelist = sql.get_whitelist(chat.id)
+    if not whitelist:
+        message.reply_text("No URLs are whitelisted here!")
+        return
+
+    header = "Whitelisted URLs:\n"
+    pending = header
+    for url in sorted(whitelist):
+        piece = "{}, ".format(url)
+        if len(pending) + len(piece) <= MAX_MESSAGE_LENGTH:
+            pending += piece
+            continue
+        # The entry does not fit: send what has been collected and start the next reply with it.
+        message.reply_text(pending)
+        pending = piece
+    if pending != header:
+        message.reply_text(pending)
+
+
 @user_admin
 @bot_can_delete
 @loggable
@@ -199,6 +255,16 @@ def del_lockables(bot: Bot, update: Update):
                         chat.kick_member(new_mem.id)
                         message.reply_text("Only admins are allowed to add bots to this chat! Get outta here.")
             else:
+                #allow whitelisted URLs
+                if lockable == 'url':
+                    entities = set(url for url in message.parse_entities(MessageEntity.URL).values())
+                    #MessageEntity.TEXT_LINK could be added in the filter above, but would return the text, not the url,
+                    #so add all entities that have a 'url' field separately
+                    entities = entities | set(entity.url for entity in message.entities if entity.url)
+                    #if all URLs are any of the whitelisted ones, accept the message
+                    if all( any(regexp.search(text) for regexp in sql.get_whitelist(chat.id).values())
+                            for text in entities):
+                        continue
                 try:
                     message.delete()
                 except BadRequest as excp:
@@ -279,11 +345,14 @@ def __chat_settings__(chat_id, user_id):
 
 __help__ = """
  - /locktypes: a list of possible locktypes
+- /whitelisted: lists urls in this chat's whitelist
 
 *Admin only:*
  - /lock <type>: lock items of a certain type (not available in private)
  - /unlock <type>: unlock items of a certain type (not available in private)
  - /locks: the current list of locks in this chat.
+ - /whitelist <url>: add url to whitelist so it's not deleted by URL lock (accepts multiple)
+ - /unwhitelist <url>: remove url from whitelist (accepts multiple)
 
 Locks can be used to restrict a group's users.
 eg:
@@ -298,11 +367,17 @@ def __chat_settings__(chat_id, user_id):
 LOCK_HANDLER = CommandHandler("lock", lock, pass_args=True, filters=Filters.group)
 UNLOCK_HANDLER = CommandHandler("unlock", unlock, pass_args=True, filters=Filters.group)
 LOCKED_HANDLER = CommandHandler("locks", list_locks, filters=Filters.group)
+WHITELIST_HANDLER = CommandHandler("whitelist", add_whitelist, filters=Filters.group)
+UNWHITELIST_HANDLER = CommandHandler("unwhitelist", remove_whitelist, filters=Filters.group)
+WHITELISTED_HANDLER = DisableAbleCommandHandler("whitelisted", list_white, filters=Filters.group, admin_ok=True)
 
 dispatcher.add_handler(LOCK_HANDLER)
 dispatcher.add_handler(UNLOCK_HANDLER)
 dispatcher.add_handler(LOCKTYPES_HANDLER)
 dispatcher.add_handler(LOCKED_HANDLER)
+dispatcher.add_handler(WHITELIST_HANDLER)
+dispatcher.add_handler(UNWHITELIST_HANDLER)
+dispatcher.add_handler(WHITELISTED_HANDLER)
 
 dispatcher.add_handler(MessageHandler(Filters.all & Filters.group, del_lockables), PERM_GROUP)
 dispatcher.add_handler(MessageHandler(Filters.all & Filters.group, rest_handler), REST_GROUP)
diff --git a/tg_bot/modules/sql/locks_sql.py b/tg_bot/modules/sql/locks_sql.py
@@ -1,7 +1,8 @@
 # New chat added -> setup permissions
 import threading
+import re
 
-from sqlalchemy import Column, String, Boolean
+from sqlalchemy import Column, String, Boolean, UnicodeText
 
 from tg_bot.modules.sql import SESSION, BASE
 
@@ -61,13 +62,27 @@ def __init__(self, chat_id):
     def __repr__(self):
         return "<Restrictions for %s>" % self.chat_id
 
+class URLWhitelist(BASE):
+    """One whitelisted URL for one chat; chat_id and url together form the primary key."""
+    __tablename__ = "permissions_urls"
+    chat_id = Column(String(14), primary_key=True, nullable=False)
+    url = Column(UnicodeText, primary_key=True, nullable=False)
+
+    def __init__(self, chat_id, url):
+        self.chat_id, self.url = str(chat_id), url  # chat ids are always stored as strings
+
+    def __repr__(self):
+        return "<Permission url whitelist for {}>".format(self.chat_id)
 
 Permissions.__table__.create(checkfirst=True)
 Restrictions.__table__.create(checkfirst=True)
+URLWhitelist.__table__.create(checkfirst=True)
 
 
 PERM_LOCK = threading.RLock()
 RESTR_LOCK = threading.RLock()
+WHITELIST_LOCK = threading.RLock()  # held around whitelist writes in add_whitelist, remove_whitelist and migrate_chat
+CHAT_WHITELIST = {}  # in-memory cache: str(chat_id) -> {url: compiled regexp}
 
 
 def init_permissions(chat_id, reset=False):
@@ -216,6 +231,64 @@ def get_restr(chat_id):
         SESSION.close()
 
 
+def get_whitelist(chat_id):  # reads the in-memory CHAT_WHITELIST cache only; no database query is made here
+    return CHAT_WHITELIST.get(str(chat_id), {})  # {url: compiled regexp}; a fresh empty dict for unknown chats
+
+
+def add_whitelist(chat_id, url):
+    """Normalise url, store it for chat_id if it is new, and cache its compiled matcher; always returns True."""
+    with WHITELIST_LOCK:
+        # Drop the scheme and a leading "www.", lowercase the rest, and strip one trailing slash.
+        url = re.search(r'(^http:\/\/|^https:\/\/|^ftp:\/\/|^)(www\.)?(\S*)', url, flags=re.I).group(3).lower()
+        if url.endswith('/'):
+            url = url[:-1]
+        if SESSION.query(URLWhitelist).get((str(chat_id), url)):
+            SESSION.close()  # nothing to write; release the session as remove_whitelist does on its no-op path
+        else:
+            SESSION.add(URLWhitelist(str(chat_id), url))
+            SESSION.commit()
+        # The suffix is a raw string: '\W' in a plain literal is an invalid escape sequence.
+        CHAT_WHITELIST.setdefault(str(chat_id), {})[url] = re.compile(
+            r'(^http:\/\/|^https:\/\/|^ftp:\/\/|^)(www\.)?' + re.escape(url) + r'($|\W)', flags=re.I)
+        return True
+
+
+def remove_whitelist(chat_id, url):
+    """Drop the normalised url from chat_id's cache and table; returns True only if a stored row was deleted."""
+    with WHITELIST_LOCK:
+        # Same normalisation as add_whitelist: no scheme, no leading "www.", lowercase, one trailing slash cut.
+        bare = re.search(r'(^http:\/\/|^https:\/\/|^ftp:\/\/|^)(www\.)?(\S*)', url, flags=re.I).group(3).lower()
+        if bare.endswith('/'):
+            bare = bare[:-1]
+        CHAT_WHITELIST.get(str(chat_id), {}).pop(bare, None)
+        row = SESSION.query(URLWhitelist).get((str(chat_id), bare))
+        if not row:
+            SESSION.close()
+            return False
+        SESSION.delete(row)
+        SESSION.commit()
+        return True
+
+
+def __load_chat_whitelist():
+    """Fill CHAT_WHITELIST from the permissions_urls table; invoked once when this module is imported.
+
+    The cache maps str(chat_id) to a dict of {url: compiled regexp}.
+    Each regexp accepts an optional http/https/ftp scheme and "www." prefix,
+    then the stored url followed by end of string or a non-word character;
+    matching is case-insensitive.
+    """
+    try:
+        # One pass over all rows; setdefault creates a chat's dict the first time it is seen.
+        for row in SESSION.query(URLWhitelist).all():
+            # The suffix is a raw string: '\W' in a plain literal is an invalid escape sequence.
+            CHAT_WHITELIST.setdefault(str(row.chat_id), {})[row.url] = re.compile(
+                r'(^http:\/\/|^https:\/\/|^ftp:\/\/|^)(www\.)?' + re.escape(row.url) + r'($|\W)',
+                flags=re.I,
+            )
+    finally:
+        SESSION.close()
+
 def migrate_chat(old_chat_id, new_chat_id):
     with PERM_LOCK:
         perms = SESSION.query(Permissions).get(str(old_chat_id))
@@ -228,3 +301,11 @@ def migrate_chat(old_chat_id, new_chat_id):
         if rest:
             rest.chat_id = str(new_chat_id)
         SESSION.commit()
+
+    with WHITELIST_LOCK:
+        white = SESSION.query(URLWhitelist).filter(URLWhitelist.chat_id == str(old_chat_id)).all()
+        for row in white:
+            row.chat_id = str(new_chat_id)
+        SESSION.commit()
+
+__load_chat_whitelist()