diff --git a/.gitignore b/.gitignore index 8989f962..65c976ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.log DiscordBot/tokens.json TABot/tokens.json +*.csv +*.joblib diff --git a/DiscordBot/bot.py b/DiscordBot/bot.py index ec5dddb6..c887eb1a 100644 --- a/DiscordBot/bot.py +++ b/DiscordBot/bot.py @@ -6,8 +6,13 @@ import logging import re import requests -from report import Report +from report import Report, AbuseType, MisinfoCategory, HealthCategory, NewsCategory, State +from user_stats import UserStats +# from classifier.misinfo_classifier import predict_misinformation, load_model import pdb +import openai +import time +import asyncio # Set up logging to the console logger = logging.getLogger('discord') @@ -24,16 +29,29 @@ # If you get an error here, it means your token is formatted incorrectly. Did you put it in quotes? tokens = json.load(f) discord_token = tokens['discord'] + openai_api_key = tokens['openai'] + +openai.api_key = openai_api_key +client = openai.OpenAI(api_key=openai_api_key) class ModBot(discord.Client): def __init__(self): intents = discord.Intents.default() intents.message_content = True + intents.members = True super().__init__(command_prefix='.', intents=intents) self.group_num = None self.mod_channels = {} # Map from guild to the mod channel id for that guild self.reports = {} # Map from user IDs to the state of their report + self.pending_appeals = {} + self.active_mod_flow = None # State for the current moderation flow + self.user_stats = UserStats() # Initialize user statistics tracking + self.awaiting_appeal_confirmation = {} + self.awaiting_appeal_reason = {} + self.openai_client = openai.OpenAI(api_key=openai_api_key) + # Initialize the report queue + self.report_queue = asyncio.Queue() async def on_ready(self): print(f'{self.user.name} has connected to Discord! It is these guilds:') @@ -53,7 +71,10 @@ async def on_ready(self): for channel in guild.text_channels: if channel.name == f'group-{self.group_num}-mod': self.mod_channels[guild.id] = channel - + + # Start the report queue processor + asyncio.create_task(self.process_report_queue()) + print("Report queue processor started.") async def on_message(self, message): ''' @@ -71,6 +92,65 @@ async def on_message(self, message): await self.handle_dm(message) async def handle_dm(self, message): + if message.author.id in self.pending_appeals: + # Retrieve all pending appeals for the user + user_appeals = self.pending_appeals[message.author.id] + if not user_appeals: + return + + # Check if the user is in the middle of an appeal confirmation + if self.awaiting_appeal_confirmation.get(message.author.id): + if message.content.strip() == '1': # User wants to appeal + await message.channel.send("Please provide your reasoning for appealing:") + self.awaiting_appeal_confirmation[message.author.id] = False + self.awaiting_appeal_reason[message.author.id] = True + return + elif message.content.strip() == '2': # User does not want to appeal + await message.channel.send("Thank you.") + self.awaiting_appeal_confirmation[message.author.id] = False + # Reset the appeal state for the user + del self.pending_appeals[message.author.id] + return + else: + await message.channel.send("Invalid response. 
Please reply with 1 for Yes or 2 for No.") + return + + # Check if the user is providing their appeal reasoning + if self.awaiting_appeal_reason.get(message.author.id): + # Process the appeal reasoning + info = user_appeals[0] + + mod_chan = self.mod_channels[info['guild_id']] + + # Build the appeal notice + text = ( + f"APPEAL RECEIVED:\n" + f"User: {info['reported_name']}\n" + f"Outcome: {info['outcome']}\n\n" + f"Original Message:\n{info['original_message']}" + ) + if info.get('explanation'): + text += f"\n\nReason: {info['explanation']}" + text += f"\n\nAppeal Reason:\n{message.content}" + + # Send to mod channel + await mod_chan.send(text) + + # Prompt mods for ACCEPT/UPHOLD + self.active_mod_flow = { + 'step': 'appeal_review', + 'info': info, + 'message_author': info['reported_name'], + 'context': {}, + 'guild_id': info['guild_id'] + } + await mod_chan.send("Moderators, please respond with:\n1. ACCEPT\n2. UPHOLD") + + # Acknowledge to user + await message.channel.send("Your appeal has been submitted and is under review.") + self.awaiting_appeal_reason[message.author.id] = False + return + # Handle a help message if message.content == Report.HELP_KEYWORD: reply = "Use the `report` command to begin the reporting process.\n" @@ -89,7 +169,7 @@ async def handle_dm(self, message): if author_id not in self.reports: self.reports[author_id] = Report(self) - # Let the report class handle this message; forward all the messages it returns to uss + # Let the report class handle this message; forward all the messages it returns to us responses = await self.reports[author_id].handle_message(message) for r in responses: await message.channel.send(r) @@ -99,33 +179,1032 @@ async def handle_dm(self, message): self.reports.pop(author_id) async def handle_channel_message(self, message): - # Only handle messages sent in the "group-#" channel - if not message.channel.name == f'group-{self.group_num}': + # Only handle messages sent in the "group-#-mod" channel + if message.channel.name == f'group-{self.group_num}-mod': + await self.handle_mod_channel_message(message) + elif message.channel.name == f'group-{self.group_num}': + # Create a task for message classification that runs independently + asyncio.create_task(self.process_message(message)) + + async def process_report_queue(self): + """ + Continuously process reports from the queue. + This ensures reports are handled one at a time. 
+ """ + while True: + try: + # Get the next report from the queue + report_data = await self.report_queue.get() + + print("\n=== Processing Report from Queue ===") + print(f"Queue size before processing: {self.report_queue.qsize()}") + print(f"Processing report for: {report_data['message_author']}") + print(f"Report type: {report_data['report_type']}") + print("================================\n") + + # Wait for any active moderation flow to complete + while self.active_mod_flow is not None: + print("Waiting for active moderation flow to complete...") + await asyncio.sleep(1) # Check every second + + # Process the report + await self.start_moderation_flow( + report_type=report_data['report_type'], + report_content=report_data['report_content'], + message_author=report_data['message_author'], + message_link=report_data.get('message_link'), + user_context=report_data.get('user_context') + ) + + # Mark the task as done + self.report_queue.task_done() + + print("\n=== Report Processing Complete ===") + print(f"Queue size after processing: {self.report_queue.qsize()}") + print("================================\n") + + except Exception as e: + print(f"Error processing report from queue: {e}") + # Continue processing the queue even if one report fails + continue + + async def process_message(self, message): + """ + Process a message for misinformation detection independently of the moderation flow. + This runs in parallel with other message processing and moderation tasks. + """ + # Check for misinformation in the message + has_misinfo = await self.detect_misinformation(message.content) + + if has_misinfo: + # If misinformation is detected, classify the type for the report + abuse_type_raw = await self.classify_abuse_type(message.content) + abuse_type = self.normalize_abuse_type(abuse_type_raw) + + if abuse_type: + # Add the report to the queue instead of processing it directly + report_data = { + 'report_type': abuse_type, + 'report_content': message.content, + 'message_author': message.author.name, + 'message_link': message.jump_url + } + await self.report_queue.put(report_data) + print("\n=== Report Added to Queue ===") + print(f"Author: {message.author.name}") + print(f"Type: {abuse_type}") + print(f"Current queue size: {self.report_queue.qsize()}") + print("============================\n") + + async def start_moderation_flow( + self, + report_type, + report_content, + message_author, + user_context=None, + message_link=None + ): + # Determine the initial step based on report type + if report_type.startswith('ADVERTISING MISINFO'): + initial_step = 'advertising_done' + elif ( + report_type.startswith('MISINFORMATION') + or report_type.startswith('HEALTH MISINFO') + or report_type.startswith('NEWS MISINFO') + ): + initial_step = 'danger_level' + else: + initial_step = 'default_done' + + # Store everything (including user_context) up front + self.active_mod_flow = { + 'step': initial_step, + 'report_type': report_type, + 'report_content': report_content, + 'message_author': message_author, + 'message_link': message_link, + 'user_context': user_context, + 'context': {} + } + + # Pick any one moderator channel + mod_channel = None + for channel in self.mod_channels.values(): + mod_channel = channel + break + + if not mod_channel: + return + + # If this is a misinformation‐type report, run the danger‐level flow + if initial_step == 'danger_level': + # Update the step + self.active_mod_flow['step'] = 'confirm_danger_level' + + # Let LLM guess LOW/MEDIUM/HIGH, passing along user_context + predicted 
= await self.classify_danger_level( + report_content, + user_context + ) + self.active_mod_flow['context']['predicted_danger'] = predicted + + # Build "new report" message and include user_context if provided + base_msg = ( + f"A new report has been submitted:\n" + f"Type: {report_type}\n" + f"Content: {report_content}\n" + f"Reported user: {message_author}\n" + ) + if user_context: + base_msg += f"User context: {user_context}\n" + + base_msg += ( + f"\nSystem suggests danger level: {predicted.upper()}. Do you agree?\n" + "1. Yes\n" + "2. No" + ) + await mod_channel.send(base_msg) + return + + # Otherwise, handle the other two cases + if initial_step == 'advertising_done': + await mod_channel.send( + "Report sent to advertising team. No further action required." + ) + self.active_mod_flow = None + return + + if initial_step == 'default_done': + # Just show the report, do not prompt for reply + await mod_channel.send( + f"A new report has been submitted:\n" + f"Type: {report_type}\n" + f"Content: {report_content}\n" + f"Reported user: {message_author}" + ) + self.active_mod_flow = None + return + await self.prompt_next_moderation_step(mod_channel) + + async def notify_reported_user(self, user_name, guild, outcome, explanation=None, original_message=None): + """Notify the user about the outcome and provide an appeal option.""" + user = discord.utils.get(guild.members, name=user_name) + if user: + try: + msg = ( + f"Your message was reviewed by moderators. Outcome: {outcome}.\n\n" + f"Original Message:\n{original_message}\n\n" + f"Reason: {explanation}\n\n" + "If you believe this was a mistake, you may reply to this message to appeal. " + "Would you like to appeal this decision?\n1. Yes\n2. No" + ) + await user.send(msg) + + # Track pending appeal + if user.id not in self.pending_appeals: + self.pending_appeals[user.id] = [] + self.pending_appeals[user.id].append({ + 'guild_id': guild.id, + 'reported_name': user_name, + 'outcome': outcome, + 'original_message': original_message, + 'explanation': explanation + }) + + # Initialize appeal confirmation state + if not hasattr(self, 'awaiting_appeal_confirmation'): + self.awaiting_appeal_confirmation = {} + self.awaiting_appeal_confirmation[user.id] = True + + except Exception as e: + print(f"Failed to DM user {user_name}: {e}") + + async def notify_user_of_appeal_option(self, user_name, guild, explanation): + """Notify the user about the appeal process after their post is removed.""" + user = discord.utils.get(guild.members, name=user_name) + if user: + try: + msg = f"Your post was removed for the following reason: {explanation}.\n" + msg += "If you believe this was a mistake, you can appeal by replying with your reason." 
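                # Note: the guild.members lookup above relies on the members intent
                # enabled in ModBot.__init__ (intents.members = True); user.send() can
                # still raise discord.Forbidden if the member has DMs disabled, which
                # the except block below catches and logs instead of aborting the flow.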
+ await user.send(msg) + except Exception as e: + print(f"Failed to notify user {user_name}: {e}") + + async def handle_mod_channel_message(self, message): + if not self.active_mod_flow: return + step = self.active_mod_flow['step'] + content = message.content.strip().lower() + mod_channel = message.channel + guild = mod_channel.guild if hasattr(mod_channel, 'guild') else None - # Forward the message to the mod channel - mod_channel = self.mod_channels[message.guild.id] - await mod_channel.send(f'Forwarded message:\n{message.author.name}: "{message.content}"') - scores = self.eval_text(message.content) - await mod_channel.send(self.code_format(scores)) + if step == 'appeal_review': + # Pull the info dict that was stashed earlier + info = self.active_mod_flow.get('info', {}) + reported_name = info.get('reported_name') - - def eval_text(self, message): - '''' - TODO: Once you know how you want to evaluate messages in your channel, - insert your code here! This will primarily be used in Milestone 3. - ''' - return message + # Look up the User object in this guild + reported_user = discord.utils.get(guild.members, name=reported_name) + user_id = reported_user.id if reported_user else None + + # 1) Pop this appeal out of the queue + if user_id in self.pending_appeals: + self.pending_appeals[user_id].pop(0) + if not self.pending_appeals[user_id]: + del self.pending_appeals[user_id] + + # 2) Send the DM back to the user with the moderator's decision + if content == '1': # ACCEPT + await mod_channel.send("The appeal has been accepted. The original decision has been overturned.") + if reported_user: + await reported_user.send( + "Your appeal has been accepted. The original decision has been overturned." + ) + + elif content == '2': # UPHOLD + await mod_channel.send("The appeal has been reviewed and the original decision is upheld.") + if reported_user: + await reported_user.send( + "Your appeal has been reviewed, and the original decision is upheld." + ) + + else: + await mod_channel.send("Invalid response. Please respond with:\n1. ACCEPT\n2. UPHOLD") + return + + # Clear this flow + self.active_mod_flow = None + + # 3) If that user still has more pending appeals, prompt them again + if user_id in self.pending_appeals and self.pending_appeals[user_id]: + next_info = self.pending_appeals[user_id][0] + try: + prompt_text = ( + f"Your message was reviewed by moderators. Outcome: {next_info['outcome']}.\n\n" + f"Original Message:\n{next_info['original_message']}\n\n" + ) + if next_info.get('explanation'): + prompt_text += f"Reason: {next_info['explanation']}\n\n" + prompt_text += ( + "If you believe this was a mistake, you may reply to this message to appeal. " + "Would you like to appeal this decision?\n1. Yes\n2. No" + ) + await reported_user.send(prompt_text) + self.awaiting_appeal_confirmation[user_id] = True + except Exception: + pass + return + + ctx = self.active_mod_flow.get('context', {}) + report_type = self.active_mod_flow['report_type'] + report_content = self.active_mod_flow['report_content'] + reported_user_name = self.active_mod_flow['message_author'] + + if step == 'confirm_danger_level': + if content == '1': # Moderator agrees with LLM + predicted = ctx.get('predicted_danger', 'medium') + ctx['danger_level'] = predicted + + # Now ask LLM to recommend a post‐action + post_action = await self.classify_post_action( + report_content, + predicted, + self.active_mod_flow.get('user_context') + ) + ctx['predicted_post_action'] = post_action # e.g. "remove", etc. 
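            # Note: moderation steps advance roughly as
            #   confirm_danger_level -> confirm_post_action -> (flag_explanation |
            #   remove_explanation -> confirm_user_action);
            # a "No" answer at either confirmation drops into the manual branches
            # (danger_level_manual, low/medium/high_action_on_post, action_on_user).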
+ + label_map = { + "do_not_recommend": "DO NOT RECOMMEND", + "flag_as_unproven": "FLAG AS UNPROVEN", + "remove": "REMOVE", + "raise": "RAISE", + "report_to_authorities": "REPORT TO AUTHORITIES" + } + action_label = label_map.get(post_action, None) + + if action_label: + await mod_channel.send( + f"System suggests post action: {action_label}. Do you agree?\n" + "1. Yes\n" + "2. No" + ) + self.active_mod_flow['step'] = 'confirm_post_action' + return + else: + # If LLM failed to return a valid post‐action, fall back to manual + if predicted == 'low': + await mod_channel.send( + "Predicted LOW danger. After claim is investigated, what action should be taken on post?\n" + "1. DO NOT RECOMMEND\n" + "2. FLAG AS UNPROVEN" + ) + self.active_mod_flow['step'] = 'low_action_on_post' + return + else: + await mod_channel.send( + f"Predicted {predicted.upper()} danger. After claim is investigated, what action should be taken on post?\n" + "1. REMOVE\n" + "2. RAISE\n" + "3. REPORT TO AUTHORITIES" + ) + self.active_mod_flow['step'] = ('medium_action_on_post' + if predicted == 'medium' else 'high_action_on_post') + return + + if content == '2': # Moderator disagrees with LLM's danger‐level + await mod_channel.send( + "What is the level of danger for this report?\n" + "1. LOW\n" + "2. MEDIUM\n" + "3. HIGH" + ) + self.active_mod_flow['step'] = 'danger_level_manual' + return + + await mod_channel.send("Invalid response. Please reply with:\n1. Yes\n2. No") + return + + if step == 'danger_level_manual': + if content not in ['1','2','3']: + await mod_channel.send("Invalid option. Please choose:\n1. LOW\n2. MEDIUM\n3. HIGH") + return + levels = {'1':'low','2':'medium','3':'high'} + chosen = levels[content] + ctx['danger_level'] = chosen + + # Ask LLM to recommend a post‐action given the manually chosen danger level: + predicted_action = await self.classify_post_action( + report_content, + chosen, + self.active_mod_flow.get('user_context') + ) + ctx['predicted_post_action'] = predicted_action + + label_map = { + "do_not_recommend": "DO NOT RECOMMEND", + "flag_as_unproven": "FLAG AS UNPROVEN", + "remove": "REMOVE", + "raise": "RAISE", + "report_to_authorities": "REPORT TO AUTHORITIES" + } + action_label = label_map.get(predicted_action, None) + + if action_label: + await mod_channel.send( + f"System suggests post action: {action_label}. Do you agree?\n" + "1. Yes\n" + "2. No" + ) + self.active_mod_flow['step'] = 'confirm_post_action' + else: + # Fallback if LLM failed to return a valid post‐action: + if chosen == 'low': + await mod_channel.send( + "Predicted LOW danger. After claim is investigated, what action should be taken on post?\n" + "1. DO NOT RECOMMEND\n" + "2. FLAG AS UNPROVEN" + ) + self.active_mod_flow['step'] = 'low_action_on_post' + else: + await mod_channel.send( + f"Predicted {chosen.upper()} danger. After claim is investigated, what action should be taken on post?\n" + "1. REMOVE\n" + "2. RAISE\n" + "3. REPORT TO AUTHORITIES" + ) + self.active_mod_flow['step'] = ( + 'medium_action_on_post' if chosen == 'medium' else 'high_action_on_post' + ) + return + + if step == 'confirm_post_action': + if content == '1': # Mod agrees with LLM's post‐action + post_action = ctx.get('predicted_post_action') + danger = ctx.get('danger_level') + # Retrieve the reported User object + reported_user = discord.utils.get(guild.members, name=reported_user_name) + + # LOW‐danger branches + if danger == 'low': + if post_action == 'do_not_recommend': + await mod_channel.send( + "Post will not be recommended. Action recorded. 
" + "(Update algorithm so post is not recommended.)" + ) + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post not recommended" + ) + await self.notify_reported_user( + reported_user_name, guild, + outcome="Post not recommended." + ) + self.active_mod_flow = None + return + + elif post_action == 'flag_as_unproven': + await mod_channel.send( + "System suggests FLAG AS UNPROVEN. " + "Please add explanation for why post is being flagged." + ) + self.active_mod_flow['step'] = 'flag_explanation' + return + + # MEDIUM/HIGH‐danger branches + else: + if post_action == 'remove': + await mod_channel.send( + "System suggests REMOVE. Please add explanation for why post is being removed." + ) + self.active_mod_flow['step'] = 'remove_explanation' + return + + elif post_action == 'raise': + await mod_channel.send( + "System suggests RAISE to higher level moderator. " + "Report sent to higher level moderators." + ) + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Report raised to higher level moderator" + ) + self.active_mod_flow = None + return + + elif post_action == 'report_to_authorities': + await mod_channel.send( + "System suggests REPORT TO AUTHORITIES. Report sent to authorities." + ) + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Reported to authorities" + ) + self.active_mod_flow = None + return + + # Fallback if LLM recommendation is invalid + await mod_channel.send("Could not interpret recommended post action. Please choose manually.") + danger = ctx.get('danger_level') + if danger == 'low': + await mod_channel.send( + "After claim is investigated, what action should be taken on post?\n" + "1. DO NOT RECOMMEND\n" + "2. FLAG AS UNPROVEN" + ) + self.active_mod_flow['step'] = 'low_action_on_post' + else: + await mod_channel.send( + "After claim is investigated, what action should be taken on post?\n" + "1. REMOVE\n" + "2. RAISE\n" + "3. REPORT TO AUTHORITIES" + ) + self.active_mod_flow['step'] = ( + 'medium_action_on_post' if danger == 'medium' else 'high_action_on_post' + ) + return + + if content == '2': # Mod overrides–go manual + danger = ctx.get('danger_level') + if danger == 'low': + await mod_channel.send( + "What action should be taken on post?\n" + "1. DO NOT RECOMMEND\n" + "2. FLAG AS UNPROVEN" + ) + self.active_mod_flow['step'] = 'low_action_on_post' + else: + await mod_channel.send( + "What action should be taken on post?\n" + "1. REMOVE\n" + "2. RAISE\n" + "3. REPORT TO AUTHORITIES" + ) + self.active_mod_flow['step'] = ( + 'medium_action_on_post' if danger == 'medium' else 'high_action_on_post' + ) + return + + await mod_channel.send("Invalid response. Please reply with:\n1. Yes\n2. No") + return + + if step == 'confirm_user_action': + if content == '1': # Mod agrees with LLM's user‐action + user_action = ctx.get('predicted_user_action') + reported_user = discord.utils.get(guild.members, name=reported_user_name) + + if user_action == 'record_incident': + await mod_channel.send("Incident recorded for internal use. 
(Add to internal incident count for user.)") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post removed and incident recorded", + ctx.get('remove_explanation', '') + ) + self.active_mod_flow = None + return + + elif user_action == 'temporarily_mute': + await mod_channel.send("User will be muted for 24 hours.") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post removed and user temporarily muted", + ctx.get('remove_explanation', '') + ) + await self.notify_reported_user( + reported_user_name, + guild, + outcome="You have been temporarily muted.", + explanation="You violated the community guidelines.", + original_message=report_content + ) + self.active_mod_flow = None + return + + elif user_action == 'remove_user': + await mod_channel.send("User will be removed.") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post removed and user removed", + ctx.get('remove_explanation', '') + ) + await self.notify_reported_user( + reported_user_name, + guild, + outcome="You have been removed from the server.", + explanation="You violated the community guidelines.", + original_message=report_content + ) + # Track for appeal if removed + user_obj = reported_user + if user_obj: + if user_obj.id not in self.pending_appeals: + self.pending_appeals[user_obj.id] = [] + self.pending_appeals[user_obj.id].append({ + 'guild_id': guild.id, + 'reported_name': reported_user_name, + 'outcome': "You have been removed from the server.", + 'original_message': report_content, + 'explanation': "You violated the community guidelines." + }) + self.active_mod_flow = None + return + + # Fallback to manual if LLM output was unexpected + await mod_channel.send( + "Could not interpret recommended user action. Please choose manually:\n" + "1. RECORD INCIDENT\n" + "2. TEMPORARILY MUTE\n" + "3. REMOVE USER" + ) + self.active_mod_flow['step'] = 'action_on_user' + return + + if content == '2': # Mod overrides → manual user‐action + await mod_channel.send( + "What action should be taken on the creator of the post?\n" + "1. RECORD INCIDENT\n" + "2. TEMPORARILY MUTE\n" + "3. REMOVE USER" + ) + self.active_mod_flow['step'] = 'action_on_user' + return + + await mod_channel.send("Invalid response. Please reply with:\n1. Yes\n2. No") + return + + ctx = self.active_mod_flow['context'] + report_type = self.active_mod_flow['report_type'] + report_content = self.active_mod_flow['report_content'] + reported_user_name = self.active_mod_flow['message_author'] + + # Get the user ID from the reported user's name + reported_user = discord.utils.get(guild.members, name=reported_user_name) + if not reported_user: + await mod_channel.send(f"Could not find user {reported_user_name}. Please verify the username is correct.") + return + + # Misinformation moderation flow + if step == 'advertising_done': + # Already handled + self.active_mod_flow = None + return + if step == 'low_action_on_post': + if content not in ['1', '2']: + await mod_channel.send("Invalid option. Please choose:\n1. DO NOT RECOMMEND\n2. FLAG AS UNPROVEN") + return + if content == '1': # DO NOT RECOMMEND + await mod_channel.send("Post will not be recommended. Action recorded. 
(Update algorithm so post is not recommended.)") + await self.notify_reported_user(reported_user_name, guild, outcome="Post not recommended.") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post not recommended" + ) + self.active_mod_flow = None + return + elif content == '2': # FLAG AS UNPROVEN + await mod_channel.send("Post will be flagged as unproven/non-scientific. Please add explanation for why post is being flagged.") + self.active_mod_flow['step'] = 'flag_explanation' + return + if step == 'flag_explanation': + await mod_channel.send(f"Explanation recorded: {message.content}\nFlagged post as not proven.") + await self.notify_reported_user(reported_user_name, guild, outcome="Post flagged as unproven/non-scientific.", explanation=message.content) + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post flagged as unproven/non-scientific", + message.content + ) + self.active_mod_flow = None + return + if step == 'medium_action_on_post' or step == 'high_action_on_post': + if content not in ['1', '2', '3']: + await mod_channel.send("Invalid option. Please choose:\n1. REMOVE\n2. RAISE\n3. REPORT TO AUTHORITIES") + return + if content == '1': # REMOVE + await mod_channel.send("Post will be removed. Please add explanation for why post is being removed.") + self.active_mod_flow['step'] = 'remove_explanation' + return + elif content == '2': # RAISE + await mod_channel.send("Raising to higher level moderator. Report sent to higher level moderators.") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Report raised to higher level moderator" + ) + self.active_mod_flow = None + return + elif content == '3': # REPORT TO AUTHORITIES + await mod_channel.send("Reporting to authorities. Report sent to authorities.") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Reported to authorities" + ) + self.active_mod_flow = None + return + if step == 'remove_explanation': + explanation = message.content + ctx['remove_explanation'] = explanation + + # Notify user that their post was removed + await self.notify_reported_user( + reported_user_name, + guild, + outcome="Post removed.", + explanation=explanation, + original_message=report_content + ) + + # 1) Let LLM recommend a user‐action now that post is removed + recommended = await self.classify_user_action( + report_content, + ctx.get('danger_level', 'medium'), + 'remove', + self.active_mod_flow.get('user_context') + ) + ctx['predicted_user_action'] = recommended + + label_map = { + "record_incident": "RECORD INCIDENT", + "temporarily_mute": "TEMPORARILY MUTE", + "remove_user": "REMOVE USER" + } + action_label = label_map.get(recommended, None) + + if action_label: + await mod_channel.send( + f"System suggests user action: {action_label}. Do you agree?\n" + "1. Yes\n" + "2. No" + ) + self.active_mod_flow['step'] = 'confirm_user_action' + return + else: + # If LLM failed, fall back to manual: + await mod_channel.send( + "What action should be taken on the creator of the post?\n" + "1. RECORD INCIDENT\n" + "2. TEMPORARILY MUTE\n" + "3. REMOVE USER" + ) + self.active_mod_flow['step'] = 'action_on_user' + return + if step == 'action_on_user': + if content not in ['1', '2', '3']: + await mod_channel.send("Invalid option. Please choose:\n1. RECORD INCIDENT\n2. TEMPORARILY MUTE\n3. REMOVE USER") + return + if content == '1': # RECORD INCIDENT + await mod_channel.send("Incident recorded for internal use. 
(Add to internal incident count for user.)") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post removed and incident recorded", + ctx.get('remove_explanation', '') + ) + self.active_mod_flow = None + return + elif content == '2': # TEMPORARILY MUTE + await mod_channel.send("User will be muted for 24 hours.") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post removed and user temporarily muted", + ctx.get('remove_explanation', '') + ) + await self.notify_reported_user( + reported_user_name, + guild, + outcome="You have been temporarily muted.", + explanation="You violated the community guidelines.", + original_message=report_content + ) + self.active_mod_flow = None + return + elif content == '3': # REMOVE USER + await mod_channel.send("User will be removed.") + self.user_stats.add_report( + reported_user.id, + report_type, + report_content, + "Post removed and user removed", + ctx.get('remove_explanation', '') + ) + await self.notify_reported_user( + reported_user_name, + guild, + outcome="You have been removed from the server.", + explanation="You violated the community guidelines.", + original_message=report_content + ) + user = discord.utils.get(guild.members, name=reported_user_name) + if user: + # Track for incoming DM in pending_appeals + if user.id not in self.pending_appeals: + self.pending_appeals[user.id] = [] + self.pending_appeals[user.id].append({ + 'guild_id': guild.id, + 'reported_name': reported_user_name, + 'outcome': "You have been removed from the server.", + 'original_message': report_content, + 'explanation': "You violated the community guidelines." + }) + self.active_mod_flow = None + return - def code_format(self, text): - '''' - TODO: Once you know how you want to show that a message has been - evaluated, insert your code here for formatting the string to be - shown in the mod channel. - ''' - return "Evaluated: '" + text+ "'" + async def classify_abuse_type(self, message_content, user_context=None): + system_prompt = ( + "You are a content moderation assistant. Your job is to classify messages into one of the following top-level abuse types: " + "BULLYING, SUICIDE/SELF-HARM, SEXUALLY EXPLICIT/NUDITY, MISINFORMATION, HATE SPEECH, or DANGER.\n\n" + "If the abuse type is MISINFORMATION, you must specify the misinformation category as:\n" + "- HEALTH (with one of these subcategories: EMERGENCY, MEDICAL RESEARCH, REPRODUCTIVE HEALTH, TREATMENTS, ALTERNATIVE MEDICINE)\n" + "- ADVERTISEMENT\n" + "- NEWS (with one of these subcategories: HISTORICAL, POLITICAL, SCIENTIFIC)\n\n" + "Respond in this format exactly:\n" + "- For general types: `BULLYING`, `HATE SPEECH`, etc.\n" + "- For misinformation types: `HEALTH (EMERGENCY) MISINFORMATION`, `NEWS (POLITICAL) MISINFORMATION`, `ADVERTISEMENT MISINFORMATION`, etc.\n" + "- If the message does not fit any of these categories, respond with: `UNKNOWN`\n\n" + "Only return the final category label." + "Here is additional information about what is classified as misinformation to aid in your decision, however note that this is not complete and may miss some cases:\n" + "News and Current Events: False representations of ongoing news events. Misrepresenting dangerous natural events.\n" + "Political Content: False information about voting processes. Content supporting voter suppression. Illegitimate questioning of candidate eligibility. 
Inaccurate political quotes or statements.Encouraging interference with democratic processes.\n" + "Health Information: Dangerous, unproven medical treatments. Content encouraging dangerous health activities. Alternative medicine that has potentially dangerous effects. False medical research." + + ) + if user_context: + user_prompt = f"User context: {user_context}\nMessage: {message_content}\n\nClassify the abuse type:" + else: + user_prompt = f"Message: {message_content}\n\nClassify the abuse type:" + try: + response = self.openai_client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ] + ) + abuse_type = response.choices[0].message.content.strip().upper() + return abuse_type + except Exception as e: + print(f"Error classifying abuse type: {e}") + return "UNKNOWN" + + def normalize_abuse_type(self, label): + label = label.upper() + if "MISINFORMATION" in label: + # Handle misinformation categories + if "HEALTH" in label: + subcategory = re.search(r"\((.*?)\)", label) + if subcategory: + return f"HEALTH MISINFO - {subcategory.group(1).upper()}" + return "HEALTH MISINFO" + if "ADVERTISEMENT" in label: + return "ADVERTISING MISINFO" + if "NEWS" in label: + subcategory = re.search(r"\((.*?)\)", label) + if subcategory: + return f"NEWS MISINFO - {subcategory.group(1).upper()}" + return "NEWS MISINFO" + # Handle general abuse types + valid_labels = { + "BULLYING": "BULLYING", + "SUICIDE/SELF-HARM": "SUICIDE/SELF-HARM", + "SEXUALLY EXPLICIT/NUDITY": "SEXUALLY EXPLICIT/NUDITY", + "HATE SPEECH": "HATE SPEECH", + "DANGER": "DANGER" + } + return valid_labels.get(label, None) + + async def classify_danger_level(self, report_content, user_context=None): + system_prompt = ( + "You are a content moderation assistant. " + "Given a single user post, classify how dangerous its misinformation is to real-world safety. " + "Choose exactly one of: LOW, MEDIUM, or HIGH." + ) + if user_context: + user_prompt = f"User context: {user_context}\nPost: {report_content}\n\nClassify danger level:" + else: + user_prompt = f"Post: {report_content}\n\nClassify danger level:" + try: + response = self.openai_client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ] + ) + lvl = response.choices[0].message.content.strip().upper() + if lvl in ("LOW", "MEDIUM", "HIGH"): + return lvl.lower() + return "medium" + except Exception: + return "medium" + + async def classify_post_action(self, report_content, danger_level, user_context=None): + system_prompt = ( + "You are a content moderation assistant. " + "Given a user post and its danger level (LOW, MEDIUM, or HIGH), " + "recommend exactly one action to take on the post.\n" + "- If danger is LOW, choose ONE of: DO NOT RECOMMEND or FLAG AS UNPROVEN.\n" + "- If danger is MEDIUM or HIGH, choose ONE of: REMOVE, RAISE, or REPORT TO AUTHORITIES.\n" + "Respond with exactly one of those labels." 
+ ) + if user_context: + user_prompt = ( + f"User context: {user_context}\n" + f"Post: {report_content}\n" + f"Danger level: {danger_level.upper()}\n\n" + "Recommended post action:" + ) + else: + user_prompt = ( + f"Post: {report_content}\n" + f"Danger level: {danger_level.upper()}\n\n" + "Recommended post action:" + ) + try: + response = self.openai_client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ] + ) + action = response.choices[0].message.content.strip().upper() + mapping = { + "DO NOT RECOMMEND": "do_not_recommend", + "FLAG AS UNPROVEN": "flag_as_unproven", + "REMOVE": "remove", + "RAISE": "raise", + "REPORT TO AUTHORITIES": "report_to_authorities" + } + return mapping.get(action, None) + except Exception: + return None + + async def classify_user_action(self, report_content, danger_level, post_action, user_context=None): + if post_action != "remove": + return None + + system_prompt = ( + "You are a content moderation assistant. A post has been determined to be REMOVED. " + "Recommend exactly one follow‐up action on the user:\n" + "- RECORD INCIDENT\n" + "- TEMPORARILY MUTE\n" + "- REMOVE USER\n" + "Respond with exactly one label." + ) + if user_context: + user_prompt = ( + f"User context: {user_context}\n" + f"Post: {report_content}\n" + f"Danger level: {danger_level.upper()}\n\n" + "Recommended user action:" + ) + else: + user_prompt = ( + f"Post: {report_content}\n" + f"Danger level: {danger_level.upper()}\n\n" + "Recommended user action:" + ) + try: + response = self.openai_client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ] + ) + action = response.choices[0].message.content.strip().upper() + mapping = { + "RECORD INCIDENT": "record_incident", + "TEMPORARILY MUTE": "temporarily_mute", + "REMOVE USER": "remove_user" + } + return mapping.get(action, None) + except Exception: + return None + + async def prompt_next_moderation_step(self, mod_channel): + await mod_channel.send("Moderator, please review the report and respond with your decision.") + + async def detect_misinformation(self, message_content): + """ + Uses OpenAI API to detect if a message contains misinformation. + Returns 1 if misinformation is detected, 0 otherwise. + Prints classification outcome and timing. + """ + import time + start_time = time.time() + + system_prompt = ( + "You are a content moderation assistant specialized in detecting misinformation. " + "Your task is to analyze if a message contains misinformation in any of these categories:\n" + "- Health misinformation (emergency, medical research, reproductive health, treatments, alternative medicine)\n" + "- News misinformation (historical, political, scientific)\n" + "- Advertising misinformation\n\n" + "Respond with ONLY a single digit:\n" + "1 - if the message contains misinformation\n" + "0 - if the message does not contain misinformation\n\n" + "Do not provide any explanation, just the number." + "Here is additional information about what is classified as misinformation to aid in your decision, however note that this is not complete and may miss some cases\n" + "News and Current Events: False representations of ongoing news events. Misrepresenting dangerous natural events.\n" + "Political Content: False information about voting processes. Content supporting voter suppression. Illegitimate questioning of candidate eligibility. 
Inaccurate political quotes or statements.Encouraging interference with democratic processes.\n" + "Health Information: Dangerous, unproven medical treatments. Content encouraging dangerous health activities. Alternative medicine that has potentially dangerous effects. False medical research." + ) + + try: + response = self.openai_client.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": message_content} + ] + ) + result = response.choices[0].message.content.strip() + prediction = int(result) if result in ['0', '1'] else 0 + end_time = time.time() + processing_time = end_time - start_time + + # Print classification outcome + print(f"\nMessage Classification:") + print(f"Message: {message_content}") + print(f"Prediction: {prediction}") + print(f"Processing time: {processing_time:.2f} seconds") + print("-" * 50) + return prediction + except Exception as e: + end_time = time.time() + processing_time = end_time - start_time + + # Print error + print(f"\nClassification Error:") + print(f"Message: {message_content}") + print(f"Error: {str(e)}") + print(f"Processing time: {processing_time:.2f} seconds") + print("-" * 50) + + return 0 client = ModBot() client.run(discord_token) \ No newline at end of file diff --git a/DiscordBot/classifier/misinfo_classifier.py b/DiscordBot/classifier/misinfo_classifier.py new file mode 100644 index 00000000..45c6f15e --- /dev/null +++ b/DiscordBot/classifier/misinfo_classifier.py @@ -0,0 +1,161 @@ +import pandas as pd +import numpy as np +from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import classification_report, accuracy_score +from sklearn.pipeline import Pipeline +import nltk +from nltk.corpus import stopwords +from nltk.tokenize import word_tokenize +import re +import joblib +import os + +# Download required NLTK data +nltk.download('punkt') +nltk.download('stopwords') + +def preprocess_text(text): + """Preprocess text by removing special characters, converting to lowercase, and removing stopwords.""" + if isinstance(text, str): + # Convert to lowercase + text = text.lower() + # Remove special characters and digits + text = re.sub(r'[^a-zA-Z\s]', '', text) + # Tokenize + tokens = word_tokenize(text) + # Remove stopwords + stop_words = set(stopwords.words('english')) + tokens = [token for token in tokens if token not in stop_words] + return ' '.join(tokens) + return '' + +def load_and_preprocess_data(): + """Load and preprocess the dataset.""" + # Load the datasets + fake_df = pd.read_csv('DataSet_Misinfo_FAKE.csv') + true_df = pd.read_csv('DataSet_Misinfo_TRUE.csv') + + # Rename columns for consistency + fake_df.columns = ['index', 'text'] + true_df.columns = ['index', 'text'] + + # Add labels + fake_df['label'] = 1 # 1 for fake/misinformation + true_df['label'] = 0 # 0 for true + + # Combine datasets + df = pd.concat([fake_df, true_df], ignore_index=True) + + # Preprocess text + df['processed_text'] = df['text'].apply(preprocess_text) + + return df + +def train_classifier(save_model=True): + """Train and evaluate the misinformation classifier with cross-validation and hyperparameter tuning.""" + # Load and preprocess data + df = load_and_preprocess_data() + + # Split data into training and testing sets + X_train, X_test, y_train, y_test = train_test_split( + df['processed_text'], + df['label'], + 
test_size=0.2, + random_state=42 + ) + + # Create a pipeline with TF-IDF vectorizer and classifier + pipeline = Pipeline([ + ('tfidf', TfidfVectorizer()), + ('clf', LogisticRegression()) + ]) + + # Define hyperparameter grid + param_grid = { + 'tfidf__max_features': [3000, 5000, 7000], + 'tfidf__ngram_range': [(1, 1), (1, 2)], + 'clf__C': [0.1, 1.0, 10.0], + 'clf__max_iter': [1000] + } + + # Perform grid search with cross-validation + grid_search = GridSearchCV( + pipeline, + param_grid, + cv=5, # 5-fold cross-validation + scoring='accuracy', + n_jobs=-1 # Use all available CPU cores + ) + + print("Performing grid search with cross-validation...") + grid_search.fit(X_train, y_train) + + # Get best parameters and score + print("\nBest parameters:", grid_search.best_params_) + print("Best cross-validation score:", grid_search.best_score_) + + # Evaluate on test set + best_model = grid_search.best_estimator_ + y_pred = best_model.predict(X_test) + + print("\nTest Set Classification Report:") + print(classification_report(y_test, y_pred)) + print("\nTest Set Accuracy:", accuracy_score(y_test, y_pred)) + + # Perform additional cross-validation on the best model + cv_scores = cross_val_score(best_model, X_train, y_train, cv=5) + print("\nCross-validation scores:", cv_scores) + print("Mean CV score:", cv_scores.mean()) + print("CV score std:", cv_scores.std()) + + if save_model: + # Create models directory if it doesn't exist + os.makedirs('models', exist_ok=True) + + # Save the best model and vectorizer + model_path = 'models/misinfo_classifier.joblib' + joblib.dump(best_model, model_path) + print(f"\nModel saved to {model_path}") + + return best_model + +def load_model(model_path='classifier/models/misinfo_classifier.joblib'): + """Load a trained model from file.""" + if not os.path.exists(model_path): + raise FileNotFoundError(f"Model file not found at {model_path}") + return joblib.load(model_path) + +def predict_misinformation(text, model): + """Predict if a given text is misinformation or not.""" + # Make prediction using the loaded model + prediction = model.predict([text])[0] + probability = model.predict_proba([text])[0] + + return { + 'is_misinformation': bool(prediction), + 'confidence': float(probability[prediction]), + 'true_probability': float(probability[0]), + 'fake_probability': float(probability[1]) + } + +if __name__ == "__main__": + # # Train the classifier + # print("Training classifier...") + # model = train_classifier(save_model=True) + + # # Example usage with loaded model + # test_text = "This is an example text to test the classifier." 
+ # result = predict_misinformation(test_text, model) + + # print("\nExample prediction:") + # print(f"Text: {test_text}") + # print(f"Is misinformation: {result['is_misinformation']}") + # print(f"Confidence: {result['confidence']:.2f}") + # print(f"True probability: {result['true_probability']:.2f}") + # print(f"Fake probability: {result['fake_probability']:.2f}") + + model = load_model() + prediction = predict_misinformation("Russia did not interfere in the 2016 presidential election", model) + print(prediction) \ No newline at end of file diff --git a/DiscordBot/classifier/requirements.txt b/DiscordBot/classifier/requirements.txt new file mode 100644 index 00000000..7c2754b0 --- /dev/null +++ b/DiscordBot/classifier/requirements.txt @@ -0,0 +1,4 @@ +pandas>=1.3.0 +numpy>=1.21.0 +scikit-learn>=0.24.2 +nltk>=3.6.0 \ No newline at end of file diff --git a/DiscordBot/report.py b/DiscordBot/report.py index d2bba994..d04568d0 100644 --- a/DiscordBot/report.py +++ b/DiscordBot/report.py @@ -6,7 +6,41 @@ class State(Enum): REPORT_START = auto() AWAITING_MESSAGE = auto() MESSAGE_IDENTIFIED = auto() + AWAITING_ABUSE_TYPE = auto() + AWAITING_MISINFO_CATEGORY = auto() + AWAITING_HEALTH_CATEGORY = auto() + AWAITING_NEWS_CATEGORY = auto() REPORT_COMPLETE = auto() + AWAITING_APPEAL = auto() + APPEAL_REVIEW = auto() + AWAITING_USER_CONFIRMATION = auto() + AWAITING_CONTEXT_CONFIRMATION = auto() + AWAITING_CONTEXT_TEXT = auto() + +class AbuseType(Enum): + BULLYING = "bullying" + SUICIDE = "suicide/self-harm" + EXPLICIT = "sexually explicit/nudity" + MISINFORMATION = "misinformation" + HATE = "hate speech" + DANGER = "danger" + +class MisinfoCategory(Enum): + HEALTH = "health" + ADVERTISEMENT = "advertisement" + NEWS = "news" + +class HealthCategory(Enum): + EMERGENCY = "emergency" + MEDICAL_RESEARCH = "medical research" + REPRODUCTIVE = "reproductive healthcare" + TREATMENTS = "treatments" + ALTERNATIVE = "alternative medicine" + +class NewsCategory(Enum): + HISTORICAL = "historical" + POLITICAL = "political" + SCIENCE = "science" class Report: START_KEYWORD = "report" @@ -17,28 +51,25 @@ def __init__(self, client): self.state = State.REPORT_START self.client = client self.message = None - - async def handle_message(self, message): - ''' - This function makes up the meat of the user-side reporting flow. It defines how we transition between states and what - prompts to offer at each of those states. You're welcome to change anything you want; this skeleton is just here to - get you started and give you a model for working with Discord. - ''' + self.abuse_type = None + self.misinfo_category = None + self.specific_category = None + self.user_context = None - if message.content == self.CANCEL_KEYWORD: + async def handle_message(self, message): + if message.content.lower() == self.CANCEL_KEYWORD: self.state = State.REPORT_COMPLETE return ["Report cancelled."] - + if self.state == State.REPORT_START: - reply = "Thank you for starting the reporting process. " + reply = "Thank you for starting the reporting process. " reply += "Say `help` at any time for more information.\n\n" reply += "Please copy paste the link to the message you want to report.\n" reply += "You can obtain this link by right-clicking the message and clicking `Copy Message Link`." self.state = State.AWAITING_MESSAGE return [reply] - + if self.state == State.AWAITING_MESSAGE: - # Parse out the three ID strings from the message link m = re.search('/(\d+)/(\d+)/(\d+)', message.content) if not m: return ["I'm sorry, I couldn't read that link. 
Please try again or say `cancel` to cancel."] @@ -49,24 +80,214 @@ async def handle_message(self, message): if not channel: return ["It seems this channel was deleted or never existed. Please try again or say `cancel` to cancel."] try: - message = await channel.fetch_message(int(m.group(3))) + self.message = await channel.fetch_message(int(m.group(3))) except discord.errors.NotFound: return ["It seems this message was deleted or never existed. Please try again or say `cancel` to cancel."] - # Here we've found the message - it's up to you to decide what to do next! - self.state = State.MESSAGE_IDENTIFIED - return ["I found this message:", "```" + message.author.name + ": " + message.content + "```", \ - "This is all I know how to do right now - it's up to you to build out the rest of my reporting flow!"] - - if self.state == State.MESSAGE_IDENTIFIED: - return [""] + abuse_type_raw = await self.client.classify_abuse_type(self.message.content) + self.abuse_type = self.client.normalize_abuse_type(abuse_type_raw) + if self.abuse_type: + self.state = State.AWAITING_USER_CONFIRMATION + return [ + f"I found this message:", + f"```{self.message.author.name}: {self.message.content}```", + f"The system classified this message as {self.abuse_type}.", + "Do you agree with this classification?\n1. Yes\n2. No" + ] + else: + # If the LLM cannot classify, fall back to manual abuse type selection + self.state = State.AWAITING_ABUSE_TYPE + reply = "What type of abuse would you like to report?\n" + reply += "1. BULLYING\n" + reply += "2. SUICIDE/SELF-HARM\n" + reply += "3. SEXUALLY EXPLICIT/NUDITY\n" + reply += "4. MISINFORMATION\n" + reply += "5. HATE SPEECH\n" + reply += "6. DANGER" + return [ + f"I found this message:", + f"```{self.message.author.name}: {self.message.content}```", + reply + ] - return [] + if self.state == State.AWAITING_USER_CONFIRMATION: + if message.content.strip() == '1': # User agrees with classification + self.state = State.AWAITING_CONTEXT_CONFIRMATION + # stash everything needed: + self.pending_report = { + 'report_type': self.abuse_type, + 'report_content': self.message.content, + 'message_author': self.message.author.name + } + return ["Do you want to add additional context for why you are reporting this message?\n1. Yes\n2. No"] - def report_complete(self): - return self.state == State.REPORT_COMPLETE - + elif message.content.strip() == '2': # User disagrees with classification + self.state = State.AWAITING_ABUSE_TYPE + reply = "What type of abuse would you like to report?\n" + reply += "1. BULLYING\n" + reply += "2. SUICIDE/SELF-HARM\n" + reply += "3. SEXUALLY EXPLICIT/NUDITY\n" + reply += "4. MISINFORMATION\n" + reply += "5. HATE SPEECH\n" + reply += "6. DANGER" + return [reply] + else: + return ["Invalid response. Please reply with 1 for Yes or 2 for No."] + + if self.state == State.AWAITING_ABUSE_TYPE: + abuse_type = message.content.strip() + abuse_types = { + '1': AbuseType.BULLYING, + '2': AbuseType.SUICIDE, + '3': AbuseType.EXPLICIT, + '4': AbuseType.MISINFORMATION, + '5': AbuseType.HATE, + '6': AbuseType.DANGER + } + + if abuse_type not in abuse_types: + return ["Please select a valid option (1-6) from the list above."] + + self.abuse_type = abuse_types[abuse_type] + + if self.abuse_type == AbuseType.MISINFORMATION: + self.state = State.AWAITING_MISINFO_CATEGORY + return ["Please select the misinformation category:\n1. HEALTH\n2. ADVERTISEMENT\n3. 
NEWS"] + else: + self.state = State.AWAITING_CONTEXT_CONFIRMATION + self.pending_report = { + 'report_type': self.abuse_type.value.upper(), + 'report_content': self.message.content, + 'message_author': self.message.author.name + } + return ["Do you want to add additional context for why you are reporting this message?\n1. Yes\n2. No"] + + if self.state == State.AWAITING_CONTEXT_CONFIRMATION: + if message.content.strip() == '1': # wants to add context + self.state = State.AWAITING_CONTEXT_TEXT + return ["Please enter additional context (why you are reporting):"] + elif message.content.strip() == '2': # no context + # call start_moderation_flow without context + data = self.pending_report + self.pending_report = None + self.state = State.REPORT_COMPLETE + await self.client.start_moderation_flow( + report_type=data['report_type'], + report_content=data['report_content'], + message_author=data['message_author'], + user_context=None + ) + return ["Thank you. Your report has been sent to the moderation team."] + else: + return ["Invalid choice. Reply with 1 (Yes) or 2 (No)."] + + if self.state == State.AWAITING_CONTEXT_TEXT: + ctx_text = message.content.strip() + data = self.pending_report + self.pending_report = None + self.user_context = ctx_text + self.state = State.REPORT_COMPLETE + await self.client.start_moderation_flow( + report_type=data['report_type'], + report_content=data['report_content'], + message_author=data['message_author'], + user_context=ctx_text + ) + return ["Thank you. Your report and context have been sent to the moderation team."] + if self.state == State.AWAITING_MISINFO_CATEGORY: + category = message.content.strip() + misinfo_categories = { + '1': MisinfoCategory.HEALTH, + '2': MisinfoCategory.ADVERTISEMENT, + '3': MisinfoCategory.NEWS + } - + if category not in misinfo_categories: + return ["Please select a valid option (1-3) from the list above."] + self.misinfo_category = misinfo_categories[category] + + if self.misinfo_category == MisinfoCategory.HEALTH: + self.state = State.AWAITING_HEALTH_CATEGORY + return ["Please specify the health misinformation category:\n1. EMERGENCY\n2. MEDICAL RESEARCH\n3. REPRODUCTIVE HEALTHCARE\n4. TREATMENTS\n5. ALTERNATIVE MEDICINE"] + elif self.misinfo_category == MisinfoCategory.NEWS: + self.state = State.AWAITING_NEWS_CATEGORY + return ["Please specify the news category:\n1. HISTORICAL\n2. POLITICAL\n3. 
SCIENCE"] + else: # Advertisement + self.state = State.REPORT_COMPLETE + mod_channel = self.client.mod_channels[self.message.guild.id] + await mod_channel.send(f"ADVERTISING MISINFO:\n{self.message.author.name}: {self.message.content}") + await self.client.start_moderation_flow( + report_type="ADVERTISING MISINFO", + report_content=self.message.content, + message_author=self.message.author.name + ) + return ["This has been reported to our ad team."] + + if self.state == State.AWAITING_HEALTH_CATEGORY: + health_cat = message.content.strip() + health_categories = { + '1': HealthCategory.EMERGENCY, + '2': HealthCategory.MEDICAL_RESEARCH, + '3': HealthCategory.REPRODUCTIVE, + '4': HealthCategory.TREATMENTS, + '5': HealthCategory.ALTERNATIVE + } + + if health_cat not in health_categories: + return ["Please select a valid option (1-5) from the list above."] + + self.specific_category = health_categories[health_cat] + self.state = State.REPORT_COMPLETE + mod_channel = self.client.mod_channels[self.message.guild.id] + await mod_channel.send(f"HEALTH MISINFO - {self.specific_category.value.upper()}:\n{self.message.author.name}: {self.message.content}") + await self.client.start_moderation_flow( + report_type=f"HEALTH MISINFO - {self.specific_category.value.upper()}", + report_content=self.message.content, + message_author=self.message.author.name + ) + return ["This has been sent to our moderation team."] + + if self.state == State.AWAITING_NEWS_CATEGORY: + news_cat = message.content.strip() + news_categories = { + '1': NewsCategory.HISTORICAL, + '2': NewsCategory.POLITICAL, + '3': NewsCategory.SCIENCE + } + + if news_cat not in news_categories: + return ["Please select a valid option (1-3) from the list above."] + + self.specific_category = news_categories[news_cat] + self.state = State.REPORT_COMPLETE + mod_channel = self.client.mod_channels[self.message.guild.id] + await mod_channel.send(f"NEWS MISINFO - {self.specific_category.value.upper()}:\n{self.message.author.name}: {self.message.content}") + await self.client.start_moderation_flow( + report_type=f"NEWS MISINFO - {self.specific_category.value.upper()}", + report_content=self.message.content, + message_author=self.message.author.name + ) + return ["This has been sent to our team."] + + return [] + + async def notify_reported_user(self, user_name, guild, outcome, explanation=None): + # Find the user object by name in the guild + user = discord.utils.get(guild.members, name=user_name) + if user: + try: + msg = f"Your message was reviewed by moderators. Outcome: {outcome}." + if explanation: + msg += f"\nReason: {explanation}" + msg += "\nIf you believe this was a mistake, you may reply to this message to appeal." 
+ await user.send(msg) + if outcome == "Post removed.": + await self.notify_user_of_appeal_option(user_name, guild, explanation) + except Exception as e: + print(f"Failed to DM user {user_name}: {e}") + + def report_complete(self): + """Returns whether the current report is in a completed state""" + return self.state == State.REPORT_COMPLETE \ No newline at end of file diff --git a/DiscordBot/user_stats.json b/DiscordBot/user_stats.json new file mode 100644 index 00000000..d83a703e --- /dev/null +++ b/DiscordBot/user_stats.json @@ -0,0 +1,35 @@ +{ + "484531188581793803": { + "total_reports": 4, + "reports": [ + { + "timestamp": "2025-06-02T15:30:40.219246", + "report_type": "NEWS MISINFO - POLITICAL", + "report_content": "covid was created in a chinese lab", + "outcome": "Post removed and incident recorded", + "explanation": "bad" + }, + { + "timestamp": "2025-06-02T15:31:34.897102", + "report_type": "NEWS MISINFO - POLITICAL", + "report_content": "The recent election was rigged using a secret satellite controlled by a foreign government.", + "outcome": "Post removed and incident recorded", + "explanation": "f" + }, + { + "timestamp": "2025-06-02T15:31:50.237193", + "report_type": "NEWS MISINFO - SCIENTIFIC", + "report_content": "covid was created in a chinese lab", + "outcome": "Post removed and incident recorded", + "explanation": "f" + }, + { + "timestamp": "2025-06-02T15:32:04.112294", + "report_type": "NEWS MISINFO - POLITICAL", + "report_content": "obama is not a us citizen", + "outcome": "Post flagged as unproven/non-scientific", + "explanation": "he is" + } + ] + } +} \ No newline at end of file diff --git a/DiscordBot/user_stats.py b/DiscordBot/user_stats.py new file mode 100644 index 00000000..cc2165f7 --- /dev/null +++ b/DiscordBot/user_stats.py @@ -0,0 +1,53 @@ +import json +import os +from datetime import datetime + +class UserStats: + def __init__(self): + self.stats_file = 'user_stats.json' + # Clear the stats file on initialization + self._clear_stats() + self.stats = self._load_stats() + + def _clear_stats(self): + # Create an empty stats file + with open(self.stats_file, 'w') as f: + json.dump({}, f) + + def _load_stats(self): + if os.path.exists(self.stats_file): + with open(self.stats_file, 'r') as f: + return json.load(f) + return {} + + def _save_stats(self): + with open(self.stats_file, 'w') as f: + json.dump(self.stats, f, indent=2) + + def add_report(self, user_id, report_type, report_content, outcome, explanation=None): + if user_id not in self.stats: + self.stats[user_id] = { + 'total_reports': 0, + 'reports': [] + } + + report = { + 'timestamp': datetime.now().isoformat(), + 'report_type': report_type, + 'report_content': report_content, + 'outcome': outcome, + 'explanation': explanation + } + + self.stats[user_id]['reports'].append(report) + self.stats[user_id]['total_reports'] = len(self.stats[user_id]['reports']) + self._save_stats() + + def get_user_stats(self, user_id): + return self.stats.get(user_id, { + 'total_reports': 0, + 'reports': [] + }) + + def get_all_stats(self): + return self.stats \ No newline at end of file
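For orientation, a minimal, illustrative sketch of how UserStats is exercised by the bot; the user ID and strings below are placeholders rather than real server data, and running it would overwrite user_stats.json because the constructor clears the file:

    from user_stats import UserStats

    stats = UserStats()  # clears user_stats.json on construction (_clear_stats)
    stats.add_report(
        user_id=484531188581793803,              # placeholder Discord user ID
        report_type="NEWS MISINFO - POLITICAL",  # label shape produced by normalize_abuse_type
        report_content="example reported message",
        outcome="Post flagged as unproven/non-scientific",
        explanation="moderator-supplied explanation",
    )
    print(stats.get_user_stats(484531188581793803)["total_reports"])  # -> 1

The persisted JSON then has the same shape as the sample user_stats.json committed in this patch.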