# Python Script for Data Validation
# Addressing OWASP Top 10 for LLMs Mapped to MITRE ATLAS
import re
import html
from cryptography.fernet import Fernet
import numpy as np
from sklearn.ensemble import IsolationForest
# Sample key generation - In production, use a securely stored key
key = Fernet.generate_key()
cipher_suite = Fernet(key)
# LLM01 & LLM02: Input Validation and Secure Output Handling
def validate_input(input_string):
    # Allow-list validation to reduce the risk of injection attacks
    return bool(re.match(r"^[a-zA-Z0-9 .,!?']+$", input_string))
def secure_output(output_string):
    # HTML-encode the output to prevent XSS attacks
    return html.escape(output_string)
# LLM03: Anomaly Detection in Training Data
def detect_anomalies(data):
    # Assumes data is a 2-D NumPy array of shape (n_samples, n_features)
    iso_forest = IsolationForest()
    labels = iso_forest.fit_predict(data)
    # fit_predict labels outliers as -1; return their row indices
    return np.where(labels == -1)[0]
# LLM06: Encrypting Sensitive Information
def encrypt_data(plain_text):
    # Encrypt data using Fernet symmetric (authenticated) encryption
    return cipher_suite.encrypt(plain_text.encode('utf-8'))
def decrypt_data(encrypted_text):
    # Decrypt data and return it as a UTF-8 string
    return cipher_suite.decrypt(encrypted_text).decode('utf-8')
# Example Usage
if __name__ == "__main__":
# Validate Input Example
user_input = "Hello, world!"
if validate_input(user_input):
print("Valid input.")
else:
print("Invalid input detected!")
# Secure Output Example
secure_html = secure_output("<script>alert('XSS')</script>")
print(f"Secured HTML: {secure_html}")
# Anomaly Detection Example
training_data = np.random.rand(100, 5) # Dummy training data
outlier_indices = detect_anomalies(training_data)
print(f"Outlier Indices: {outlier_indices}")
# Encrypting/Decrypting Data Example
secret_info = "Sensitive information"
encrypted_info = encrypt_data(secret_info)
print(f"Encrypted Info: {encrypted_info}")
decrypted_info = decrypt_data(encrypted_info)
print(f"Decrypted Info: {decrypted_info}")
# Additional imports for extended functionalities
import subprocess
import os
import logging
# LLM07: Secure Plugin Design
def load_secure_plugin(plugin_path):
    # Validate the path and run the plugin in a separate process (mock example)
    if not os.path.exists(plugin_path) or not plugin_path.endswith('.py'):
        logging.error("Invalid plugin path.")
        return False
    try:
        # Run the plugin in an isolated subprocess with a timeout; a real
        # implementation should also sandbox it and restrict its capabilities
        subprocess.run([sys.executable, plugin_path], check=True, timeout=30)
        return True
    except Exception as e:
        logging.error(f"Failed to load plugin securely: {e}")
        return False
# LLM08: Limiting Excessive Agency
def limit_decision_making(decision_function):
    # Decorator demonstrating how to constrain automated decision-making
    def wrapper(*args, **kwargs):
        # Insert logic here to limit the decision-making capabilities,
        # e.g. require user confirmation or additional human oversight
        decision = decision_function(*args, **kwargs)
        logging.info(f"Decision made: {decision}. Confirm before proceeding.")
        return decision
    return wrapper
@limit_decision_making
def make_decision(data):
    # Dummy decision-making function
    return "approve" if data else "deny"
# LLM09: Educating Stakeholders on LLM Capabilities and Limitations
def educate_stakeholders():
    # Placeholder representing actions taken to educate stakeholders
    print("Educating stakeholders on the capabilities and limitations of LLMs.")
# LLM10: Secure Model Storage and Transmission
def secure_model_storage(model_data):
    # Encrypt model data for secure storage
    return encrypt_data(model_data)
def secure_model_transmission(model_data):
    # Encrypt model data before transmission
    return encrypt_data(model_data)
# Example Usage (continued)
if __name__ == "__main__":
    # Secure Plugin Loading Example
    if load_secure_plugin('secure_plugin.py'):
        print("Plugin loaded securely.")
    else:
        print("Failed to load plugin securely.")
    # Limit Decision Making Example
    decision = make_decision(True)
    print(f"Decision: {decision}")
    # Educate Stakeholders
    educate_stakeholders()
    # Secure Model Storage and Transmission Example
    model_data = "Dummy model data"
    secure_storage = secure_model_storage(model_data)
    print(f"Model securely stored: {secure_storage}")
    secure_transmission = secure_model_transmission(model_data)
    print(f"Model securely transmitted: {secure_transmission}")
### Implementing the Code
For a practical implementation of the code above:
- Replace the dummy and mock functionality with real logic suited to your application's architecture and security requirements.
- The secure plugin loader (`load_secure_plugin`) and decision-limiting wrapper (`limit_decision_making`) are basic templates; customize them to the security and operational requirements of your plugins and decision-making processes.
- Regularly review and update your security practices, including encryption methods and key management, to keep pace with evolving cybersecurity threats and standards (a minimal key-loading sketch follows this list).
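As a minimal sketch of one key-management approach, the snippet below loads a pre-provisioned Fernet key from an environment variable instead of generating one at process start; the variable name `FERNET_KEY` and the `load_cipher_suite` helper are illustrative assumptions, not part of the script above.

```python
import os
from cryptography.fernet import Fernet

def load_cipher_suite(env_var="FERNET_KEY"):
    # Read a base64-encoded Fernet key from the environment (for example,
    # injected by a secrets manager) rather than generating it at runtime.
    key = os.environ.get(env_var)  # hypothetical variable name
    if key is None:
        raise RuntimeError(f"Missing encryption key in environment variable {env_var}")
    return Fernet(key.encode("utf-8"))

# cipher_suite = load_cipher_suite()  # would replace Fernet.generate_key() above
```

In practice the key would live in a dedicated secrets store and be rotated periodically; loading it at startup leaves the rest of the encryption code unchanged.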
By integrating these practices into your development and security processes, you can address the identified vulnerabilities in LLM applications, enhancing both security and reliability.
### Recommended Automated Validation Tools
For the vulnerabilities addressed, the following automated tools can be integrated into your development and deployment pipelines:
- **OWASP ZAP**: Automate web application security testing.
- **Bandit**: Integrate into CI/CD pipelines to automatically scan Python code for common security issues (a minimal wrapper sketch follows this list).
- **SQLMap**: Use in automated testing environments to check for SQL injection vulnerabilities.
- **PyCQA/flake8**: Integrate into your development process for continuous code-quality and style checks that complement the security scanners above.
- **TensorFlow Privacy**: Train models with differential privacy, which bounds the influence of individual training examples and so helps mitigate training-data poisoning and data-extraction risks.
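As one illustrative way to wire the Bandit scan into a pipeline, the wrapper below shells out to the `bandit` CLI and fails the build when findings are reported; the `run_bandit` helper and the `src` target directory are assumptions for this example, not an official integration.

```python
import subprocess
import sys

def run_bandit(target_dir="src"):
    # Recursively scan the target directory with Bandit; Bandit exits
    # non-zero when it reports findings, so a zero return code means clean.
    result = subprocess.run(
        ["bandit", "-r", target_dir],
        capture_output=True,
        text=True,
    )
    print(result.stdout)
    return result.returncode == 0

if __name__ == "__main__":
    # Fail the CI job if Bandit reports any issues
    sys.exit(0 if run_bandit() else 1)
```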
Incorporating these tools and practices helps strengthen the security posture of LLM-based applications against the vulnerabilities identified above and keeps them aligned with cybersecurity best practices.