-
Notifications
You must be signed in to change notification settings - Fork 3
Tech Debt: Decompose ParameterConfiguration.cs (547 lines) into small... #113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,149 @@ | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Linq; | ||
| using System.Security; | ||
| using System.Text; | ||
| using Microsoft.Extensions.Logging; | ||
| using Microsoft.Health.Fhir.Anonymizer.Core.Exceptions; | ||
|
|
||
| namespace Microsoft.Health.Fhir.Anonymizer.Core.AnonymizerConfigurations | ||
| { | ||
| /// <summary> | ||
| /// Static utility class that encapsulates all cryptographic key security validation logic | ||
| /// for anonymization configuration parameters. | ||
| /// | ||
| /// SECURITY: This class enforces that keys are not placeholder values, not whitespace-only, | ||
| /// not obviously weak values, and meet minimum length/size requirements. | ||
| /// </summary> | ||
| public static class CryptographicKeyValidator | ||
| { | ||
| private static readonly ILogger s_logger = AnonymizerLogging.CreateLogger<ParameterConfiguration>(); | ||
|
|
||
| /// <summary> | ||
| /// Valid AES key sizes in bits. Used to validate EncryptKey without allocating an Aes instance. | ||
| /// AES supports 128-bit (16 bytes), 192-bit (24 bytes), and 256-bit (32 bytes) keys. | ||
| /// </summary> | ||
| private static readonly HashSet<int> s_validAesKeySizeBits = new HashSet<int> { 128, 192, 256 }; | ||
|
|
||
| /// <summary> | ||
| /// Dangerous placeholder patterns that must be rejected. | ||
| /// </summary> | ||
| private static readonly string[] s_dangerousPlaceholderPatterns = new[] | ||
| { | ||
| "$HMAC_KEY", | ||
| "YOUR_KEY_HERE", | ||
| "YOUR_SECURE_KEY", | ||
| "YOUR_ENCRYPTION_KEY", | ||
| "PLACEHOLDER", | ||
| "CHANGE_ME", | ||
| "CHANGEME", | ||
| "REPLACE_ME", | ||
| "EXAMPLE_KEY", | ||
| "TEST_KEY", | ||
| "SAMPLE_KEY", | ||
| "INSERT_KEY_HERE", | ||
| "<YOUR_KEY>", | ||
| "[YOUR_KEY]", | ||
| "{{YOUR_KEY}}", | ||
| "TODO", | ||
| "FIXME" | ||
| }; | ||
|
|
||
| /// <summary> | ||
| /// Validate a key parameter doesn't contain placeholder values or consist solely of whitespace. | ||
| /// SECURITY CRITICAL: Prevents use of example/template keys and whitespace-only values in production. | ||
| /// </summary> | ||
| /// <param name="keyValue">The key value to validate.</param> | ||
| /// <param name="parameterName">The configuration parameter name (for error messages).</param> | ||
| /// <param name="keyType">Human-readable key type description (for error messages).</param> | ||
| public static void ValidateKeyParameter(string keyValue, string parameterName, string keyType) | ||
| { | ||
| if (string.IsNullOrEmpty(keyValue)) | ||
| { | ||
| return; // Empty/null keys are allowed if the feature is not used | ||
| } | ||
|
|
||
| // SECURITY: Reject whitespace-only keys — they provide no entropy | ||
| if (string.IsNullOrWhiteSpace(keyValue)) | ||
| { | ||
| throw new SecurityException( | ||
| $"SECURITY ERROR: Whitespace-only {keyType} key detected in '{parameterName}'. " + | ||
| "A key consisting entirely of whitespace characters provides no entropy and must not be used. " + | ||
| "Generate a cryptographically secure random key using: openssl rand -base64 32"); | ||
| } | ||
|
|
||
| // Trim and convert to uppercase for case-insensitive comparison | ||
| var normalizedKey = keyValue.Trim().ToUpperInvariant(); | ||
|
|
||
| // Check against all dangerous placeholder patterns | ||
| foreach (var pattern in s_dangerousPlaceholderPatterns) | ||
| { | ||
| if (normalizedKey.Contains(pattern)) | ||
| { | ||
| throw new SecurityException( | ||
| $"SECURITY ERROR: Placeholder {keyType} key detected in '{parameterName}'.\n\n" + | ||
| $"The configuration contains a placeholder value ('{pattern}') that must be replaced " + | ||
| "with a cryptographically secure key before use.\n\n" + | ||
| "TO GENERATE A SECURE KEY:\n" + | ||
| " Linux/macOS: openssl rand -base64 32\n" + | ||
| " Windows: pwsh -Command \"[Convert]::ToBase64String((1..32 | ForEach-Object { Get-Random -Minimum 0 -Maximum 256 }))\"\n" + | ||
| " .NET: var key = Convert.ToBase64String(RandomNumberGenerator.GetBytes(32));\n\n" + | ||
| "SECURITY WARNING: Using placeholder keys in production:\n" + | ||
| " - Compromises cryptographic operations\n" + | ||
| " - May lead to predictable hash values\n" + | ||
| " - Enables re-identification attacks\n" + | ||
| " - Violates privacy guarantees\n\n" + | ||
| "BEST PRACTICES:\n" + | ||
| " - Never commit actual keys to version control\n" + | ||
| " - Use environment variables: Environment.GetEnvironmentVariable(\"CRYPTO_KEY\")\n" + | ||
| " - Use Azure Key Vault, AWS Secrets Manager, or similar for production\n" + | ||
| " - Rotate keys periodically according to your security policy\n" + | ||
| " - Use different keys for different environments (dev/staging/production)\n"); | ||
| } | ||
| } | ||
|
|
||
| // Additional checks for weak or test keys | ||
| if (keyValue.Length < 16) | ||
| { | ||
| s_logger.LogWarning( | ||
| $"The {keyType} key in '{parameterName}' is very short ({keyValue.Length} characters). " + | ||
| "Recommended minimum is 32 bytes (44 characters in Base64). " + | ||
| "Short keys provide inadequate security and may be vulnerable to brute force attacks."); | ||
| } | ||
|
|
||
| // Check for obviously weak patterns | ||
| if (keyValue.Equals("12345678", StringComparison.Ordinal) || | ||
| keyValue.Equals("password", StringComparison.OrdinalIgnoreCase) || | ||
| keyValue.Equals("secret", StringComparison.OrdinalIgnoreCase) || | ||
| keyValue.Equals("key", StringComparison.OrdinalIgnoreCase) || | ||
| keyValue.All(c => c == keyValue[0])) // All same character | ||
| { | ||
| throw new SecurityException( | ||
| $"SECURITY ERROR: Weak {keyType} key detected in '{parameterName}'. " + | ||
| "The key appears to be a common weak value (e.g., 'password', '12345678', repeated characters). " + | ||
| "Generate a cryptographically secure random key using: openssl rand -base64 32"); | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Validate that the encrypt key size is a valid AES key size (128, 192, or 256 bits). | ||
| /// Uses a static HashSet of valid sizes to avoid allocating an Aes instance on every call. | ||
| /// Only validates when encryptKey is non-null and non-empty. | ||
| /// </summary> | ||
| /// <param name="encryptKey">The encryption key string to validate.</param> | ||
| public static void ValidateEncryptKeySize(string encryptKey) | ||
| { | ||
| if (string.IsNullOrEmpty(encryptKey)) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| var encryptKeySize = Encoding.UTF8.GetByteCount(encryptKey) * 8; | ||
| if (!s_validAesKeySizeBits.Contains(encryptKeySize)) | ||
| { | ||
| throw new AnonymizerConfigurationException( | ||
| $"Invalid encrypt key size : {encryptKeySize} bits! Please provide key sizes of 128, 192 or 256 bits."); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,175 @@ | ||
| using System; | ||
| using System.Runtime.Serialization; | ||
| using Microsoft.Extensions.Logging; | ||
|
|
||
| namespace Microsoft.Health.Fhir.Anonymizer.Core.AnonymizerConfigurations | ||
| { | ||
| /// <summary> | ||
| /// Configuration parameters for differential privacy processing. | ||
| /// | ||
| /// REFERENCES: | ||
| /// - NIST Special Publication 800-188: "De-Identifying Government Datasets" (2023 Draft) | ||
| /// https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-188-draft2.pdf | ||
| /// - Dwork, C., & Roth, A. (2014). "The Algorithmic Foundations of Differential Privacy." | ||
| /// Foundations and Trends in Theoretical Computer Science, 9(3-4), 211-407. | ||
| /// - Apple Differential Privacy Team (2017). "Learning with Privacy at Scale." | ||
| /// Apple Machine Learning Journal, Vol. 1, Issue 8. | ||
| /// </summary> | ||
| [DataContract] | ||
| public class DifferentialPrivacyParameterConfiguration | ||
| { | ||
| private static readonly ILogger s_logger = AnonymizerLogging.CreateLogger<DifferentialPrivacyParameterConfiguration>(); | ||
|
|
||
| /// <summary> | ||
| /// Privacy budget (epsilon) - lower values provide stronger privacy. | ||
| /// | ||
| /// GUIDANCE (NIST SP 800-188): | ||
| /// - ε ≤ 0.1: Strong privacy protection (recommended for sensitive health data) | ||
| /// - ε = 0.5-1.0: Moderate privacy (reasonable for many applications) | ||
| /// - ε = 1.0-10.0: Weak privacy (use only when data utility is critical) | ||
| /// - ε > 10: Minimal privacy guarantee | ||
| /// | ||
| /// DEFAULT: 1.0 (reasonable starting point; adjust based on sensitivity analysis) | ||
| /// </summary> | ||
| [DataMember(Name = "epsilon")] | ||
| public double Epsilon { get; set; } = 1.0; | ||
|
|
||
| /// <summary> | ||
| /// Delta parameter for (epsilon, delta)-differential privacy. | ||
| /// Represents the probability of privacy failure. Should be cryptographically small. | ||
| /// | ||
| /// GUIDANCE: | ||
| /// - For (ε,δ)-differential privacy, δ should be much smaller than 1/n where n is dataset size | ||
| /// - Typical values: 1e-5 to 1e-8 for healthcare datasets | ||
| /// - δ = 0 gives pure ε-differential privacy (Laplace mechanism) | ||
| /// - Only applies to Gaussian mechanism; Laplace mechanism has δ=0 | ||
| /// | ||
| /// DEFAULT: 1e-5 (appropriate for datasets of up to ~100,000 records) | ||
| /// </summary> | ||
| [DataMember(Name = "delta")] | ||
| public double Delta { get; set; } = 1e-5; | ||
|
|
||
| /// <summary> | ||
| /// Sensitivity of the query function (global sensitivity). | ||
| /// Measures the maximum change in output when one record is added/removed. | ||
| /// | ||
| /// GUIDANCE: | ||
| /// - For counting queries: sensitivity = 1 | ||
| /// - For sum queries: sensitivity = max possible value | ||
| /// - For average queries: sensitivity = range / n | ||
| /// - Higher sensitivity requires more noise for same epsilon | ||
| /// | ||
| /// DEFAULT: 1.0 (appropriate for counts and bounded numeric fields) | ||
| /// </summary> | ||
| [DataMember(Name = "sensitivity")] | ||
| public double Sensitivity { get; set; } = 1.0; | ||
|
|
||
| /// <summary> | ||
| /// Maximum cumulative epsilon budget before warning. | ||
| /// | ||
| /// COMPOSITION: Under sequential composition, total privacy loss is sum of individual ε values. | ||
| /// Advanced composition theorems can provide tighter bounds but are not yet implemented. | ||
| /// | ||
| /// DEFAULT: 1.0 (reasonable for most healthcare research applications per NIST guidance) | ||
| /// | ||
| /// WARNING: Exceeding this budget across multiple queries degrades privacy guarantees. | ||
| /// </summary> | ||
| [DataMember(Name = "maxCumulativeEpsilon")] | ||
| public double MaxCumulativeEpsilon { get; set; } = 1.0; | ||
|
|
||
| /// <summary> | ||
| /// Whether to use advanced composition for better privacy accounting. | ||
| /// | ||
| /// ADVANCED COMPOSITION THEOREM (Dwork et al.): | ||
| /// k queries with (ε,δ)-DP satisfy (ε', kδ+δ')-DP where: | ||
| /// ε' ≈ √(2k ln(1/δ')) * ε + k*ε*(e^ε - 1) | ||
| /// | ||
| /// This can significantly improve privacy accounting for many queries. | ||
| /// | ||
| /// DEFAULT: false (uses simple sequential composition: total ε = Σε_i) | ||
| /// | ||
| /// NOTE: Advanced composition is not yet implemented. Setting this to true will | ||
| /// log a warning and fall back to sequential composition. | ||
| /// </summary> | ||
| [DataMember(Name = "useAdvancedComposition")] | ||
| public bool UseAdvancedComposition { get; set; } = false; | ||
|
|
||
| /// <summary> | ||
| /// Noise mechanism to use for differential privacy. | ||
| /// | ||
| /// MECHANISMS: | ||
| /// - "laplace": Laplace mechanism (ε-DP, δ=0). Standard choice for numeric queries. | ||
| /// Noise scale = sensitivity/ε. Use for unbounded queries. | ||
| /// - "gaussian": Gaussian mechanism ((ε,δ)-DP). Use when approximate DP is acceptable. | ||
| /// Requires δ > 0. Better utility for large datasets. Use for L2-sensitivity queries. | ||
| /// - "exponential": Exponential mechanism. For categorical/selection queries. | ||
| /// Currently implemented using Laplace for numeric data. | ||
| /// | ||
| /// DEFAULT: "laplace" (provides pure ε-differential privacy) | ||
| /// </summary> | ||
| [DataMember(Name = "mechanism")] | ||
| public string Mechanism { get; set; } = "laplace"; | ||
|
|
||
| /// <summary> | ||
| /// Whether to track and enforce cumulative privacy budget across queries. | ||
| /// When enabled, the system monitors total epsilon spend and warns when approaching | ||
| /// <see cref="MaxCumulativeEpsilon"/>. | ||
| /// | ||
| /// DEFAULT: false (budget tracking disabled) | ||
| /// </summary> | ||
| [DataMember(Name = "privacyBudgetTrackingEnabled")] | ||
| public bool PrivacyBudgetTrackingEnabled { get; set; } = false; | ||
|
|
||
| /// <summary> | ||
| /// Whether to clip input values to a bounded range before adding noise. | ||
| /// Clipping limits the sensitivity of queries to a known maximum, which can | ||
| /// improve the privacy-utility tradeoff for bounded data. | ||
| /// | ||
| /// DEFAULT: false (clipping disabled) | ||
|
||
| /// </summary> | ||
| [DataMember(Name = "clippingEnabled")] | ||
| public bool ClippingEnabled { get; set; } = false; | ||
|
|
||
| /// <summary> | ||
| /// Validates the differential privacy configuration parameters. | ||
| /// </summary> | ||
| /// <exception cref="ArgumentException">Thrown when parameters are invalid.</exception> | ||
| public void Validate() | ||
| { | ||
| if (Epsilon <= 0) | ||
| { | ||
| throw new ArgumentException("Differential privacy epsilon must be greater than 0"); | ||
| } | ||
|
|
||
| if (Epsilon > 10.0) | ||
| { | ||
| throw new ArgumentException( | ||
| $"Differential privacy epsilon value {Epsilon} exceeds maximum of 10.0. " + | ||
| "High epsilon values provide minimal privacy protection. See configuration comments for guidance."); | ||
| } | ||
|
|
||
| if (Epsilon > 1.0) | ||
| { | ||
| s_logger.LogWarning( | ||
| $"Differential privacy epsilon value {Epsilon} is high (>1.0). " + | ||
| "This provides weaker privacy guarantees. Consider using epsilon <= 1.0 for moderate privacy " + | ||
| "or epsilon <= 0.1 for strong privacy (NIST SP 800-188 guidance for health data)."); | ||
| } | ||
|
|
||
| if (Delta < 0 || Delta > 1) | ||
| { | ||
| throw new ArgumentException("Differential privacy delta must be between 0 and 1"); | ||
| } | ||
|
|
||
| if (Sensitivity <= 0) | ||
| { | ||
| throw new ArgumentException("Differential privacy sensitivity must be greater than 0"); | ||
| } | ||
|
|
||
| if (MaxCumulativeEpsilon <= 0) | ||
| { | ||
| throw new ArgumentException("Differential privacy maxCumulativeEpsilon must be greater than 0"); | ||
| } | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔒 Security (medium): The static logger in CryptographicKeyValidator is created with the wrong generic type argument: AnonymizerLogging.CreateLoggerParameterConfiguration(). This means all log entries from CryptographicKeyValidator will be categorized under the 'ParameterConfiguration' logger category. While not a direct data-exposure vulnerability, this creates misleading audit/log trails — security-related key validation warnings will appear attributed to ParameterConfiguration rather than CryptographicKeyValida...
💡 Change to AnonymizerLogging.CreateLoggerCryptographicKeyValidator() so log categories correctly reflect the source component.