Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ public void TestValidate_CryptoHashKey_AboveMinimum_DoesNotThrow()
}

// -----------------------------------------------------------------------
// DateShiftKey + DateShiftScope validation — new tests
// DateShiftKey + DateShiftScope validation
// -----------------------------------------------------------------------

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security;
using System.Text;
using Microsoft.Extensions.Logging;
using Microsoft.Health.Fhir.Anonymizer.Core.Exceptions;

namespace Microsoft.Health.Fhir.Anonymizer.Core.AnonymizerConfigurations
{
/// <summary>
/// Static utility class that encapsulates all cryptographic key security validation logic
/// for anonymization configuration parameters.
///
/// SECURITY: This class enforces that keys are not placeholder values, not whitespace-only,
/// not obviously weak values, and meet minimum length/size requirements.
/// </summary>
public static class CryptographicKeyValidator
{
private static readonly ILogger s_logger = AnonymizerLogging.CreateLogger<ParameterConfiguration>();

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔒 Security (medium): The static logger in CryptographicKeyValidator is created with the wrong generic type argument: AnonymizerLogging.CreateLoggerParameterConfiguration(). This means all log entries from CryptographicKeyValidator will be categorized under the 'ParameterConfiguration' logger category. While not a direct data-exposure vulnerability, this creates misleading audit/log trails — security-related key validation warnings will appear attributed to ParameterConfiguration rather than CryptographicKeyValida...

💡 Change to AnonymizerLogging.CreateLoggerCryptographicKeyValidator() so log categories correctly reflect the source component.

/// <summary>
/// Valid AES key sizes in bits. Used to validate EncryptKey without allocating an Aes instance.
/// AES supports 128-bit (16 bytes), 192-bit (24 bytes), and 256-bit (32 bytes) keys.
/// </summary>
private static readonly HashSet<int> s_validAesKeySizeBits = new HashSet<int> { 128, 192, 256 };

/// <summary>
/// Dangerous placeholder patterns that must be rejected.
/// </summary>
private static readonly string[] s_dangerousPlaceholderPatterns = new[]
{
"$HMAC_KEY",
"YOUR_KEY_HERE",
"YOUR_SECURE_KEY",
"YOUR_ENCRYPTION_KEY",
"PLACEHOLDER",
"CHANGE_ME",
"CHANGEME",
"REPLACE_ME",
"EXAMPLE_KEY",
"TEST_KEY",
"SAMPLE_KEY",
"INSERT_KEY_HERE",
"<YOUR_KEY>",
"[YOUR_KEY]",
"{{YOUR_KEY}}",
"TODO",
"FIXME"
};

/// <summary>
/// Validate a key parameter doesn't contain placeholder values or consist solely of whitespace.
/// SECURITY CRITICAL: Prevents use of example/template keys and whitespace-only values in production.
/// </summary>
/// <param name="keyValue">The key value to validate.</param>
/// <param name="parameterName">The configuration parameter name (for error messages).</param>
/// <param name="keyType">Human-readable key type description (for error messages).</param>
public static void ValidateKeyParameter(string keyValue, string parameterName, string keyType)
{
if (string.IsNullOrEmpty(keyValue))
{
return; // Empty/null keys are allowed if the feature is not used
}

// SECURITY: Reject whitespace-only keys — they provide no entropy
if (string.IsNullOrWhiteSpace(keyValue))
{
throw new SecurityException(
$"SECURITY ERROR: Whitespace-only {keyType} key detected in '{parameterName}'. " +
"A key consisting entirely of whitespace characters provides no entropy and must not be used. " +
"Generate a cryptographically secure random key using: openssl rand -base64 32");
}

// Trim and convert to uppercase for case-insensitive comparison
var normalizedKey = keyValue.Trim().ToUpperInvariant();

// Check against all dangerous placeholder patterns
foreach (var pattern in s_dangerousPlaceholderPatterns)
{
if (normalizedKey.Contains(pattern))
{
throw new SecurityException(
$"SECURITY ERROR: Placeholder {keyType} key detected in '{parameterName}'.\n\n" +
$"The configuration contains a placeholder value ('{pattern}') that must be replaced " +
"with a cryptographically secure key before use.\n\n" +
"TO GENERATE A SECURE KEY:\n" +
" Linux/macOS: openssl rand -base64 32\n" +
" Windows: pwsh -Command \"[Convert]::ToBase64String((1..32 | ForEach-Object { Get-Random -Minimum 0 -Maximum 256 }))\"\n" +
" .NET: var key = Convert.ToBase64String(RandomNumberGenerator.GetBytes(32));\n\n" +
"SECURITY WARNING: Using placeholder keys in production:\n" +
" - Compromises cryptographic operations\n" +
" - May lead to predictable hash values\n" +
" - Enables re-identification attacks\n" +
" - Violates privacy guarantees\n\n" +
"BEST PRACTICES:\n" +
" - Never commit actual keys to version control\n" +
" - Use environment variables: Environment.GetEnvironmentVariable(\"CRYPTO_KEY\")\n" +
" - Use Azure Key Vault, AWS Secrets Manager, or similar for production\n" +
" - Rotate keys periodically according to your security policy\n" +
" - Use different keys for different environments (dev/staging/production)\n");
}
}

// Additional checks for weak or test keys
if (keyValue.Length < 16)
{
s_logger.LogWarning(
$"The {keyType} key in '{parameterName}' is very short ({keyValue.Length} characters). " +
"Recommended minimum is 32 bytes (44 characters in Base64). " +
"Short keys provide inadequate security and may be vulnerable to brute force attacks.");
}

// Check for obviously weak patterns
if (keyValue.Equals("12345678", StringComparison.Ordinal) ||
keyValue.Equals("password", StringComparison.OrdinalIgnoreCase) ||
keyValue.Equals("secret", StringComparison.OrdinalIgnoreCase) ||
keyValue.Equals("key", StringComparison.OrdinalIgnoreCase) ||
keyValue.All(c => c == keyValue[0])) // All same character
{
throw new SecurityException(
$"SECURITY ERROR: Weak {keyType} key detected in '{parameterName}'. " +
"The key appears to be a common weak value (e.g., 'password', '12345678', repeated characters). " +
"Generate a cryptographically secure random key using: openssl rand -base64 32");
}
}

/// <summary>
/// Validate that the encrypt key size is a valid AES key size (128, 192, or 256 bits).
/// Uses a static HashSet of valid sizes to avoid allocating an Aes instance on every call.
/// Only validates when encryptKey is non-null and non-empty.
/// </summary>
/// <param name="encryptKey">The encryption key string to validate.</param>
public static void ValidateEncryptKeySize(string encryptKey)
{
if (string.IsNullOrEmpty(encryptKey))
{
return;
}

var encryptKeySize = Encoding.UTF8.GetByteCount(encryptKey) * 8;
if (!s_validAesKeySizeBits.Contains(encryptKeySize))
{
throw new AnonymizerConfigurationException(
$"Invalid encrypt key size : {encryptKeySize} bits! Please provide key sizes of 128, 192 or 256 bits.");
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
using System;
using System.Runtime.Serialization;
using Microsoft.Extensions.Logging;

namespace Microsoft.Health.Fhir.Anonymizer.Core.AnonymizerConfigurations
{
/// <summary>
/// Configuration parameters for differential privacy processing.
///
/// REFERENCES:
/// - NIST Special Publication 800-188: "De-Identifying Government Datasets" (2023 Draft)
/// https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-188-draft2.pdf
/// - Dwork, C., &amp; Roth, A. (2014). "The Algorithmic Foundations of Differential Privacy."
/// Foundations and Trends in Theoretical Computer Science, 9(3-4), 211-407.
/// - Apple Differential Privacy Team (2017). "Learning with Privacy at Scale."
/// Apple Machine Learning Journal, Vol. 1, Issue 8.
/// </summary>
[DataContract]
public class DifferentialPrivacyParameterConfiguration
{
private static readonly ILogger s_logger = AnonymizerLogging.CreateLogger<DifferentialPrivacyParameterConfiguration>();

/// <summary>
/// Privacy budget (epsilon) - lower values provide stronger privacy.
///
/// GUIDANCE (NIST SP 800-188):
/// - ε ≤ 0.1: Strong privacy protection (recommended for sensitive health data)
/// - ε = 0.5-1.0: Moderate privacy (reasonable for many applications)
/// - ε = 1.0-10.0: Weak privacy (use only when data utility is critical)
/// - ε > 10: Minimal privacy guarantee
///
/// DEFAULT: 1.0 (reasonable starting point; adjust based on sensitivity analysis)
/// </summary>
[DataMember(Name = "epsilon")]
public double Epsilon { get; set; } = 1.0;

/// <summary>
/// Delta parameter for (epsilon, delta)-differential privacy.
/// Represents the probability of privacy failure. Should be cryptographically small.
///
/// GUIDANCE:
/// - For (ε,δ)-differential privacy, δ should be much smaller than 1/n where n is dataset size
/// - Typical values: 1e-5 to 1e-8 for healthcare datasets
/// - δ = 0 gives pure ε-differential privacy (Laplace mechanism)
/// - Only applies to Gaussian mechanism; Laplace mechanism has δ=0
///
/// DEFAULT: 1e-5 (appropriate for datasets of up to ~100,000 records)
/// </summary>
[DataMember(Name = "delta")]
public double Delta { get; set; } = 1e-5;

/// <summary>
/// Sensitivity of the query function (global sensitivity).
/// Measures the maximum change in output when one record is added/removed.
///
/// GUIDANCE:
/// - For counting queries: sensitivity = 1
/// - For sum queries: sensitivity = max possible value
/// - For average queries: sensitivity = range / n
/// - Higher sensitivity requires more noise for same epsilon
///
/// DEFAULT: 1.0 (appropriate for counts and bounded numeric fields)
/// </summary>
[DataMember(Name = "sensitivity")]
public double Sensitivity { get; set; } = 1.0;

/// <summary>
/// Maximum cumulative epsilon budget before warning.
///
/// COMPOSITION: Under sequential composition, total privacy loss is sum of individual ε values.
/// Advanced composition theorems can provide tighter bounds but are not yet implemented.
///
/// DEFAULT: 1.0 (reasonable for most healthcare research applications per NIST guidance)
///
/// WARNING: Exceeding this budget across multiple queries degrades privacy guarantees.
/// </summary>
[DataMember(Name = "maxCumulativeEpsilon")]
public double MaxCumulativeEpsilon { get; set; } = 1.0;

/// <summary>
/// Whether to use advanced composition for better privacy accounting.
///
/// ADVANCED COMPOSITION THEOREM (Dwork et al.):
/// k queries with (ε,δ)-DP satisfy (ε', kδ+δ')-DP where:
/// ε' ≈ √(2k ln(1/δ')) * ε + k*ε*(e^ε - 1)
///
/// This can significantly improve privacy accounting for many queries.
///
/// DEFAULT: false (uses simple sequential composition: total ε = Σε_i)
///
/// NOTE: Advanced composition is not yet implemented. Setting this to true will
/// log a warning and fall back to sequential composition.
/// </summary>
[DataMember(Name = "useAdvancedComposition")]
public bool UseAdvancedComposition { get; set; } = false;

/// <summary>
/// Noise mechanism to use for differential privacy.
///
/// MECHANISMS:
/// - "laplace": Laplace mechanism (ε-DP, δ=0). Standard choice for numeric queries.
/// Noise scale = sensitivity/ε. Use for unbounded queries.
/// - "gaussian": Gaussian mechanism ((ε,δ)-DP). Use when approximate DP is acceptable.
/// Requires δ > 0. Better utility for large datasets. Use for L2-sensitivity queries.
/// - "exponential": Exponential mechanism. For categorical/selection queries.
/// Currently implemented using Laplace for numeric data.
///
/// DEFAULT: "laplace" (provides pure ε-differential privacy)
/// </summary>
[DataMember(Name = "mechanism")]
public string Mechanism { get; set; } = "laplace";

/// <summary>
/// Whether to track and enforce cumulative privacy budget across queries.
/// When enabled, the system monitors total epsilon spend and warns when approaching
/// <see cref="MaxCumulativeEpsilon"/>.
///
/// DEFAULT: false (budget tracking disabled)
/// </summary>
[DataMember(Name = "privacyBudgetTrackingEnabled")]
public bool PrivacyBudgetTrackingEnabled { get; set; } = false;

/// <summary>
/// Whether to clip input values to a bounded range before adding noise.
/// Clipping limits the sensitivity of queries to a known maximum, which can
/// improve the privacy-utility tradeoff for bounded data.
///
/// DEFAULT: false (clipping disabled)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📝 medium: The documentation for PrivacyBudgetTrackingEnabled was significantly simplified compared to the original. The original explained that the engine emits a warning when the total epsilon spend across operations exceeds MaxCumulativeEpsilon, and connected it to the MaxCumulativeEpsilon property via a see-ref link. Similarly, ClippingEnabled lost the explanation that clipping is required to bound sensitivity, which is a prerequisite for the Gaussian mechanism to provide meaningful (ε,δ)-differenti...

💡 Restore the detailed original documentation for these properties. The technical details about the Gaussian mechanism's dependency on clipping and the epsilon budget warning threshold are important for correct use of these privacy configurations.

/// </summary>
[DataMember(Name = "clippingEnabled")]
public bool ClippingEnabled { get; set; } = false;

/// <summary>
/// Validates the differential privacy configuration parameters.
/// </summary>
/// <exception cref="ArgumentException">Thrown when parameters are invalid.</exception>
public void Validate()
{
if (Epsilon <= 0)
{
throw new ArgumentException("Differential privacy epsilon must be greater than 0");
}

if (Epsilon > 10.0)
{
throw new ArgumentException(
$"Differential privacy epsilon value {Epsilon} exceeds maximum of 10.0. " +
"High epsilon values provide minimal privacy protection. See configuration comments for guidance.");
}

if (Epsilon > 1.0)
{
s_logger.LogWarning(
$"Differential privacy epsilon value {Epsilon} is high (>1.0). " +
"This provides weaker privacy guarantees. Consider using epsilon <= 1.0 for moderate privacy " +
"or epsilon <= 0.1 for strong privacy (NIST SP 800-188 guidance for health data).");
}

if (Delta < 0 || Delta > 1)
{
throw new ArgumentException("Differential privacy delta must be between 0 and 1");
}

if (Sensitivity <= 0)
{
throw new ArgumentException("Differential privacy sensitivity must be greater than 0");
}

if (MaxCumulativeEpsilon <= 0)
{
throw new ArgumentException("Differential privacy maxCumulativeEpsilon must be greater than 0");
}
}
}
}
Loading
Loading