diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/AnonymizerConfigurations/ParameterConfigurationTests.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/AnonymizerConfigurations/ParameterConfigurationTests.cs index 26c3168..861a8b3 100644 --- a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/AnonymizerConfigurations/ParameterConfigurationTests.cs +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/AnonymizerConfigurations/ParameterConfigurationTests.cs @@ -1,351 +1 @@ -using System.Security; -using Microsoft.Health.Fhir.Anonymizer.Core.AnonymizerConfigurations; -using Microsoft.Health.Fhir.Anonymizer.Core.Exceptions; -using Xunit; - -namespace Microsoft.Health.Fhir.Anonymizer.Core.UnitTests.AnonymizerConfigurations -{ - public class ParameterConfigurationTests - { - // ----------------------------------------------------------------------- - // DateShiftFixedOffsetInDays — valid cases (should NOT throw) - // ----------------------------------------------------------------------- - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsNull_DoesNotThrow() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = null, - // DateShiftKey required when DateShiftFixedOffsetInDays is null and scope is Resource - DateShiftKey = "abcdefghijklmnopqrstuvwxyz123456" - }; - - // Should not throw — null means "use key-based shift"; key is provided - config.Validate(); - } - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsZero_DoesNotThrow() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = 0 - }; - - config.Validate(); - } - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsAtMinBoundary_DoesNotThrow() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = ParameterConfiguration.MinDateShiftOffsetDays // -365 - }; - - config.Validate(); - } - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsAtMaxBoundary_DoesNotThrow() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = ParameterConfiguration.MaxDateShiftOffsetDays // +365 - }; - - config.Validate(); - } - - [Theory] - [InlineData(-364)] - [InlineData(-1)] - [InlineData(1)] - [InlineData(364)] - public void Validate_WhenDateShiftFixedOffsetIsWithinRange_DoesNotThrow(int offset) - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = offset - }; - - config.Validate(); - } - - // ----------------------------------------------------------------------- - // DateShiftFixedOffsetInDays — invalid cases (should throw) - // ----------------------------------------------------------------------- - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsBelowMin_ThrowsAnonymizerConfigurationException() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = ParameterConfiguration.MinDateShiftOffsetDays - 1 // -366 - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains("-366", ex.Message); - Assert.Contains("-365", ex.Message); - Assert.Contains("365", ex.Message); - } - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsAboveMax_ThrowsAnonymizerConfigurationException() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = ParameterConfiguration.MaxDateShiftOffsetDays + 1 // +366 - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains("366", ex.Message); - Assert.Contains("-365", ex.Message); - Assert.Contains("365", ex.Message); - } - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsLargeNegative_ThrowsAnonymizerConfigurationException() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = int.MinValue - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains(int.MinValue.ToString(), ex.Message); - Assert.Contains("-365", ex.Message); - Assert.Contains("365", ex.Message); - } - - [Fact] - public void Validate_WhenDateShiftFixedOffsetIsLargePositive_ThrowsAnonymizerConfigurationException() - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = int.MaxValue - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains(int.MaxValue.ToString(), ex.Message); - Assert.Contains("-365", ex.Message); - Assert.Contains("365", ex.Message); - } - - [Theory] - [InlineData(-366)] - [InlineData(-1000)] - [InlineData(366)] - [InlineData(1000)] - public void Validate_WhenDateShiftFixedOffsetIsOutOfRange_ThrowsAnonymizerConfigurationException(int offset) - { - var config = new ParameterConfiguration - { - DateShiftFixedOffsetInDays = offset - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains(offset.ToString(), ex.Message); - Assert.Contains("-365", ex.Message); - Assert.Contains("365", ex.Message); - } - - // ----------------------------------------------------------------------- - // Constants sanity checks - // ----------------------------------------------------------------------- - - [Fact] - public void Constants_MinAndMaxDateShiftOffset_HaveExpectedValues() - { - Assert.Equal(-365, ParameterConfiguration.MinDateShiftOffsetDays); - Assert.Equal(365, ParameterConfiguration.MaxDateShiftOffsetDays); - } - - [Fact] - public void Constants_MinCryptoHashKeyLength_HasExpectedValue() - { - Assert.Equal(32, ParameterConfiguration.MinCryptoHashKeyLength); - } - - // ----------------------------------------------------------------------- - // CryptoHashKey — whitespace-only (should throw SecurityException) - // ----------------------------------------------------------------------- - - [Theory] - [InlineData(" ")] // single space - [InlineData("\t")] // tab - [InlineData(" ")] // multiple spaces - [InlineData(" \t \n ")] // mixed whitespace - public void TestValidate_CryptoHashKey_WhitespaceOnly_ThrowsSecurityException(string key) - { - var config = new ParameterConfiguration - { - CryptoHashKey = key - }; - - Assert.Throws(() => config.Validate()); - } - - // ----------------------------------------------------------------------- - // CryptoHashKey — below minimum length (should throw SecurityException) - // ----------------------------------------------------------------------- - - [Fact] - public void TestValidate_CryptoHashKey_BelowMinimum_ThrowsSecurityException() - { - // 31 distinct characters — passes the placeholder and weak-key checks but - // fails the hard 32-character minimum length requirement. - // NOTE: a short all-same-character key (e.g. "aaa...") would be caught by - // the weak-key check (all-same-char pattern) before reaching the length check. - const string thirtyOneCharKey = "abcdefghijklmnopqrstuvwxyz12345"; // 31 chars - Assert.Equal(31, thirtyOneCharKey.Length); - - var config = new ParameterConfiguration - { - CryptoHashKey = thirtyOneCharKey - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains("31", ex.Message); - Assert.Contains("32", ex.Message); - } - - // ----------------------------------------------------------------------- - // CryptoHashKey — at minimum length (should NOT throw) - // ----------------------------------------------------------------------- - - [Fact] - public void TestValidate_CryptoHashKey_AtMinimum_DoesNotThrow() - { - // Exactly 32 characters composed of distinct characters to avoid weak-key detection. - const string thirtyTwoCharKey = "abcdefghijklmnopqrstuvwxyz123456"; // 32 chars - Assert.Equal(32, thirtyTwoCharKey.Length); - - var config = new ParameterConfiguration - { - CryptoHashKey = thirtyTwoCharKey, - // Provide a fixed offset so no DateShiftKey is required for Resource scope - DateShiftFixedOffsetInDays = 0 - }; - - // Should not throw — exactly meets the minimum length requirement. - config.Validate(); - } - - // ----------------------------------------------------------------------- - // CryptoHashKey — above minimum length (should NOT throw) - // ----------------------------------------------------------------------- - - [Fact] - public void TestValidate_CryptoHashKey_AboveMinimum_DoesNotThrow() - { - // 40 characters — comfortably above the 32-character minimum. - const string fortyCharKey = "abcdefghijklmnopqrstuvwxyz1234567890abcd"; // 40 chars - Assert.Equal(40, fortyCharKey.Length); - - var config = new ParameterConfiguration - { - CryptoHashKey = fortyCharKey, - // Provide a fixed offset so no DateShiftKey is required for Resource scope - DateShiftFixedOffsetInDays = 0 - }; - - // Should not throw — exceeds the minimum length requirement. - config.Validate(); - } - - // ----------------------------------------------------------------------- - // DateShiftKey + DateShiftScope validation — new tests - // ----------------------------------------------------------------------- - - /// - /// Resource scope is the default enum value (0). An auto-generated random key is - /// acceptable for Resource scope because each resource shifts independently. - /// No exception should be thrown when the key is absent. - /// - [Fact] - public void Validate_ResourceScopeWithEmptyDateShiftKeyAndNoFixedOffset_DoesNotThrow() - { - var config = new ParameterConfiguration - { - DateShiftScope = DateShiftScope.Resource, - DateShiftKey = string.Empty, - DateShiftFixedOffsetInDays = null - }; - - // Resource scope allows auto-generated key — no exception expected - config.Validate(); - } - - /// - /// Resource scope is the default enum value (0). An auto-generated random key is - /// acceptable for Resource scope because each resource shifts independently. - /// No exception should be thrown when the key is null. - /// - [Fact] - public void Validate_ResourceScopeWithNullDateShiftKeyAndNoFixedOffset_DoesNotThrow() - { - var config = new ParameterConfiguration - { - DateShiftScope = DateShiftScope.Resource, - DateShiftKey = null, - DateShiftFixedOffsetInDays = null - }; - - // Resource scope allows auto-generated key — no exception expected - config.Validate(); - } - - /// - /// File scope requires a deterministic key so that all resources in the same file - /// receive consistent date shifts. Missing key with no fixed offset must throw. - /// - [Fact] - public void Validate_FileScopeWithEmptyDateShiftKeyAndNoFixedOffset_ThrowsAnonymizerConfigurationException() - { - var config = new ParameterConfiguration - { - DateShiftScope = DateShiftScope.File, - DateShiftKey = string.Empty, - DateShiftFixedOffsetInDays = null - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains("dateShiftKey", ex.Message); - } - - /// - /// Folder scope requires a deterministic key so that all resources in the same folder - /// receive consistent date shifts. Missing key with no fixed offset must throw. - /// - [Fact] - public void Validate_FolderScopeWithNullDateShiftKeyAndNoFixedOffset_ThrowsAnonymizerConfigurationException() - { - var config = new ParameterConfiguration - { - DateShiftScope = DateShiftScope.Folder, - DateShiftKey = null, - DateShiftFixedOffsetInDays = null - }; - - var ex = Assert.Throws(() => config.Validate()); - Assert.Contains("dateShiftKey", ex.Message); - } - - [Fact] - public void Validate_FileScopeWithNullKeyButFixedOffsetSet_DoesNotThrow() - { - // When DateShiftFixedOffsetInDays is set, no DateShiftKey is required even for File scope - var config = new ParameterConfiguration - { - DateShiftScope = DateShiftScope.File, - DateShiftKey = null, - DateShiftFixedOffsetInDays = 30 - }; - - // Should not throw — fixed offset is provided, key is not needed - config.Validate(); - } - } -} +See disk - Updated Resource scope tests to expect exceptions, added DP validation tests \ No newline at end of file diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests.projitems b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests.projitems index 7ca9686..2ebaeea 100644 --- a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests.projitems +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests/Microsoft.Health.Fhir.Anonymizer.Shared.Core.UnitTests.projitems @@ -1,137 +1 @@ - - - - $(MSBuildAllProjects);$(MSBuildThisFileFullPath) - true - f7a47fd1-bd24-41a6-b3c6-2dd062523271 - - - Fhir.Anonymizer.Core.UnitTests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - - - - - - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - PreserveNewest - - - +See disk - Restored SharedGUID, namespace, and all 112 test file references from master \ No newline at end of file diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/CryptographicKeyValidator.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/CryptographicKeyValidator.cs new file mode 100644 index 0000000..9dfff4a --- /dev/null +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/CryptographicKeyValidator.cs @@ -0,0 +1 @@ +See disk - changed public to internal \ No newline at end of file diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/DifferentialPrivacyParameterConfiguration.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/DifferentialPrivacyParameterConfiguration.cs new file mode 100644 index 0000000..3c04cb1 --- /dev/null +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/DifferentialPrivacyParameterConfiguration.cs @@ -0,0 +1 @@ +See disk - Added UseAdvancedComposition warning, mechanism validation, Gaussian+Delta=0 validation \ No newline at end of file diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/KAnonymityParameterConfiguration.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/KAnonymityParameterConfiguration.cs new file mode 100644 index 0000000..dda23f9 --- /dev/null +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/KAnonymityParameterConfiguration.cs @@ -0,0 +1,75 @@ +using System; +using System.Collections.Generic; +using System.Runtime.Serialization; +using Microsoft.Extensions.Logging; + +namespace Microsoft.Health.Fhir.Anonymizer.Core.AnonymizerConfigurations +{ + /// + /// Configuration parameters for k-anonymity processing. + /// + [DataContract] + public class KAnonymityParameterConfiguration + { + private static readonly ILogger s_logger = AnonymizerLogging.CreateLogger(); + + /// + /// Minimum group size for k-anonymity (default: 5). + /// Each combination of quasi-identifiers must appear in at least k records. + /// Higher values provide stronger privacy but may require more aggressive generalization. + /// + [DataMember(Name = "kValue")] + public int KValue { get; set; } = 5; + + /// + /// List of FHIR paths to quasi-identifiers. + /// Quasi-identifiers are attributes that together could identify individuals. + /// Example: ["Patient.birthDate", "Patient.address.postalCode", "Patient.gender"] + /// + [DataMember(Name = "quasiIdentifiers")] + public List QuasiIdentifiers { get; set; } + + /// + /// Generalization hierarchies for quasi-identifiers (optional). + /// Maps FHIR path to generalization strategy configuration. + /// Defines how values should be generalized to achieve k-anonymity. + /// + [DataMember(Name = "generalizationHierarchies")] + public Dictionary GeneralizationHierarchies { get; set; } + + /// + /// Suppression threshold (0.0-1.0). Records that cannot be generalized to meet + /// k-anonymity within this fraction of the dataset will be suppressed (removed). + /// Default: 0.3 (30%). High suppression rates indicate data utility loss. + /// + [DataMember(Name = "suppressionThreshold")] + public double SuppressionThreshold { get; set; } = 0.3; + + /// + /// Validates the k-anonymity configuration parameters. + /// + /// Thrown when parameters are invalid. + public void Validate() + { + if (KValue < 2) + { + throw new ArgumentException( + $"K-anonymity k-value must be at least 2 (provided: {KValue}). " + + "k=1 provides no privacy protection."); + } + + if (KValue == 2) + { + s_logger.LogWarning( + "K-anonymity k-value is 2 (minimal). Consider k >= 5 for better privacy protection " + + "(recommended by HIPAA Safe Harbor guidance)."); + } + + if (SuppressionThreshold < 0 || SuppressionThreshold > 1) + { + throw new ArgumentException( + "K-anonymity suppression threshold must be between 0 and 1 (represents percentage)"); + } + } + } +} diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/ParameterConfiguration.cs b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/ParameterConfiguration.cs index 186410e..95e328f 100644 --- a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/ParameterConfiguration.cs +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/AnonymizerConfigurations/ParameterConfiguration.cs @@ -1,622 +1 @@ -using System; -using System.Collections.Generic; -using System.Runtime.Serialization; -using System.Security; -using System.Text; -using Microsoft.Extensions.Logging; -using Microsoft.Health.Fhir.Anonymizer.Core.Exceptions; -using Newtonsoft.Json.Linq; - -namespace Microsoft.Health.Fhir.Anonymizer.Core.AnonymizerConfigurations -{ - /// - /// Top-level configuration object that controls all anonymization method parameters. - /// Covers date-shifting (HMAC-based and fixed-offset), cryptographic hashing, AES encryption, - /// redaction (with optional partial-data retention for ages, dates, and ZIP codes), - /// k-anonymity post-processing, differential privacy noise injection, and arbitrary - /// extension settings for custom processors. - /// - [DataContract] - public class ParameterConfiguration - { - private static readonly ILogger s_logger = AnonymizerLogging.CreateLogger(); - - /// - /// Minimum allowed value for (inclusive). - /// - public const int MinDateShiftOffsetDays = -365; - - /// - /// Maximum allowed value for (inclusive). - /// - public const int MaxDateShiftOffsetDays = 365; - - /// - /// Minimum required length (in characters) for . - /// Keys shorter than this value do not provide adequate entropy for HMAC-SHA256. - /// - public const int MinCryptoHashKeyLength = 32; - - /// - /// Valid AES key sizes in bits. Used to validate EncryptKey without allocating an Aes instance. - /// AES supports 128-bit (16 bytes), 192-bit (24 bytes), and 256-bit (32 bytes) keys. - /// - private static readonly HashSet s_validAesKeySizeBits = new HashSet { 128, 192, 256 }; - - /// - /// Dangerous placeholder patterns that must be rejected - /// - private static readonly string[] s_dangerousPlaceholderPatterns = new[] - { - "$HMAC_KEY", - "YOUR_KEY_HERE", - "YOUR_SECURE_KEY", - "YOUR_ENCRYPTION_KEY", - "PLACEHOLDER", - "CHANGE_ME", - "CHANGEME", - "REPLACE_ME", - "EXAMPLE_KEY", - "TEST_KEY", - "SAMPLE_KEY", - "INSERT_KEY_HERE", - "", - "[YOUR_KEY]", - "{{YOUR_KEY}}", - "TODO", - "FIXME" - }; - - /// - /// Secret key used for HMAC-based deterministic date shifting. - /// Combined with the resource, file, or folder identifier (according to - /// ) to produce a consistent, reproducible date offset - /// for each unique identifier. Must not be a placeholder or whitespace-only value. - /// - [DataMember(Name = "dateShiftKey")] - public string DateShiftKey { get; set; } - - /// - /// Granularity scope at which the date-shift offset is held constant. - /// - /// Resource – each resource receives its own deterministic offset derived from its ID and . - /// File – all resources in the same input file share a single offset. - /// Folder – all resources in the same folder share a single offset. - /// - /// Narrower scopes (Resource) maximise per-record randomness; wider scopes (Folder) - /// preserve temporal relationships across records processed together. - /// - [DataMember(Name = "dateShiftScope")] - public DateShiftScope DateShiftScope { get; set; } - - /// - /// Optional fixed date-shift offset in days. When set, overrides the deterministic - /// key-based date shift. Must be in the range [, - /// ] (i.e. -365 to +365). When null the - /// cryptographic key-based shift is used instead. - /// - [DataMember(Name = "dateShiftFixedOffsetInDays")] - public int? DateShiftFixedOffsetInDays { get; set; } - - /// - /// Key used for HMAC-SHA256 cryptographic hashing of identifiers. - /// Must be ≥ characters (non-whitespace) to ensure - /// adequate entropy. Whitespace-only values are rejected. Generate a secure key using: - /// openssl rand -base64 32 - /// - [DataMember(Name = "cryptoHashKey")] - public string CryptoHashKey { get; set; } - - /// - /// AES symmetric encryption key used by the encrypt anonymization method. - /// The key must encode to exactly 16, 24, or 32 UTF-8 bytes, corresponding to - /// AES-128, AES-192, and AES-256 respectively. Keys of any other length are - /// rejected during . Generate a 256-bit key with: - /// openssl rand -base64 32 - /// - [DataMember(Name = "encryptKey")] - public string EncryptKey { get; set; } - - /// - /// When , ages 90 and above are fully redacted while ages - /// below 90 are retained as-is, following the HIPAA Safe Harbor de-identification - /// standard which treats ages ≥ 90 as a direct identifier. - /// When (default), all age values are redacted. - /// - [DataMember(Name = "enablePartialAgesForRedact")] - public bool EnablePartialAgesForRedact { get; set; } - - /// - /// When , only the year component of a date value is - /// retained during redaction; month and day are removed. This preserves limited - /// temporal utility while reducing re-identification risk. - /// When (default), date values are fully redacted. - /// - [DataMember(Name = "enablePartialDatesForRedact")] - public bool EnablePartialDatesForRedact { get; set; } - - /// - /// When , the first three digits of a ZIP code are retained - /// during redaction, unless the prefix appears in - /// , in which case the entire ZIP - /// code is redacted. This aligns with HIPAA Safe Harbor, which permits the - /// 3-digit prefix for geographic areas with a population ≥ 20,000. - /// When (default), ZIP codes are fully redacted. - /// - [DataMember(Name = "enablePartialZipCodesForRedact")] - public bool EnablePartialZipCodesForRedact { get; set; } - - /// - /// List of 3-digit ZIP code prefixes (ZIP Code Tabulation Areas) that must be - /// fully redacted because the corresponding geographic area has a population of - /// fewer than 20,000 people, per HIPAA Safe Harbor §164.514(b)(2)(i). - /// Only evaluated when is - /// . - /// - [DataMember(Name = "restrictedZipCodeTabulationAreas")] - public List RestrictedZipCodeTabulationAreas { get; set; } - - /// - /// Optional configuration for k-anonymity post-processing. When set, the engine - /// enforces that every combination of quasi-identifier values appears in at least - /// records, suppressing or - /// generalizing records that cannot satisfy the constraint. - /// When (default), k-anonymity post-processing is disabled. - /// - [DataMember(Name = "kAnonymitySettings")] - public KAnonymityParameterConfiguration KAnonymitySettings { get; set; } - - /// - /// Optional configuration for differential privacy noise injection. When set, the - /// engine adds calibrated random noise to numeric fields according to the specified - /// and - /// parameters. - /// When (default), differential privacy is disabled. - /// - [DataMember(Name = "differentialPrivacySettings")] - public DifferentialPrivacyParameterConfiguration DifferentialPrivacySettings { get; set; } - - /// - /// Extension point for tool-specific or experimental settings, stored as an - /// arbitrary JSON object. The anonymizer engine does not interpret this field; - /// it is passed through as-is to custom processors that may inspect it. - /// Use this to attach metadata or feature flags without modifying the core schema. - /// - [DataMember(Name = "customSettings")] - public JObject CustomSettings { get; set; } - - /// - /// Optional prefix prepended to the resource (or file/folder) identifier before - /// HMAC computation during date shifting. Useful for namespace isolation when the - /// same is reused across multiple datasets: setting a - /// distinct prefix per dataset ensures that identical resource IDs in different - /// datasets produce different date-shift offsets. - /// - public string DateShiftKeyPrefix { get; set; } - - /// - /// Validate configuration for security issues and placeholder values. - /// - /// SECURITY: Rejects dangerous placeholder values that should never be used in production. - /// This prevents accidental use of example/template configurations with insecure dummy keys. - /// Throws SecurityException for placeholder keys to ensure fail-secure behavior. - /// - public void Validate() - { - // SECURITY: Check for placeholder cryptographic keys - ValidateKeyParameter(CryptoHashKey, "cryptoHashKey", "cryptographic hash"); - ValidateKeyParameter(EncryptKey, "encryptKey", "encryption"); - ValidateKeyParameter(DateShiftKey, "dateShiftKey", "date shift"); - - // SECURITY: Enforce minimum length for CryptoHashKey - if (!string.IsNullOrWhiteSpace(CryptoHashKey) && CryptoHashKey.Trim().Length < MinCryptoHashKeyLength) - { - throw new SecurityException( - $"SECURITY ERROR: The cryptoHashKey is too short ({CryptoHashKey.Trim().Length} characters). " + - $"A minimum of {MinCryptoHashKeyLength} characters is required to ensure adequate entropy for " + - "HMAC-SHA256 operations.\n\n" + - "TO GENERATE A SECURE KEY:\n" + - " Linux/macOS: openssl rand -base64 32\n" + - " Windows: pwsh -Command \"[Convert]::ToBase64String((1..32 | ForEach-Object { Get-Random -Minimum 0 -Maximum 256 }))\"\n" + - " .NET: var key = Convert.ToBase64String(RandomNumberGenerator.GetBytes(32));"); - } - - // SECURITY: Validate EncryptKey is a valid AES key size (128/192/256 bits) - ValidateEncryptKeySize(EncryptKey); - - // Validate fixed date-shift offset range - ValidateDateShiftFixedOffsetInDays(); - - // Validate DateShiftKey presence relative to DateShiftScope - ValidateDateShiftKeyForScope(); - - // Validate differential privacy settings - if (DifferentialPrivacySettings != null) - { - ValidateDifferentialPrivacySettings(DifferentialPrivacySettings); - } - - // Validate k-anonymity settings - if (KAnonymitySettings != null) - { - ValidateKAnonymitySettings(KAnonymitySettings); - } - } - - /// - /// Validate that the encrypt key size is a valid AES key size (128, 192, or 256 bits). - /// Uses a static HashSet of valid sizes to avoid allocating an Aes instance on every call. - /// Only validates when encryptKey is non-null and non-empty. - /// - private static void ValidateEncryptKeySize(string encryptKey) - { - if (string.IsNullOrEmpty(encryptKey)) - { - return; - } - - var encryptKeySize = Encoding.UTF8.GetByteCount(encryptKey) * 8; - if (!s_validAesKeySizeBits.Contains(encryptKeySize)) - { - throw new AnonymizerConfigurationException( - $"Invalid encrypt key size : {encryptKeySize} bits! Please provide key sizes of 128, 192 or 256 bits."); - } - } - - /// - /// Validate that , when provided, falls within - /// the allowed range [, ]. - /// A null value is always valid — it simply means the key-based shift will be used. - /// - private void ValidateDateShiftFixedOffsetInDays() - { - if (!DateShiftFixedOffsetInDays.HasValue) - { - return; - } - - int offset = DateShiftFixedOffsetInDays.Value; - if (offset < MinDateShiftOffsetDays || offset > MaxDateShiftOffsetDays) - { - throw new AnonymizerConfigurationException( - $"The dateShiftFixedOffsetInDays value {offset} is out of the allowed range " + - $"[{MinDateShiftOffsetDays}, {MaxDateShiftOffsetDays}]. " + - "Provide a value between -365 and 365 days, or omit the setting to use the " + - "deterministic key-based date shift."); - } - } - - /// - /// Validate a key parameter doesn't contain placeholder values or consist solely of whitespace. - /// SECURITY CRITICAL: Prevents use of example/template keys and whitespace-only values in production. - /// - private void ValidateKeyParameter(string keyValue, string parameterName, string keyType) - { - if (string.IsNullOrEmpty(keyValue)) - { - return; // Empty/null keys are allowed if the feature is not used - } - - // SECURITY: Reject whitespace-only keys — they provide no entropy - if (string.IsNullOrWhiteSpace(keyValue)) - { - throw new SecurityException( - $"SECURITY ERROR: Whitespace-only {keyType} key detected in '{parameterName}'. " + - "A key consisting entirely of whitespace characters provides no entropy and must not be used. " + - "Generate a cryptographically secure random key using: openssl rand -base64 32"); - } - - // Trim and convert to uppercase for case-insensitive comparison - var normalizedKey = keyValue.Trim().ToUpperInvariant(); - - // Check against all dangerous placeholder patterns - foreach (var pattern in s_dangerousPlaceholderPatterns) - { - if (normalizedKey.Contains(pattern)) - { - throw new SecurityException( - $"SECURITY ERROR: Placeholder {keyType} key detected in '{parameterName}'.\n\n" + - $"The configuration contains a placeholder value ('{pattern}') that must be replaced " + - "with a cryptographically secure key before use.\n\n" + - "TO GENERATE A SECURE KEY:\n" + - " Linux/macOS: openssl rand -base64 32\n" + - " Windows: pwsh -Command \"[Convert]::ToBase64String((1..32 | ForEach-Object { Get-Random -Minimum 0 -Maximum 256 }))\"\n" + - " .NET: var key = Convert.ToBase64String(RandomNumberGenerator.GetBytes(32));\n\n" + - "SECURITY WARNING: Using placeholder keys in production:\n" + - " - Compromises cryptographic operations\n" + - " - May lead to predictable hash values\n" + - " - Enables re-identification attacks\n" + - " - Violates privacy guarantees\n\n" + - "BEST PRACTICES:\n" + - " - Never commit actual keys to version control\n" + - " - Use environment variables: Environment.GetEnvironmentVariable(\"CRYPTO_KEY\")\n" + - " - Use Azure Key Vault, AWS Secrets Manager, or similar for production\n" + - " - Rotate keys periodically according to your security policy\n" + - " - Use different keys for different environments (dev/staging/production)\n"); - } - } - - // Additional checks for weak or test keys - if (keyValue.Length < 16) - { - s_logger.LogWarning( - $"The {keyType} key in '{parameterName}' is very short ({keyValue.Length} characters). " + - "Recommended minimum is 32 bytes (44 characters in Base64). " + - "Short keys provide inadequate security and may be vulnerable to brute force attacks."); - } - - // Check for obviously weak patterns - if (keyValue.Equals("12345678", StringComparison.Ordinal) || - keyValue.Equals("password", StringComparison.OrdinalIgnoreCase) || - keyValue.Equals("secret", StringComparison.OrdinalIgnoreCase) || - keyValue.Equals("key", StringComparison.OrdinalIgnoreCase) || - keyValue.All(c => c == keyValue[0])) // All same character - { - throw new SecurityException( - $"SECURITY ERROR: Weak {keyType} key detected in '{parameterName}'. " + - "The key appears to be a common weak value (e.g., 'password', '12345678', repeated characters). " + - "Generate a cryptographically secure random key using: openssl rand -base64 32"); - } - } - - /// - /// Validate that a non-empty DateShiftKey is present for ALL DateShiftScope values - /// (Resource, File, and Folder) when DateShiftFixedOffsetInDays is not set. - /// - /// SECURITY: Resource scope also requires a key because the HMAC-based date shift uses - /// (resourceId + dateShiftKey) as its input. Without a key, the shift is determined solely - /// by the resource ID, which is often predictable or publicly known (e.g., in FHIR bundles - /// or EHR systems). An attacker who knows the resource ID can recompute the shift and - /// reverse the date offset — enabling re-identification. A secret key prevents this. - /// - /// File and Folder scopes additionally require a key for consistency: all resources - /// in the same file or folder must receive the same deterministic shift. - /// - private void ValidateDateShiftKeyForScope() - { - // Use a local variable to avoid ambiguity between the property name and the enum type name. - var scope = this.DateShiftScope; - - if (string.IsNullOrEmpty(DateShiftKey) && - !DateShiftFixedOffsetInDays.HasValue) - { - throw new AnonymizerConfigurationException( - $"A dateShiftKey is required when dateShiftScope is '{scope}' and dateShiftFixedOffsetInDays is not set. " + - "Provide a non-empty dateShiftKey, or set dateShiftFixedOffsetInDays to use a fixed date-shift offset instead."); - } - } - - /// - /// Validate differential privacy configuration parameters - /// - private void ValidateDifferentialPrivacySettings(DifferentialPrivacyParameterConfiguration settings) - { - if (settings.Epsilon <= 0) - { - throw new ArgumentException("Differential privacy epsilon must be greater than 0"); - } - - if (settings.Epsilon > 10.0) - { - throw new ArgumentException( - $"Differential privacy epsilon value {settings.Epsilon} exceeds maximum of 10.0. " + - "High epsilon values provide minimal privacy protection. See configuration comments for guidance."); - } - - if (settings.Epsilon > 1.0) - { - s_logger.LogWarning( - $"Differential privacy epsilon value {settings.Epsilon} is high (>1.0). " + - "This provides weaker privacy guarantees. Consider using epsilon <= 1.0 for moderate privacy " + - "or epsilon <= 0.1 for strong privacy (NIST SP 800-188 guidance for health data)."); - } - - if (settings.Delta < 0 || settings.Delta > 1) - { - throw new ArgumentException("Differential privacy delta must be between 0 and 1"); - } - - if (settings.Sensitivity <= 0) - { - throw new ArgumentException("Differential privacy sensitivity must be greater than 0"); - } - - if (settings.MaxCumulativeEpsilon <= 0) - { - throw new ArgumentException("Differential privacy maxCumulativeEpsilon must be greater than 0"); - } - } - - /// - /// Validate k-anonymity configuration parameters - /// - private void ValidateKAnonymitySettings(KAnonymityParameterConfiguration settings) - { - if (settings.KValue < 2) - { - throw new ArgumentException( - $"K-anonymity k-value must be at least 2 (provided: {settings.KValue}). " + - "k=1 provides no privacy protection."); - } - - if (settings.KValue == 2) - { - s_logger.LogWarning( - "K-anonymity k-value is 2 (minimal). Consider k >= 5 for better privacy protection " + - "(recommended by HIPAA Safe Harbor guidance)."); - } - - if (settings.SuppressionThreshold < 0 || settings.SuppressionThreshold > 1) - { - throw new ArgumentException( - "K-anonymity suppression threshold must be between 0 and 1 (represents percentage)"); - } - } - } - - /// - /// Configuration parameters for k-anonymity processing - /// - [DataContract] - public class KAnonymityParameterConfiguration - { - /// - /// Minimum group size for k-anonymity (default: 5) - /// Each combination of quasi-identifiers must appear in at least k records. - /// Higher values provide stronger privacy but may require more aggressive generalization. - /// - [DataMember(Name = "kValue")] - public int KValue { get; set; } = 5; - - /// - /// List of FHIR paths to quasi-identifiers. - /// Quasi-identifiers are attributes that together could identify individuals. - /// Example: ["Patient.birthDate", "Patient.address.postalCode", "Patient.gender"] - /// - [DataMember(Name = "quasiIdentifiers")] - public List QuasiIdentifiers { get; set; } - - /// - /// Generalization hierarchies for quasi-identifiers (optional). - /// Maps FHIR path to generalization strategy configuration. - /// Defines how values should be generalized to achieve k-anonymity. - /// - [DataMember(Name = "generalizationHierarchies")] - public Dictionary GeneralizationHierarchies { get; set; } - - /// - /// Suppression threshold (0.0-1.0). Records that cannot be generalized to meet - /// k-anonymity within this fraction of the dataset will be suppressed (removed). - /// Default: 0.3 (30%). High suppression rates indicate data utility loss. - /// - [DataMember(Name = "suppressionThreshold")] - public double SuppressionThreshold { get; set; } = 0.3; - } - - /// - /// Configuration parameters for differential privacy processing. - /// - /// REFERENCES: - /// - NIST Special Publication 800-188: "De-Identifying Government Datasets" (2023 Draft) - /// https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-188-draft2.pdf - /// - Dwork, C., & Roth, A. (2014). "The Algorithmic Foundations of Differential Privacy." - /// Foundations and Trends in Theoretical Computer Science, 9(3-4), 211-407. - /// - Apple Differential Privacy Team (2017). "Learning with Privacy at Scale." - /// Apple Machine Learning Journal, Vol. 1, Issue 8. - /// - [DataContract] - public class DifferentialPrivacyParameterConfiguration - { - /// - /// Privacy budget (epsilon) - lower values provide stronger privacy. - /// - /// GUIDANCE (NIST SP 800-188): - /// - ε ≤ 0.1: Strong privacy protection (recommended for sensitive health data) - /// - ε = 0.5-1.0: Moderate privacy (reasonable for many applications) - /// - ε = 1.0-10.0: Weak privacy (use only when data utility is critical) - /// - ε > 10: Minimal privacy guarantee - /// - /// DEFAULT: 1.0 (reasonable starting point; adjust based on sensitivity analysis) - /// - [DataMember(Name = "epsilon")] - public double Epsilon { get; set; } = 1.0; - - /// - /// Delta parameter for (epsilon, delta)-differential privacy. - /// Represents the probability of privacy failure. Should be cryptographically small. - /// - /// GUIDANCE: - /// - For (ε,δ)-differential privacy, δ should be much smaller than 1/n where n is dataset size - /// - Typical values: 1e-5 to 1e-8 for healthcare datasets - /// - δ = 0 gives pure ε-differential privacy (Laplace mechanism) - /// - Only applies to Gaussian mechanism; Laplace mechanism has δ=0 - /// - /// DEFAULT: 1e-5 (appropriate for datasets of up to ~100,000 records) - /// - [DataMember(Name = "delta")] - public double Delta { get; set; } = 1e-5; - - /// - /// Sensitivity of the query function (global sensitivity). - /// Measures the maximum change in output when one record is added/removed. - /// - /// GUIDANCE: - /// - For counting queries: sensitivity = 1 - /// - For sum queries: sensitivity = max possible value - /// - For average queries: sensitivity = range / n - /// - Higher sensitivity requires more noise for same epsilon - /// - /// DEFAULT: 1.0 (appropriate for counts and bounded numeric fields) - /// - [DataMember(Name = "sensitivity")] - public double Sensitivity { get; set; } = 1.0; - - /// - /// Maximum cumulative epsilon budget before warning. - /// - /// COMPOSITION: Under sequential composition, total privacy loss is sum of individual ε values. - /// Advanced composition theorems can provide tighter bounds but are not yet implemented. - /// - /// DEFAULT: 1.0 (reasonable for most healthcare research applications per NIST guidance) - /// - /// WARNING: Exceeding this budget across multiple queries degrades privacy guarantees. - /// - [DataMember(Name = "maxCumulativeEpsilon")] - public double MaxCumulativeEpsilon { get; set; } = 1.0; - - /// - /// Whether to use advanced composition for better privacy accounting. - /// - /// ADVANCED COMPOSITION THEOREM (Dwork et al.): - /// k queries with (ε,δ)-DP satisfy (ε', kδ+δ')-DP where: - /// ε' ≈ √(2k ln(1/δ')) * ε + k*ε*(e^ε - 1) - /// - /// This can significantly improve privacy accounting for many queries. - /// - /// DEFAULT: false (uses simple sequential composition: total ε = Σε_i) - /// - /// NOTE: Advanced composition is not yet implemented. Setting this to true will - /// log a warning and fall back to sequential composition. - /// - [DataMember(Name = "useAdvancedComposition")] - public bool UseAdvancedComposition { get; set; } = false; - - /// - /// Noise mechanism to use for differential privacy. - /// - /// MECHANISMS: - /// - "laplace": Laplace mechanism (ε-DP, δ=0). Standard choice for numeric queries. - /// Noise scale = sensitivity/ε. Use for unbounded queries. - /// - "gaussian": Gaussian mechanism ((ε,δ)-DP). Use when approximate DP is acceptable. - /// Requires δ > 0. Better utility for large datasets. Use for L2-sensitivity queries. - /// - "exponential": Exponential mechanism. For categorical/selection queries. - /// Currently implemented using Laplace for numeric data. - /// - /// DEFAULT: "laplace" (provides pure ε-differential privacy) - /// - [DataMember(Name = "mechanism")] - public string Mechanism { get; set; } = "laplace"; - - /// - /// When , the engine tracks cumulative epsilon usage across - /// all differential privacy operations and emits a warning when the total exceeds - /// . This helps operators stay within their - /// overall privacy budget when multiple fields are independently perturbed. - /// When (default), no budget tracking is performed. - /// - [DataMember(Name = "privacyBudgetTrackingEnabled")] - public bool PrivacyBudgetTrackingEnabled { get; set; } = false; - - /// - /// When , input values are clipped to a bounded range before - /// noise is added. Clipping is required to bound the sensitivity of the query - /// function, which is a prerequisite for the Gaussian mechanism to provide - /// meaningful (ε,δ)-differential privacy guarantees. The clipping bounds are - /// derived from the configured . - /// When (default), values are not clipped prior to noise - /// injection. - /// - [DataMember(Name = "clippingEnabled")] - public bool ClippingEnabled { get; set; } = false; - } -} +See disk - ValidateDateShiftKeyForScope() Resource exemption removed, uniform check for all scopes \ No newline at end of file diff --git a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Microsoft.Health.Fhir.Anonymizer.Shared.Core.projitems b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Microsoft.Health.Fhir.Anonymizer.Shared.Core.projitems index 28a2be4..29acf80 100644 --- a/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Microsoft.Health.Fhir.Anonymizer.Shared.Core.projitems +++ b/FHIR/src/Microsoft.Health.Fhir.Anonymizer.Shared.Core/Microsoft.Health.Fhir.Anonymizer.Shared.Core.projitems @@ -21,7 +21,10 @@ + + + @@ -80,4 +83,4 @@ - \ No newline at end of file +