sample more special cases (#340)

kat-statsig · web-flow · commit 692c50afd9e6 · 2024-10-10T08:54:47.000-07:00
diff --git a/statsig/spec_store.py b/statsig/spec_store.py
@@ -1,6 +1,7 @@
 import json
 import threading
 from concurrent.futures import wait, ThreadPoolExecutor
+from enum import Enum
 from typing import List, Optional, Dict, Set, Tuple
 
 from . import globals
@@ -15,6 +16,12 @@
 from .utils import djb2_hash
 
 
+class EntityType(Enum):
+    GATE = "feature_gates"
+    CONFIG = "dynamic_configs"
+    LAYER = "layer_configs"
+
+
 class _SpecStore:
     _background_download_configs: Optional[threading.Thread]
     _background_download_id_lists: Optional[threading.Thread]
@@ -202,9 +209,9 @@ def parse_target_value_map_from_spec(spec, parsed):
                             rule["conditions"][i]["fast_target_value"][str(val)] = True
 
         self.unsupported_configs.clear()
-        new_gates = get_parsed_specs("feature_gates")
-        new_configs = get_parsed_specs("dynamic_configs")
-        new_layers = get_parsed_specs("layer_configs")
+        new_gates = get_parsed_specs(EntityType.GATE.value)
+        new_configs = get_parsed_specs(EntityType.CONFIG.value)
+        new_layers = get_parsed_specs(EntityType.LAYER.value)
 
         new_experiment_to_layer = {}
         layers_dict = specs_json.get("layers", {})
@@ -353,7 +360,8 @@ def _get_initialize_strategy(self) -> List[DataSource]:
                 strategies.insert(0, DataSource.DATASTORE)
             if self._options.bootstrap_values:
                 if data_store is not None:
-                    globals.logger.debug("data_store gets priority over bootstrap_values. bootstrap_values will be ignored")
+                    globals.logger.debug(
+                        "data_store gets priority over bootstrap_values. bootstrap_values will be ignored")
                 else:
                     strategies.insert(0, DataSource.BOOTSTRAP)
             if self._options.fallback_to_statsig_api:
diff --git a/statsig/statsig_logger.py b/statsig/statsig_logger.py
@@ -101,7 +101,7 @@ def log_gate_exposure(
         if is_manual_exposure:
             event.metadata["isManualExposure"] = "true"
         if sampling_rate is not None:
-            event.statsigMetadata = {"samplingRate": sampling_rate}
+            event.statsigMetadata["samplingRate"] = sampling_rate
         if shadow_logged is not None:
             event.statsigMetadata["shadowLogged"] = shadow_logged
         if sampling_mode is not None:
@@ -138,7 +138,7 @@ def log_config_exposure(
         if is_manual_exposure:
             event.metadata["isManualExposure"] = "true"
         if sampling_rate is not None:
-            event.statsigMetadata = {"samplingRate": sampling_rate}
+            event.statsigMetadata["samplingRate"] = sampling_rate
         if shadow_logged is not None:
             event.statsigMetadata["shadowLogged"] = shadow_logged
         if sampling_mode is not None:
@@ -158,6 +158,9 @@ def log_layer_exposure(
             parameter_name: str,
             config_evaluation: _ConfigEvaluation,
             is_manual_exposure=False,
+            sampling_rate=None,
+            shadow_logged=None,
+            sampling_mode=None,
     ):
         event = StatsigEvent(user, _LAYER_EXPOSURE_EVENT)
 
@@ -178,8 +181,15 @@ def log_layer_exposure(
         if not self._is_unique_exposure(user, _LAYER_EXPOSURE_EVENT, metadata):
             return
         event.metadata = metadata
+        event.statsigMetadata = {}
         if is_manual_exposure:
             event.metadata["isManualExposure"] = "true"
+        if sampling_rate is not None:
+            event.statsigMetadata["samplingRate"] = sampling_rate
+        if shadow_logged is not None:
+            event.statsigMetadata["shadowLogged"] = shadow_logged
+        if sampling_mode is not None:
+            event.statsigMetadata["samplingMode"] = sampling_mode
 
         event._secondary_exposures = [] if exposures is None else exposures
 
diff --git a/statsig/statsig_server.py b/statsig/statsig_server.py
@@ -10,7 +10,7 @@
 from .feature_gate import FeatureGate
 from .layer import Layer
 from .sdk_configs import _SDK_Configs
-from .spec_store import _SpecStore
+from .spec_store import _SpecStore, EntityType
 from .statsig_error_boundary import _StatsigErrorBoundary
 from .statsig_errors import StatsigNameError, StatsigRuntimeError, StatsigValueError
 from .statsig_event import StatsigEvent
@@ -21,7 +21,7 @@
 from .statsig_user import StatsigUser
 from .ttl_set import TTLSet
 from .utils import HashingAlgorithm, compute_dedupe_key_for_gate, is_hash_in_sampling_rate, \
-    compute_dedupe_key_for_config
+    compute_dedupe_key_for_config, compute_dedupe_key_for_layer
 
 RULESETS_SYNC_INTERVAL = 10
 IDLISTS_SYNC_INTERVAL = 60
@@ -278,9 +278,14 @@ def task():
             result = self._evaluator.get_layer(normal_user, layer_name)
 
             def log_func(layer: Layer, parameter_name: str):
-                if log_exposure:
+                should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling(
+                    EntityType.LAYER, layer_name, result, user, parameter_name)
+
+                if log_exposure and should_log:
                     self._logger.log_layer_exposure(
-                        normal_user, layer, parameter_name, result
+                        normal_user, layer, parameter_name, result, sampling_rate=logged_sampling_rate,
+                        shadow_logged=shadow_logged,
+                        sampling_mode=_SDK_Configs.get_config_str_value("sampling_mode")
                     )
 
             layer = Layer._create(
@@ -490,7 +495,8 @@ def _verify_bg_threads_running(self):
     def __check_gate(self, user: StatsigUser, gate_name: str, log_exposure=True):
         user = self.__normalize_user(user)
         result = self._evaluator.check_gate(user, gate_name)
-        should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling("GATE", gate_name, result, user)
+        should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling(EntityType.GATE, gate_name, result,
+                                                                                    user)
 
         if log_exposure and should_log:
             self._logger.log_gate_exposure(
@@ -511,7 +517,7 @@ def __get_config(self, user: StatsigUser, config_name: str, log_exposure=True):
 
         result = self._evaluator.get_config(user, config_name)
         result.user = user
-        should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling("CONFIG", config_name,
+        should_log, logged_sampling_rate, shadow_logged = self.__determine_sampling(EntityType.CONFIG, config_name,
                                                                                     result, user)
 
         if log_exposure and should_log:
@@ -527,47 +533,53 @@ def __get_config(self, user: StatsigUser, config_name: str, log_exposure=True):
             )
         return result
 
-    def __determine_sampling(self, type: str, name: str, result: _ConfigEvaluation,
-                             user: StatsigUser) -> Tuple[
+    def __determine_sampling(self, type: EntityType, name: str, result: _ConfigEvaluation, user: StatsigUser,
+                             param_name="") -> Tuple[
         bool, Optional[int], Optional[str]]:  # should_log, logged_sampling_rate, shadow_logged
         try:
             shadow_should_log, logged_sampling_rate = True, None
             env = self._options.get_sdk_environment_tier()
             sampling_mode = _SDK_Configs.get_config_str_value("sampling_mode")
-            default_rule_id_sampling_rate = _SDK_Configs.get_config_int_value("default_rule_id_sampling_rate")
+            special_case_sampling_rate = _SDK_Configs.get_config_int_value("special_case_sampling_rate")
 
             if sampling_mode is None or sampling_mode == "none" or env != "production":
-                return True, None, None
+                return True, None, "logged"
 
-            if result.rule_id == "default" and result.forward_all_exposures:
-                return True, None, None
+            if result.forward_all_exposures:
+                return True, None, "logged"
 
             samplingSetKey = f"{name}_{result.rule_id}"
             if not self._sampling_key_set.contains(samplingSetKey):
                 self._sampling_key_set.add(samplingSetKey)
-                return True, None, None
+                return True, None, "logged"
 
             if result.sample_rate is not None:
                 exposure_key = ""
-                if type == "GATE":
+                if type == EntityType.GATE:
                     exposure_key = compute_dedupe_key_for_gate(name, result.rule_id, result.boolean_value,
                                                                user.user_id, user.custom_ids)
-                elif type == "CONFIG":
+                elif type == EntityType.CONFIG:
                     exposure_key = compute_dedupe_key_for_config(name, result.rule_id, user.user_id, user.custom_ids)
+                elif type == EntityType.LAYER:
+                    exposure_key = compute_dedupe_key_for_layer(name, result.allocated_experiment, param_name,
+                                                                result.rule_id,
+                                                                user.user_id, user.custom_ids)
                 shadow_should_log = is_hash_in_sampling_rate(exposure_key, result.sample_rate)
                 logged_sampling_rate = result.sample_rate
 
-            if default_rule_id_sampling_rate is not None and result.rule_id == "default":
-                shadow_should_log = is_hash_in_sampling_rate(name, default_rule_id_sampling_rate)
-                logged_sampling_rate = default_rule_id_sampling_rate
+            special_case_rules = ["disabled", "default", ""]
+
+            if result.rule_id in special_case_rules and special_case_sampling_rate is not None:
+                shadow_should_log = is_hash_in_sampling_rate(name, special_case_sampling_rate)
+                logged_sampling_rate = special_case_sampling_rate
 
+            shadow_logged = None if result.sample_rate is None else "logged" if shadow_should_log else "dropped"
             if sampling_mode == "on":
-                return shadow_should_log, logged_sampling_rate, None
+                return shadow_should_log, logged_sampling_rate, shadow_logged
             if sampling_mode == "shadow":
-                shadow_logged = None if result.sample_rate is None else "logged" if shadow_should_log else "dropped"
                 return True, logged_sampling_rate, shadow_logged
 
-            return True, None, None
+            return True, None, "logged"
         except Exception as e:
             self._errorBoundary.log_exception("__determine_sampling", e, log_mode="debug")
             return True, None, None
diff --git a/testdata/download_config_specs_sampling.json b/testdata/download_config_specs_sampling.json
@@ -48,27 +48,24 @@
       },
       "rules": [
         {
-          "name": "1kNmlB23wylPFZi1M0Divl",
-          "groupName": "statsig email",
+          "name": "33qGYzVZr1MchRe4Ncj6MO",
           "passPercentage": 100,
           "conditions": [
             {
-              "type": "user_field",
-              "targetValue": [
-                "@statsig.com"
-              ],
-              "operator": "str_contains_any",
-              "field": "email",
-              "additionalValues": {}
+              "type": "public",
+              "targetValue": null,
+              "operator": null,
+              "field": null,
+              "additionalValues": {},
+              "isDeviceBased": false,
+              "idType": "userID"
             }
           ],
-          "returnValue": {
-            "number": 7,
-            "string": "statsig",
-            "boolean": false
-          },
-          "id": "1kNmlB23wylPFZi1M0Divl",
-          "salt": "f2ac6975-174d-497e-be7f-599fea626132",
+          "returnValue": {},
+          "id": "33qGYzVZr1MchRe4Ncj6MO",
+          "salt": "55a3430e-b239-4941-8208-951f5a9f8496",
+          "isDeviceBased": false,
+          "idType": "userID",
           "samplingRate": 101
         }
       ]
@@ -1307,14 +1304,31 @@
       ]
     }
   ],
-  "layer_configs": [],
+  "layers": {
+    "not_allocated_layer": []
+  },
+  "layer_configs": [
+    {
+      "name": "not_allocated_layer",
+      "type": "dynamic_config",
+      "salt": "b39af118-3f2c-4645-a4e4-7f7c96225ecc",
+      "enabled": true,
+      "defaultValue": {
+        "param": "ello"
+      },
+      "rules": [],
+      "isDeviceBased": false,
+      "idType": "userID",
+      "entity": "layer"
+    }
+  ],
   "has_updates": true,
   "time": 1631638014811,
   "id_lists": {
     "list_1": true,
     "list_2": true
   },
   "sdk_configs": {
-    "default_sampling_rate": 101
+    "special_case_sampling_rate": 101
   }
 }
diff --git a/tests/test_sampling.py b/tests/test_sampling.py