From a9f1203b777545e233e8be00a523864c5722155a Mon Sep 17 00:00:00 2001
From: Keshav Krishna
Date: Mon, 24 Jun 2024 04:02:21 +0530
Subject: [PATCH] first commit

---
 .gitignore           |   1 +
 cache.py             | 133 +++++++++++++++++++++++++++++++++++++++
 cache_factory.py     |  21 +++++++
 cache_metrics.py     |  40 ++++++++++++
 demo.py              | 147 +++++++++++++++++++++++++++++++++++++++++++
 eviction_policies.py |  58 +++++++++++++++++
 readme.md            |  90 ++++++++++++++++++++++++++
 7 files changed, 490 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 cache.py
 create mode 100644 cache_factory.py
 create mode 100644 cache_metrics.py
 create mode 100644 demo.py
 create mode 100644 eviction_policies.py
 create mode 100644 readme.md

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c18dd8d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
diff --git a/cache.py b/cache.py
new file mode 100644
index 0000000..d2410f3
--- /dev/null
+++ b/cache.py
@@ -0,0 +1,133 @@
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar, Optional, Dict, Any, Type
+from threading import Lock
+from time import time
+from eviction_policies import EvictionPolicy
+from cache_metrics import CacheMetrics
+
+K = TypeVar('K')
+V = TypeVar('V')
+
+class CacheItem:
+    def __init__(self, value: Any, expiry: Optional[float] = None):
+        self.value = value
+        self.expiry = expiry
+
+class Cache(ABC, Generic[K, V]):
+    @abstractmethod
+    def put(self, key: K, value: V, ttl: Optional[int] = None) -> None:
+        pass
+
+    @abstractmethod
+    def get(self, key: K) -> V:
+        pass
+
+    @abstractmethod
+    def remove(self, key: K) -> None:
+        pass
+
+    @abstractmethod
+    def get_metrics(self) -> Dict[str, Any]:
+        pass
+
+class SegmentedCache(Cache[K, V]):
+    def __init__(self, capacity_per_segment: int, eviction_policy_class: Type[EvictionPolicy[K]], num_segments: int = 16):
+        self.capacity_per_segment = capacity_per_segment
+        self.eviction_policy_class = eviction_policy_class
+        self.segments = [{} for _ in range(num_segments)]  # Each segment is a dictionary
+        self.eviction_policies = [eviction_policy_class() for _ in range(num_segments)]  # Instantiate a policy for each segment
+        self.locks = [Lock() for _ in range(num_segments)]  # One lock per segment
+        self.metrics = CacheMetrics()
+        self.num_segments = num_segments
+        self.global_lock = Lock()
+
+    def _get_segment(self, key: K) -> int:
+        return hash(key) % self.num_segments
+
+    def put(self, key: K, value: V, ttl: Optional[int] = None) -> None:
+        segment_index = self._get_segment(key)
+        with self.locks[segment_index]:
+            self._remove_expired_items(segment_index)
+            if key in self.segments[segment_index]:
+                self.eviction_policies[segment_index].remove(key)
+            elif len(self.segments[segment_index]) >= self.capacity_per_segment:
+                self._evict_item(segment_index)
+
+            expiry = time() + ttl if ttl is not None else None
+            self.segments[segment_index][key] = CacheItem(value, expiry)
+            self.eviction_policies[segment_index].add(key)
+
+    def get(self, key: K) -> V:
+        segment_index = self._get_segment(key)
+        with self.locks[segment_index]:
+            self._remove_expired_items(segment_index)
+            if key not in self.segments[segment_index]:
+                self.metrics.record_miss()
+                raise KeyError(f"Key '{key}' not found in cache")
+
+            item = self.segments[segment_index][key]
+            if item.expiry is not None and item.expiry <= time():
+                del self.segments[segment_index][key]
+                self.eviction_policies[segment_index].remove(key)
+                self.metrics.record_expiration()
+                self.metrics.record_miss()
+                raise KeyError(f"Key '{key}' has expired")
+
+            self.eviction_policies[segment_index].remove(key)
+            self.eviction_policies[segment_index].add(key)
+            self.metrics.record_hit()
+            return item.value
+
+    def remove(self, key: K) -> None:
+        segment_index = self._get_segment(key)
+        with self.locks[segment_index]:
+            if key in self.segments[segment_index]:
+                del self.segments[segment_index][key]
+                self.eviction_policies[segment_index].remove(key)
+
+    def _remove_expired_items(self, segment_index: int) -> None:
+        current_time = time()
+        expired_keys = [k for k, v in self.segments[segment_index].items() if v.expiry is not None and v.expiry <= current_time]
+        for key in expired_keys:
+            del self.segments[segment_index][key]
+            self.eviction_policies[segment_index].remove(key)
+            self.metrics.record_expiration()
+
+    def _evict_item(self, segment_index: int) -> None:
+        evicted_key = self.eviction_policies[segment_index].evict()
+        del self.segments[segment_index][evicted_key]
+        self.metrics.record_eviction()
+
+    def get_metrics(self) -> Dict[str, Any]:
+        return self.metrics.get_metrics()
+
+    def resize_segments(self, new_num_segments: int) -> None:
+        if new_num_segments <= 0:
+            raise ValueError("Number of segments must be positive")
+
+        acquired_locks = []
+        with self.global_lock:
+            current_num_segments = self.num_segments
+            for lock in self.locks:
+                lock.acquire()
+                acquired_locks.append(lock)
+
+            try:
+                if new_num_segments > current_num_segments:
+                    self.segments.extend([{} for _ in range(new_num_segments - current_num_segments)])
+                    self.eviction_policies.extend([self.eviction_policy_class() for _ in range(new_num_segments - current_num_segments)])
+                    self.locks.extend([Lock() for _ in range(new_num_segments - current_num_segments)])
+                elif new_num_segments < current_num_segments:
+                    self.segments = self.segments[:new_num_segments]
+                    self.eviction_policies = self.eviction_policies[:new_num_segments]
+                    self.locks = self.locks[:new_num_segments]
+
+                self.num_segments = new_num_segments
+                print(f"Segments resized to {new_num_segments}")
+
+            finally:
+                for lock in acquired_locks:
+                    lock.release()
+
+    def __str__(self) -> str:
+        return f"SegmentedCache with {self.num_segments} segments, capacity per segment: {self.capacity_per_segment}"
diff --git a/cache_factory.py b/cache_factory.py
new file mode 100644
index 0000000..ba740fd
--- /dev/null
+++ b/cache_factory.py
@@ -0,0 +1,21 @@
+from typing import Type
+from cache import SegmentedCache
+from eviction_policies import EvictionPolicy, FIFOEvictionPolicy, LRUEvictionPolicy, LIFOEvictionPolicy
+
+class CacheFactory:
+    _cache_types = {
+        "FIFO": FIFOEvictionPolicy,
+        "LRU": LRUEvictionPolicy,
+        "LIFO": LIFOEvictionPolicy
+    }
+
+    @classmethod
+    def register_cache_type(cls, cache_type: str, policy_class: Type[EvictionPolicy]):
+        cls._cache_types[cache_type] = policy_class
+
+    @classmethod
+    def create_cache(cls, cache_type: str, capacity: int, num_segments: int = 16):
+        policy_class = cls._cache_types.get(cache_type)
+        if policy_class is None:
+            raise ValueError(f"Unsupported cache type: {cache_type}")
+        return SegmentedCache(capacity, policy_class, num_segments)
\ No newline at end of file
diff --git a/cache_metrics.py b/cache_metrics.py
new file mode 100644
index 0000000..e2f92cf
--- /dev/null
+++ b/cache_metrics.py
@@ -0,0 +1,40 @@
+from threading import Lock
+
+class CacheMetrics:
+    def __init__(self):
+        self.hits = 0
+        self.misses = 0
+        self.total_requests = 0
+        self.evictions = 0
+        self.expirations = 0
+        self.lock = Lock()
+
+    def record_hit(self):
+        with self.lock:
+            self.hits += 1
+            self.total_requests += 1
+
+    def record_miss(self):
+        with self.lock:
+            self.misses += 1
+            self.total_requests += 1
+
+    def record_eviction(self):
+        with self.lock:
+            self.evictions += 1
+
+    def record_expiration(self):
+        with self.lock:
+            self.expirations += 1
+
+    def get_metrics(self):
+        with self.lock:
+            return {
+                "hits": self.hits,
+                "misses": self.misses,
+                "total_requests": self.total_requests,
+                "evictions": self.evictions,
+                "expirations": self.expirations,
+                "hit_ratio": self.hits / self.total_requests if self.total_requests > 0 else 0,
+                "miss_ratio": self.misses / self.total_requests if self.total_requests > 0 else 0
+            }
\ No newline at end of file
diff --git a/demo.py b/demo.py
new file mode 100644
index 0000000..258699a
--- /dev/null
+++ b/demo.py
@@ -0,0 +1,147 @@
+import threading
+import time
+from cache_factory import CacheFactory
+from eviction_policies import EvictionPolicy
+from typing import Generic, TypeVar, Dict
+from collections import defaultdict
+
+K = TypeVar('K')
+
+# Custom class implementing an LFU (Least Frequently Used) eviction policy
+class LFUEvictionPolicy(EvictionPolicy, Generic[K]):
+    def __init__(self):
+        self.key_frequency: Dict[K, int] = defaultdict(int)
+        self.frequency_keys: Dict[int, set[K]] = defaultdict(set)
+        self.min_frequency = 0
+
+    def add(self, key: K) -> None:
+        if key not in self.key_frequency:
+            self.key_frequency[key] = 1
+            self.frequency_keys[1].add(key)
+            self.min_frequency = 1
+        else:
+            self._increment_frequency(key)
+
+    def remove(self, key: K) -> None:
+        if key in self.key_frequency:
+            freq = self.key_frequency[key]
+            self.frequency_keys[freq].remove(key)
+            if len(self.frequency_keys[freq]) == 0:
+                del self.frequency_keys[freq]
+                if freq == self.min_frequency:
+                    self.min_frequency += 1
+            del self.key_frequency[key]
+
+    def evict(self) -> K:
+        if not self.key_frequency:
+            raise ValueError("No keys to evict")
+
+        key_to_evict = next(iter(self.frequency_keys[self.min_frequency]))
+        self.remove(key_to_evict)
+        return key_to_evict
+
+    def _increment_frequency(self, key: K) -> None:
+        freq = self.key_frequency[key]
+        self.key_frequency[key] = freq + 1
+        self.frequency_keys[freq].remove(key)
+        if len(self.frequency_keys[freq]) == 0:
+            del self.frequency_keys[freq]
+            if freq == self.min_frequency:
+                self.min_frequency += 1
+        self.frequency_keys[freq + 1].add(key)
+
+
+
+
+def test_cache(cache, thread):
+    # Put some items in the cache
+    for i in range(4):
+        cache.put(f"key{i}", f"value{i}")
+        print(f"{thread} Added key{i}")
+
+    # Access some items multiple times to increase their frequency
+    for _ in range(3):
+        cache.get("key0")
+    for _ in range(2):
+        cache.get("key1")
+
+    print(f"\n {thread} Current cache state:")
+    for i in range(4):
+        try:
+            print(f" {thread} key{i}: {cache.get(f'key{i}')}")
+        except KeyError:
+            print(f" {thread} key{i}: Not found")
+
+
+    for i in range(4, 16):
+        cache.put(f"key{i}", f"value{i}")
+        print(f"{thread} Added key{i}")
+
+    print(f"\n {thread} After adding more items:")
+    for i in range(16):
+        try:
+            print(f" {thread} key{i}: {cache.get(f'key{i}')}")
+        except KeyError:
+            print(f" {thread} key{i}: Not found")
+
+    # Resize the segments: add one more segment
+    cache.resize_segments(cache.num_segments + 1)
+
+    print(f"\n {thread} After adding one more segment:")
+    for i in range(6):
+        try:
+            print(f" {thread} key{i}: {cache.get(f'key{i}')}")
+        except KeyError:
+            print(f" {thread} key{i}: Not found")
+
+    # Demonstrate TTL functionality
+    cache.put("ttl_key", "ttl_value", ttl=2)
+    print(f"\n {thread} After adding ttl_key:")
+    print(f"{thread} ttl_key: {cache.get('ttl_key')}")
+
+    time.sleep(3)
+
+    print(f"\n {thread} After waiting for TTL expiration:")
+    try:
+        print(f"{thread} ttl_key: {cache.get('ttl_key')}")
+    except KeyError:
+        print(f"{thread} ttl_key: expired")
+
+    # Print cache metrics
+    print(f"\n {thread} Cache Metrics:")
+    print(cache.get_metrics())
+
+
+def create_test_cache(thread, cache_type, capacity, num_segments):
+    cache = CacheFactory.create_cache(cache_type, capacity=capacity, num_segments=num_segments)
+    test_cache(cache, thread)
+
+def main():
+    threads = []
+
+    # Create a thread for each cache configuration
+    threads.append(threading.Thread(target=create_test_cache, args=("Thread1->", "FIFO", 3, 4)))
+    threads.append(threading.Thread(target=create_test_cache, args=("Thread2->", "LRU", 3, 2)))
+    threads.append(threading.Thread(target=create_test_cache, args=("Thread3->", "LIFO", 3, 3)))
+
+    # Register the custom LFUEvictionPolicy class to demonstrate a custom eviction policy;
+    # it replicates a Least Frequently Used (LFU) eviction strategy
+    CacheFactory.register_cache_type("LFU", LFUEvictionPolicy)
+    threads.append(threading.Thread(target=create_test_cache, args=("Thread4->", "LFU", 3, 4)))
+
+    # Start all threads
+    for thread in threads:
+        thread.start()
+
+    # Wait till all the threads are completed
+    for thread in threads:
+        thread.join()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/eviction_policies.py b/eviction_policies.py
new file mode 100644
index 0000000..d7dbf45
--- /dev/null
+++ b/eviction_policies.py
@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+from typing import Generic, TypeVar
+from collections import OrderedDict
+
+K = TypeVar('K')
+
+class EvictionPolicy(ABC, Generic[K]):
+    @abstractmethod
+    def add(self, key: K) -> None:
+        pass
+
+    @abstractmethod
+    def remove(self, key: K) -> None:
+        pass
+
+    @abstractmethod
+    def evict(self) -> K:
+        pass
+
+class FIFOEvictionPolicy(EvictionPolicy[K]):
+    def __init__(self):
+        self.queue = []
+
+    def add(self, key: K) -> None:
+        self.queue.append(key)
+
+    def remove(self, key: K) -> None:
+        self.queue.remove(key)
+
+    def evict(self) -> K:
+        return self.queue.pop(0)
+
+class LRUEvictionPolicy(EvictionPolicy[K]):
+    def __init__(self):
+        self.order = OrderedDict()
+
+    def add(self, key: K) -> None:
+        self.order[key] = None
+
+    def remove(self, key: K) -> None:
+        if key in self.order:
+            del self.order[key]
+
+    def evict(self) -> K:
+        return self.order.popitem(last=False)[0]
+
+class LIFOEvictionPolicy(EvictionPolicy[K]):
+    def __init__(self):
+        self.stack = []
+
+    def add(self, key: K) -> None:
+        self.stack.append(key)
+
+    def remove(self, key: K) -> None:
+        self.stack.remove(key)
+
+    def evict(self) -> K:
+        return self.stack.pop()
\ No newline at end of file
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..8c223ae
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,90 @@
+# In-Memory Caching
+
+#### Problem Statement
+Design and implement an in-memory caching library for general use.
+##### Must Have
+- Support for multiple standard eviction policies (FIFO, LRU, LIFO)
+- Support to add custom eviction policies
+##### Good To Have
+- Thread safety
+
+## Structure of the Project
+
+The project consists of the following main components:
+1. **eviction_policies.py**: Contains the EvictionPolicy interface and the standard eviction policy implementations (FIFO, LRU, LIFO).
+2. **cache_metrics.py**: Implements metrics tracking for cache operations.
+3. **cache.py**: Defines the core cache classes, including CacheItem, Cache, and SegmentedCache.
+4. **cache_factory.py**: Provides a CacheFactory class for creating instances of SegmentedCache with different eviction policies.
+5. **demo.py**: Demonstrates concurrent usage of the caching library with different eviction policies across multiple threads. It showcases basic cache operations, eviction handling, TTL functionality, and resizing of cache segments. Each thread operates independently on its own cache instance, demonstrating thread-safe access to and manipulation of the cache data structures.
+
+## Assumptions
+- All cache operations are performed in-memory. The cache is not designed to be persistent; all data is stored in memory and will be lost if the application terminates.
+- To implement a custom eviction policy, the user creates a class implementing the EvictionPolicy interface.
+- Cache entries can have an optional TTL, after which they expire and are evicted from the cache.
+- Expiry is checked during retrieval and put operations, but there is no background thread continuously purging expired items.
+
+## Approach
+
+The library adopts an object-oriented approach with the following key design patterns:
+- **Strategy Pattern**: Used in EvictionPolicy to define a family of interchangeable eviction algorithms.
+- **Template Method Pattern**: Implemented in FIFOEvictionPolicy, LRUEvictionPolicy, and LIFOEvictionPolicy to provide a skeleton of eviction algorithms with specific details left to subclasses.
+- **Composite Pattern**: Allows individual objects and compositions of objects to be treated uniformly. In the case of SegmentedCache, each segment can be considered an individual cache, and the entire SegmentedCache is a composition of these individual caches.
+- **Factory Method Pattern**: Implemented in CacheFactory to encapsulate the creation of SegmentedCache instances based on client requirements.
+
+
+# Class Overview
+
+#### CacheItem
+- **Attributes**: value, expiry
+- **Methods**: None
+- **Design Pattern**: None
+- **Purpose**: Represents an item in the cache with a value and an optional expiration time.
+
+#### CacheMetrics
+- **Attributes**: hits, misses, total_requests, evictions, expirations, lock
+- **Methods**: record_hit(), record_miss(), record_eviction(), record_expiration(), get_metrics()
+- **Design Pattern**: None (uses a locking pattern)
+- **Purpose**: Tracks and records cache performance metrics while ensuring thread safety.
+
+#### EvictionPolicy (Abstract Class)
+- **Attributes**: None (abstract base class)
+- **Methods**: add(key), remove(key), evict()
+- **Design Pattern**: Strategy Pattern
+- **Purpose**: Defines a common interface for different eviction policies.
+
+#### FIFOEvictionPolicy, LRUEvictionPolicy, LIFOEvictionPolicy
+- **Methods**: add(key), remove(key), evict()
+- **Design Pattern**: Template Method Pattern (provides a skeleton of an algorithm).
+- **Explanation**: Implements specific eviction policies using template methods for common eviction steps while leveraging subclass-specific data structures. A sketch of a custom policy built on the same interface follows below.
+
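+As an illustration, the same interface also supports user-defined policies. The snippet below is a minimal sketch of a hypothetical RandomEvictionPolicy (not shipped with the library): any class that implements add(key), remove(key), and evict() can be plugged into SegmentedCache.
+
+```python
+import random
+from eviction_policies import EvictionPolicy
+
+class RandomEvictionPolicy(EvictionPolicy):
+    """Hypothetical example: evicts a randomly chosen tracked key."""
+
+    def __init__(self):
+        self.keys = []
+
+    def add(self, key) -> None:
+        self.keys.append(key)
+
+    def remove(self, key) -> None:
+        if key in self.keys:
+            self.keys.remove(key)
+
+    def evict(self):
+        # Return the key to evict; SegmentedCache deletes it from the segment itself.
+        return self.keys.pop(random.randrange(len(self.keys)))
+```
+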
+#### SegmentedCache
+- **Attributes**: capacity_per_segment, eviction_policy_class, segments, eviction_policies, locks, metrics, num_segments, global_lock
+- **Methods**: put(key, value, ttl), get(key), remove(key), _remove_expired_items(segment_index), _evict_item(segment_index), get_metrics(), resize_segments(new_num_segments)
+- **Design Pattern**: Composite Pattern
+- **Purpose**: Manages multiple cache segments with individual eviction policies while ensuring thread-safe operations.
+
+#### CacheFactory
+- **Attributes**: _cache_types
+- **Methods**: register_cache_type(cache_type, policy_class), create_cache(cache_type, capacity, num_segments)
+- **Design Pattern**: Factory Method Pattern
+- **Purpose**: Creates instances of SegmentedCache with specified eviction policies and configurations.
+
+
+### Supporting Standard Eviction Policies
+
+Standard eviction policies (FIFO, LRU, LIFO) are supported through the dedicated classes FIFOEvictionPolicy, LRUEvictionPolicy, and LIFOEvictionPolicy, which encapsulate the specific eviction algorithms and data structures.
+
+### Supporting Custom Eviction Policies
+
+Custom eviction policies can be added by implementing the EvictionPolicy interface. The CacheFactory allows registration of custom eviction policies, enabling users to create SegmentedCache instances with these custom policies. For example, demo.py implements an LFU (Least Frequently Used) eviction policy.
+
+### Providing Thread Safety
+
+Thread safety is ensured through the use of locks (Lock objects) in critical sections of cache operations within SegmentedCache and CacheMetrics. This prevents data races and maintains the integrity of cache operations in concurrent execution scenarios.
+
+### Demo
+To run the demo, follow the steps below:
+- Clone the repo
+- Navigate to the repo directory
+- Run this command: `python3 demo.py`
+
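+### Example Usage
+
+The snippet below is a minimal, illustrative usage sketch based on the APIs in this patch; the key names and values are arbitrary examples.
+
+```python
+from cache_factory import CacheFactory
+from demo import LFUEvictionPolicy
+
+# Create a segmented LRU cache: 3 items per segment, 4 segments.
+cache = CacheFactory.create_cache("LRU", capacity=3, num_segments=4)
+
+cache.put("user:1", {"name": "Alice"})   # no TTL
+cache.put("session:1", "token", ttl=5)   # expires roughly 5 seconds after insertion
+
+print(cache.get("user:1"))               # {'name': 'Alice'}
+print(cache.get_metrics())               # hits, misses, evictions, expirations, ratios
+
+# Register and use the custom LFU policy defined in demo.py.
+CacheFactory.register_cache_type("LFU", LFUEvictionPolicy)
+lfu_cache = CacheFactory.create_cache("LFU", capacity=3, num_segments=2)
+```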