diff --git a/src/checkmk_kube_agent/api.py b/src/checkmk_kube_agent/api.py
index c6602be..17d32f1 100644
--- a/src/checkmk_kube_agent/api.py
+++ b/src/checkmk_kube_agent/api.py
@@ -297,6 +297,39 @@ def update_container_metrics(
     app.state.node_collector_metadata_queue.put(metrics.metadata)
     for metric in metrics.container_metrics:
         app.state.container_metric_queue.put(metric)
+
+    # Log cache statistics
+    cache_size = app.state.container_metric_queue.size()
+    cache_utilization = app.state.container_metric_queue.utilization()
+    cache_maxsize = app.state.container_metric_queue.maxsize
+
+    # Log at appropriate level based on utilization
+    if cache_utilization >= 95.0:
+        LOGGER.critical(
+            "Container metrics cache CRITICAL: received=%d, cache_size=%d/%d (%.1f%% full) - "
+            "Cache is nearly full! Metrics are being evicted. Increase --cache-maxsize urgently.",
+            len(metrics.container_metrics),
+            cache_size,
+            cache_maxsize,
+            cache_utilization,
+        )
+    elif cache_utilization >= 80.0:
+        LOGGER.warning(
+            "Container metrics cache WARNING: received=%d, cache_size=%d/%d (%.1f%% full) - "
+            "Cache utilization high. Consider increasing --cache-maxsize.",
+            len(metrics.container_metrics),
+            cache_size,
+            cache_maxsize,
+            cache_utilization,
+        )
+    else:
+        LOGGER.debug(
+            "Container metrics updated: received=%d, cache_size=%d/%d (%.1f%% full)",
+            len(metrics.container_metrics),
+            cache_size,
+            cache_maxsize,
+            cache_utilization,
+        )
 
 
 @app.get("/container_metrics")
@@ -425,6 +458,13 @@ def main(argv: Optional[Sequence[str]] = None) -> None:
     """Cluster collector API main function: start API"""
     args = parse_arguments(argv or sys.argv[1:])
 
+    # Configure application logging
+    logging.basicConfig(
+        level=args.log_level.upper(),
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+
     _init_app_state(
         app,
         cache_maxsize=args.cache_maxsize,
diff --git a/src/checkmk_kube_agent/dedup_ttl_cache.py b/src/checkmk_kube_agent/dedup_ttl_cache.py
index 507a531..f334ac2 100644
--- a/src/checkmk_kube_agent/dedup_ttl_cache.py
+++ b/src/checkmk_kube_agent/dedup_ttl_cache.py
@@ -86,3 +86,14 @@ def get_all(self) -> Sequence[V]:
         """Get all entries from the TTL cache."""
         with self.__lock:
             return list(self.values())
+
+    def size(self) -> int:
+        """Get the current number of entries in the cache."""
+        with self.__lock:
+            return len(self)
+
+    def utilization(self) -> float:
+        """Get the cache utilization as a percentage (0.0 to 100.0)."""
+        with self.__lock:
+            # Multiply first to avoid float drift (7 / 100 * 100.0 != 7.0).
+            return len(self) * 100.0 / self.maxsize
diff --git a/test_cache_size.py b/test_cache_size.py
new file mode 100644
index 0000000..e942bff
--- /dev/null
+++ b/test_cache_size.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+"""Simple test to verify DedupTTLCache size() and utilization() methods work correctly."""
+
+import sys
+sys.path.insert(0, 'src')
+
+from checkmk_kube_agent.dedup_ttl_cache import DedupTTLCache
+
+# Create a small cache for testing
+cache = DedupTTLCache(
+    key=lambda x: x[0],
+    maxsize=10,
+    ttl=60,
+)
+
+print("Testing DedupTTLCache size and utilization methods:")
+print("=" * 50)
+
+# Test 1: Empty cache
+print("\n1. Empty cache:")
+print(f" Size: {cache.size()}")
+print(f" Utilization: {cache.utilization():.1f}%")
+assert cache.size() == 0
+assert cache.utilization() == 0.0
+
+# Test 2: Add some entries
+print("\n2. After adding 3 entries:")
+cache.put(("foo", "bar"))
+cache.put(("baz", "qux"))
+cache.put(("hello", "world"))
+print(f" Size: {cache.size()}")
+print(f" Utilization: {cache.utilization():.1f}%")
+assert cache.size() == 3
+assert cache.utilization() == 30.0
+
+# Test 3: Add duplicate (should not increase size)
+print("\n3. After adding duplicate 'foo':")
+cache.put(("foo", "updated"))
+print(f" Size: {cache.size()}")
+print(f" Utilization: {cache.utilization():.1f}%")
+assert cache.size() == 3
+assert cache.utilization() == 30.0
+
+# Test 4: Fill cache to capacity
+print("\n4. After filling to capacity (10 entries):")
+for i in range(7):
+    cache.put((f"key{i}", f"value{i}"))
+print(f" Size: {cache.size()}")
+print(f" Utilization: {cache.utilization():.1f}%")
+assert cache.size() == 10
+assert cache.utilization() == 100.0
+
+# Test 5: Exceed capacity (oldest should be evicted)
+print("\n5. After exceeding capacity:")
+cache.put(("new", "entry"))
+print(f" Size: {cache.size()}")
+print(f" Utilization: {cache.utilization():.1f}%")
+assert cache.size() == 10  # Should still be 10
+assert cache.utilization() == 100.0
+
+print("\n" + "=" * 50)
+print("✅ All tests passed!")
+print("\nThese methods can now be used in the API to log cache statistics.")