Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/checkmk_kube_agent/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,40 @@ def update_container_metrics(
app.state.node_collector_metadata_queue.put(metrics.metadata)
for metric in metrics.container_metrics:
app.state.container_metric_queue.put(metric)

# Log cache statistics
cache_size = app.state.container_metric_queue.size()
cache_utilization = app.state.container_metric_queue.utilization()
cache_maxsize = app.state.container_metric_queue.maxsize

# Log at appropriate level based on utilization
if cache_utilization >= 95.0:
LOGGER.critical(
"Container metrics cache CRITICAL: received=%d, cache_size=%d/%d (%.1f%% full) - "
"Cache is nearly full! Metrics are being evicted. Increase --cache-maxsize urgently.",
len(metrics.container_metrics),
cache_size,
cache_maxsize,
cache_utilization,
)
elif cache_utilization >= 80.0:
LOGGER.error(
"Container metrics cache WARNING: received=%d, cache_size=%d/%d (%.1f%% full) - "
"Cache utilization high. Consider increasing --cache-maxsize.",
len(metrics.container_metrics),
cache_size,
cache_maxsize,
cache_utilization,
)
else:
LOGGER.debug(
"Container metrics updated: received=%d, cache_size=%d/%d (%.1f%% full)",
len(metrics.container_metrics),
cache_size,
cache_maxsize,
cache_utilization,
)



@app.get("/container_metrics")
Expand Down Expand Up @@ -425,6 +459,13 @@ def main(argv: Optional[Sequence[str]] = None) -> None:
"""Cluster collector API main function: start API"""
args = parse_arguments(argv or sys.argv[1:])

# Configure application logging
logging.basicConfig(
level=args.log_level.upper(),
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)

_init_app_state(
app,
cache_maxsize=args.cache_maxsize,
Expand Down
10 changes: 10 additions & 0 deletions src/checkmk_kube_agent/dedup_ttl_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,13 @@ def get_all(self) -> Sequence[V]:
"""Get all entries from the TTL cache."""
with self.__lock:
return list(self.values())

def size(self) -> int:
    """Return how many entries the cache currently holds.

    The count is read while holding the cache's internal lock.
    """
    with self.__lock:
        entry_count = len(self)
    return entry_count

def utilization(self) -> float:
    """Get the cache utilization as a percentage (0.0 to 100.0).

    Returns:
        ``len(self) * 100.0 / self.maxsize``, computed while holding the
        cache's internal lock. A cache whose ``maxsize`` is zero (or
        negative) reports ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    with self.__lock:
        capacity = self.maxsize
        if capacity <= 0:
            # No capacity to measure against; avoid dividing by zero.
            return 0.0
        # Scale before dividing: ``29 / 100 * 100.0`` yields
        # 28.999999999999996, whereas ``29 * 100.0 / 100`` is exactly 29.0.
        return len(self) * 100.0 / capacity
63 changes: 63 additions & 0 deletions test_cache_size.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""Simple test to verify DedupTTLCache size() and utilization() methods work correctly.

Run it directly (``python test_cache_size.py``) for a printed walkthrough, or
let pytest collect ``test_cache_size_and_utilization``. Nothing is executed
merely by importing this module.
"""

import math
import sys
from pathlib import Path

# Resolve the in-repo "src" directory relative to this file so the import
# below works from any working directory, not only from the repository root.
sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))

from checkmk_kube_agent.dedup_ttl_cache import DedupTTLCache  # noqa: E402


def _check(cache, heading, expected_size, expected_utilization):
    """Print the cache statistics under *heading* and verify both values."""
    print(f"\n{heading}")
    current_size = cache.size()
    current_utilization = cache.utilization()
    print(f" Size: {current_size}")
    print(f" Utilization: {current_utilization:.1f}%")
    assert current_size == expected_size
    # Percentages are computed floats; compare with a tolerance rather than
    # relying on exact binary representation.
    assert math.isclose(current_utilization, expected_utilization, abs_tol=1e-9)


def test_cache_size_and_utilization():
    """Walk a 10-entry cache from empty to over capacity, checking the stats."""
    # Create a small cache for testing; entries are keyed by their first item.
    cache = DedupTTLCache(
        key=lambda x: x[0],
        maxsize=10,
        ttl=60,
    )

    print("Testing DedupTTLCache size and utilization methods:")
    print("=" * 50)

    # Test 1: Empty cache
    _check(cache, "1. Empty cache:", 0, 0.0)

    # Test 2: Add some entries
    cache.put(("foo", "bar"))
    cache.put(("baz", "qux"))
    cache.put(("hello", "world"))
    _check(cache, "2. After adding 3 entries:", 3, 30.0)

    # Test 3: Add duplicate (should not increase size)
    cache.put(("foo", "updated"))
    _check(cache, "3. After adding duplicate 'foo':", 3, 30.0)

    # Test 4: Fill cache to capacity
    for i in range(7):
        cache.put((f"key{i}", f"value{i}"))
    _check(cache, "4. After filling to capacity (10 entries):", 10, 100.0)

    # Test 5: Exceed capacity (an existing entry should be evicted, so the
    # size is expected to stay at 10)
    cache.put(("new", "entry"))
    _check(cache, "5. After exceeding capacity:", 10, 100.0)

    print("\n" + "=" * 50)
    print("✅ All tests passed!")
    print("\nThese methods can now be used in the API to log cache statistics.")


if __name__ == "__main__":
    test_cache_size_and_utilization()
Loading