Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
497f300
Merge pull request #17 from vaisu-bhut/datadog
vaisu-bhut Dec 15, 2025
66199ae
Trying with datadog agent to enable all features
vaisu-bhut Dec 17, 2025
fa8aa51
Merge pull request #21 from vaisu-bhut/dd_agent
vaisu-bhut Dec 17, 2025
81a1d03
Package configs
vaisu-bhut Dec 17, 2025
209ff85
Merge pull request #22 from vaisu-bhut/dd_agent
vaisu-bhut Dec 17, 2025
c5d068b
I see Logs correlation and infrastructure management configured.. Som…
vaisu-bhut Dec 18, 2025
929cb6d
Datadog is raining fire with its services!!
vaisu-bhut Dec 18, 2025
0074b33
Merge pull request #23 from vaisu-bhut/dd_agent
vaisu-bhut Dec 18, 2025
12d8c9e
Fixing previous otel config
vaisu-bhut Dec 18, 2025
4d03be3
Merge pull request #24 from vaisu-bhut/dd_agent
vaisu-bhut Dec 18, 2025
bcd4355
Debugging silent failures
vaisu-bhut Dec 18, 2025
e361166
Merge pull request #25 from vaisu-bhut/dd_agent
vaisu-bhut Dec 18, 2025
287ae0e
Implementing lazy import
vaisu-bhut Dec 18, 2025
fb984ec
Merge pull request #26 from vaisu-bhut/dd_agent
vaisu-bhut Dec 18, 2025
fa007b5
Model Mesh
vaisu-bhut Dec 20, 2025
3ed1678
Merge pull request #27 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
4941288
Detailed debugging
vaisu-bhut Dec 20, 2025
4029aa4
Merge pull request #28 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
7c515ab
Setting failure
vaisu-bhut Dec 20, 2025
45f9567
Merge pull request #29 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
09a86bc
Security Debugging
vaisu-bhut Dec 20, 2025
dbd83df
Merge pull request #30 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
b6f7adc
Security Debugging
vaisu-bhut Dec 20, 2025
ff8857d
Merge pull request #31 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
f025c88
Security Debugging
vaisu-bhut Dec 20, 2025
1990911
Merge pull request #32 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
210816e
Security Debugging
vaisu-bhut Dec 20, 2025
b70f3b4
Merge pull request #33 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
d3e718a
Security Debugging
vaisu-bhut Dec 20, 2025
cc2ca4c
Merge pull request #34 from vaisu-bhut/dd_agent
vaisu-bhut Dec 20, 2025
6147b3f
GenAI
vaisu-bhut Dec 21, 2025
78efbbb
Merge pull request #35 from vaisu-bhut/dd_agent
vaisu-bhut Dec 21, 2025
4558cf6
GenAi error fixed, AgentCore is ready
vaisu-bhut Dec 21, 2025
478d88d
Merge pull request #36 from vaisu-bhut/dd_agent
vaisu-bhut Dec 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 107 additions & 33 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,28 @@ services:
- "8000:8000"
environment:
- DATABASE_URL=postgresql+asyncpg://user:password@db:5432/clestiq_shield
- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
- OTEL_SERVICE_NAME=clestiq-shield-gateway
- DD_ENV=development
- DD_SERVICE=clestiq-shield-gateway
- DD_VERSION=1.0.0
- DD_AGENT_HOST=datadog-agent
- DD_TRACE_AGENT_PORT=8126
- DD_DOGSTATSD_SOCKET=/var/run/datadog/dsd.socket
- DD_LOGS_INJECTION=true
- DD_PROFILING_ENABLED=true
- DD_RUNTIME_METRICS_ENABLED=true
- SENTINEL_SERVICE_URL=http://sentinel:8001
- TELEMETRY_ENABLED=true
depends_on:
- db
- sentinel
- otel-collector
- datadog-agent
volumes:
- ./services/gateway/app:/app/app
- dogstatsd-socket:/var/run/datadog
labels:
com.datadoghq.tags.service: "clestiq-shield-gateway"
com.datadoghq.tags.env: "development"
com.datadoghq.tags.version: "1.0.0"
networks:
- clestiq-network
healthcheck:
Expand All @@ -38,16 +50,28 @@ services:
- "8003"
environment:
- DATABASE_URL=postgresql+asyncpg://user:password@db:5432/clestiq_shield
- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
- OTEL_SERVICE_NAME=clestiq-shield-eagle-eye
- DD_ENV=development
- DD_SERVICE=clestiq-shield-eagle-eye
- DD_VERSION=1.0.0
- DD_AGENT_HOST=datadog-agent
- DD_TRACE_AGENT_PORT=8126
- DD_DOGSTATSD_SOCKET=/var/run/datadog/dsd.socket
- DD_LOGS_INJECTION=true
- DD_PROFILING_ENABLED=true
- DD_RUNTIME_METRICS_ENABLED=true
- TELEMETRY_ENABLED=true
- SECRET_KEY=change_this_to_a_strong_secret_key
- ALGORITHM=HS256
depends_on:
- db
- otel-collector
- datadog-agent
volumes:
- ./services/eagle-eye/app:/app/app
- dogstatsd-socket:/var/run/datadog
labels:
com.datadoghq.tags.service: "clestiq-shield-eagle-eye"
com.datadoghq.tags.env: "development"
com.datadoghq.tags.version: "1.0.0"
networks:
- clestiq-network
healthcheck:
Expand All @@ -65,11 +89,16 @@ services:
expose:
- "8001"
environment:
- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
- OTEL_SERVICE_NAME=clestiq-shield-sentinel
- GCP_PROJECT_ID=${GCP_PROJECT_ID}
- GCP_LOCATION=${GCP_LOCATION:-us-east1}
- GOOGLE_APPLICATION_CREDENTIALS=/app/gcp-credentials.json
- DD_ENV=development
- DD_SERVICE=clestiq-shield-sentinel
- DD_VERSION=1.0.0
- DD_AGENT_HOST=datadog-agent
- DD_TRACE_AGENT_PORT=8126
- DD_DOGSTATSD_SOCKET=/var/run/datadog/dsd.socket
- DD_LOGS_INJECTION=true
- DD_PROFILING_ENABLED=true
- DD_RUNTIME_METRICS_ENABLED=true
- GEMINI_API_KEY=${GEMINI_API_KEY}
- TELEMETRY_ENABLED=true
# Security Settings
- SECURITY_SANITIZATION_ENABLED=true
Expand All @@ -82,16 +111,20 @@ services:
- TOON_CONVERSION_ENABLED=true
# LLM Settings
- LLM_FORWARD_ENABLED=true
- LLM_MODEL_NAME=gemini-2.0-flash
- LLM_MODEL_NAME=gemini-2.5-flash
- LLM_MAX_TOKENS=8192
# Guardian Service URL
- GUARDIAN_SERVICE_URL=http://guardian:8002
depends_on:
- otel-collector
- datadog-agent
- guardian
volumes:
- ./services/security-agent/app:/app/app
- ./gcp-credentials.json:/app/gcp-credentials.json:ro
- dogstatsd-socket:/var/run/datadog
labels:
com.datadoghq.tags.service: "clestiq-shield-sentinel"
com.datadoghq.tags.env: "development"
com.datadoghq.tags.version: "1.0.0"
networks:
- clestiq-network
healthcheck:
Expand All @@ -109,11 +142,16 @@ services:
expose:
- "8002"
environment:
- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318
- OTEL_SERVICE_NAME=clestiq-shield-guardian
- GCP_PROJECT_ID=${GCP_PROJECT_ID}
- GCP_LOCATION=${GCP_LOCATION:-us-east1}
- GOOGLE_APPLICATION_CREDENTIALS=/app/gcp-credentials.json
- DD_ENV=development
- DD_SERVICE=clestiq-shield-guardian
- DD_VERSION=1.0.0
- DD_AGENT_HOST=datadog-agent
- DD_TRACE_AGENT_PORT=8126
- DD_DOGSTATSD_SOCKET=/var/run/datadog/dsd.socket
- DD_LOGS_INJECTION=true
- DD_PROFILING_ENABLED=true
- DD_RUNTIME_METRICS_ENABLED=true
- GEMINI_API_KEY=${GEMINI_API_KEY}
- TELEMETRY_ENABLED=true
# Moderation Settings
- DEFAULT_MODERATION_MODE=moderate
Expand All @@ -122,10 +160,14 @@ services:
- OUTPUT_PII_DETECTION_ENABLED=true
- AUTO_CONVERT_TOON_TO_JSON=true
depends_on:
- otel-collector
- datadog-agent
volumes:
- ./services/guardian/app:/app/app
- ./gcp-credentials.json:/app/gcp-credentials.json:ro
- dogstatsd-socket:/var/run/datadog
labels:
com.datadoghq.tags.service: "clestiq-shield-guardian"
com.datadoghq.tags.env: "development"
com.datadoghq.tags.version: "1.0.0"
networks:
- clestiq-network
healthcheck:
Expand Down Expand Up @@ -154,25 +196,56 @@ services:
timeout: 5s
retries: 5

# OpenTelemetry Collector
otel-collector:
image: otel/opentelemetry-collector-contrib:0.91.0
command: ["--config=/etc/otel-collector-config.yaml"]
# Datadog Agent
datadog-agent:
image: gcr.io/datadoghq/agent:latest
environment:
- DD_API_KEY=${DD_API_KEY}
- DD_SITE=${DD_SITE:-datadoghq.com}
- DEPLOYMENT_ENV=development
volumes:
- ./services/otel-collector/config.yaml:/etc/otel-collector-config.yaml:ro
- DD_ENV=development
- DD_APM_ENABLED=true
- DD_APM_NON_LOCAL_TRAFFIC=true
- DD_LOGS_ENABLED=true
- DD_LOGS_CONFIG_CONTAINER_COLLECT_ALL=true
- DD_LOGS_CONFIG_AUTO_MULTI_LINE_DETECTION=true
# No quotes here: in list-form `environment` entries the quotes would become part of the value and the filter would not match.
- DD_CONTAINER_EXCLUDE=name:datadog-agent
- DD_PROFILING_ENABLED=true
- DD_RUNTIME_METRICS_ENABLED=true
- DD_DOGSTATSD_SOCKET=/var/run/datadog/dsd.socket
- DD_DOGSTATSD_NON_LOCAL_TRAFFIC=false
# Universal Service Monitoring
- DD_SYSTEM_PROBE_ENABLED=true
- DD_SYSTEM_PROBE_SERVICE_MONITORING_ENABLED=true
# Profiling intake
- DD_PROFILING_AGENTLESS=false
# Service Monitoring
- DD_SERVICE_MONITORING_ENABLED=true
- DD_PROCESS_CONFIG_PROCESS_COLLECTION_ENABLED=true
ports:
- "4317:4317"
- "4318:4318"
- "8888:8888"
- "13133:13133"
- "8126:8126" # APM traces
- "8125:8125/udp" # DogStatsD metrics (fallback)
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro
- /proc/:/host/proc/:ro
- /sys/fs/cgroup/:/host/sys/fs/cgroup:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /opt/datadog-agent/run:/opt/datadog-agent/run:rw
- dogstatsd-socket:/var/run/datadog
cap_add:
- SYS_ADMIN
- SYS_RESOURCE
- SYS_PTRACE
- NET_ADMIN
- NET_BROADCAST
- NET_RAW
- IPC_LOCK
- CHOWN
security_opt:
- apparmor:unconfined
networks:
- clestiq-network
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:13133"]
test: ["CMD", "agent", "health"]
interval: 30s
timeout: 10s
retries: 3
Expand All @@ -183,3 +256,4 @@ networks:

volumes:
postgres_data:
dogstatsd-socket:
7 changes: 4 additions & 3 deletions services/eagle-eye/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ class Settings(BaseSettings):
ALGORITHM: str = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES: int = 30

# OpenTelemetry
# Datadog APM
TELEMETRY_ENABLED: bool = True
OTEL_SERVICE_NAME: str = "clestiq-shield-eagle-eye"
OTEL_EXPORTER_OTLP_ENDPOINT: str = "http://otel-collector:4317"
DD_SERVICE: str = "clestiq-shield-eagle-eye"
DD_ENV: str = "production"
DD_VERSION: str = "1.0.0"

class Config:
case_sensitive = True
Expand Down
79 changes: 27 additions & 52 deletions services/eagle-eye/app/core/telemetry.py
Original file line number Diff line number Diff line change
@@ -1,72 +1,49 @@
import logging
import sys
import structlog
from opentelemetry import trace

# Import OTLP Log components (HTTP)
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor

# HTTP Log Exporter
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
from opentelemetry._logs import set_logger_provider
from opentelemetry.sdk.resources import Resource
from opentelemetry.semconv.resource import ResourceAttributes
from ddtrace import tracer

from app.core.config import get_settings

settings = get_settings()


def add_open_telemetry_spans(_, __, event_dict):
span = trace.get_current_span()
if not span.is_recording():
event_dict["span"] = None
event_dict["trace"] = None
return event_dict

ctx = span.get_span_context()
event_dict["span_id"] = format(ctx.span_id, "016x")
event_dict["trace_id"] = format(ctx.trace_id, "032x")
def add_datadog_trace_context(_, __, event_dict):
    """Add Datadog trace context to logs for correlation.

    Used as a structlog processor; the first two positional arguments are
    ignored. When the tracer reports a current span, its identifiers and
    its service/env/version values are copied onto the event under
    ``dd.*`` keys. Without a current span the event is returned unchanged.

    Args:
        event_dict: The structlog event dictionary being assembled.

    Returns:
        The same ``event_dict`` object, possibly with ``dd.*`` keys added.
    """
    span = tracer.current_span()
    if span:
        # Trace IDs may be 128-bit; emit only the low 64 bits in decimal so
        # the value lines up with the ID Datadog uses for log correlation.
        # For 64-bit IDs the mask is a no-op.
        event_dict["dd.trace_id"] = str(span.trace_id & ((1 << 64) - 1))
        event_dict["dd.span_id"] = str(span.span_id)
        event_dict["dd.service"] = span.service
        # These tags may be absent on a given span, in which case the value
        # is None (unchanged from the previous behavior).
        event_dict["dd.env"] = span.get_tag("env")
        event_dict["dd.version"] = span.get_tag("version")
    return event_dict


def setup_logging():
"""Configure structured logging with Datadog trace context."""
if not settings.TELEMETRY_ENABLED:
return

# Create Resource
import socket
# Enable Datadog instrumentation
from ddtrace import patch_all
from ddtrace.runtime import RuntimeMetrics
from ddtrace.profiling import Profiler

resource = Resource.create(
{
ResourceAttributes.SERVICE_NAME: settings.OTEL_SERVICE_NAME,
ResourceAttributes.SERVICE_VERSION: settings.VERSION,
ResourceAttributes.HOST_NAME: socket.gethostname(),
}
)
patch_all()

# --- OTLP Logging Setup ---
# Create Logger Provider
logger_provider = LoggerProvider(resource=resource)
set_logger_provider(logger_provider)
# Enable Continuous Profiler
profiler = Profiler()
profiler.start()

# Create OTLP Log Exporter (HTTP)
# The endpoint should be full URL for HTTP exporter e.g. http://otel-collector:4318/v1/logs
# But often the exporter appends /v1/logs if missing.
# Let's trust the default behavior of the HTTP exporter with the base endpoint.
otlp_log_exporter = OTLPLogExporter(
endpoint=f"{settings.OTEL_EXPORTER_OTLP_ENDPOINT}/v1/logs"
)

# Add Batch Processor
logger_provider.add_log_record_processor(BatchLogRecordProcessor(otlp_log_exporter))
# Enable runtime metrics
RuntimeMetrics.enable()

# Output logs to stdout as JSON using structlog
# Configure Structlog with Datadog trace context
structlog.configure(
processors=[
structlog.contextvars.merge_contextvars,
add_open_telemetry_spans,
add_datadog_trace_context,
structlog.processors.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.JSONRenderer(),
Expand All @@ -76,16 +53,14 @@ def setup_logging():
)

# Configure Standard Library Logging
otlp_handler = LoggingHandler(level=logging.INFO, logger_provider=logger_provider)

stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(logging.Formatter("%(message)s"))

logging.basicConfig(
level=logging.INFO,
handlers=[otlp_handler, stdout_handler],
handlers=[stdout_handler],
)

# Force uvicorn logs to use OTLP handler
logging.getLogger("uvicorn.access").handlers = [otlp_handler, stdout_handler]
logging.getLogger("uvicorn.error").handlers = [otlp_handler, stdout_handler]
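# Route uvicorn's access and error loggers through the stdout handler (plain "%(message)s" formatter; no JSON rendering is applied to these records here)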
logging.getLogger("uvicorn.access").handlers = [stdout_handler]
logging.getLogger("uvicorn.error").handlers = [stdout_handler]
Loading