feat: skip tracing for health and metrics endpoints

Health check and metrics endpoints are frequently polled and don't
provide meaningful trace data. This change skips OpenTelemetry span
creation for:
- /health/* (liveness, readiness checks; prefix match on "/health/",
  so a bare /health path is still traced)
- /metrics (Prometheus metrics; exact path match only)

These endpoints still record Prometheus metrics (request count, latency,
in-flight requests) but no longer create trace spans, reducing tracing
noise and storage costs.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Chris Coutinho
2025-11-10 07:24:27 +01:00
parent 640a7818f9
commit b32324cb76
@@ -66,22 +66,40 @@ class ObservabilityMiddleware(BaseHTTPMiddleware):
# Record start time
start_time = time.time()
try:
# Create span for request (OpenTelemetry auto-instrumentation will create parent span)
with trace_operation(
f"HTTP {method} {endpoint}",
attributes={
"http.method": method,
"http.path": path,
"http.scheme": request.url.scheme,
"http.host": request.url.hostname,
},
):
# Process request
response = await call_next(request)
# Skip tracing for health/metrics endpoints to reduce noise
should_trace = not (path.startswith("/health/") or path == "/metrics")
# Add response status to span
add_span_attribute("http.status_code", response.status_code)
try:
if should_trace:
# Create span for request (OpenTelemetry auto-instrumentation will create parent span)
with trace_operation(
f"HTTP {method} {endpoint}",
attributes={
"http.method": method,
"http.path": path,
"http.scheme": request.url.scheme,
"http.host": request.url.hostname,
},
):
# Process request
response = await call_next(request)
# Add response status to span
add_span_attribute("http.status_code", response.status_code)
# Record metrics
duration = time.time() - start_time
self._record_request_metrics(
method=method,
endpoint=endpoint,
status_code=response.status_code,
duration=duration,
)
return response
else:
# No tracing for health/metrics endpoints, but still record metrics
response = await call_next(request)
# Record metrics
duration = time.time() - start_time