fix(observability): isolate metrics endpoint to dedicated port

Security fix: Move the Prometheus metrics endpoint from the main HTTP port to a dedicated port (9090 by default) to keep metrics data off the externally exposed application port.

Changes:
- Use prometheus_client.start_http_server() for a dedicated metrics server
- Remove the /metrics route from the main application routes
- Metrics are now only accessible on port 9090 (configurable via METRICS_PORT)
- The main application port no longer serves the /metrics endpoint

This follows the security best practice of isolating monitoring endpoints from application traffic.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-09 09:53:36 +01:00
parent 5e4667a643
commit 4e89e92b65
3 changed files with 28 additions and 36 deletions
@@ -39,7 +39,6 @@ from nextcloud_mcp_server.context import get_client as get_nextcloud_client
 from nextcloud_mcp_server.document_processors import get_registry
 from nextcloud_mcp_server.observability import (
    ObservabilityMiddleware,
-    get_metrics_handler,
    get_uvicorn_logging_config,
    setup_metrics,
    setup_tracing,
@@ -786,8 +785,10 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):

    # Setup Prometheus metrics (always enabled by default)
    if settings.metrics_enabled:
-        setup_metrics()
-        logger.info("Prometheus metrics enabled")
+        setup_metrics(port=settings.metrics_port)
+        logger.info(
+            f"Prometheus metrics enabled on dedicated port {settings.metrics_port}"
+        )

    # Setup OpenTelemetry tracing (optional)
    if settings.tracing_enabled:
@@ -1212,12 +1213,8 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
    routes.append(Route("/health/ready", health_ready, methods=["GET"]))
    logger.info("Health check endpoints enabled: /health/live, /health/ready")

-    # Add metrics endpoint (if metrics are enabled)
-    if settings.metrics_enabled:
-        routes.append(Route("/metrics", get_metrics_handler, methods=["GET"]))
-        logger.info(
-            f"Prometheus metrics endpoint enabled: /metrics (port: {settings.metrics_port if hasattr(settings, 'metrics_port') else 'default'})"
-        )
+    # Note: Metrics endpoint is NOT exposed on main HTTP port for security reasons.
+    # Metrics are served on dedicated port via setup_metrics() (default: 9090)

    if oauth_enabled:
        # Import OAuth routes (ADR-004 Progressive Consent)
@@ -18,10 +18,7 @@ from nextcloud_mcp_server.observability.logging_config import (
    get_uvicorn_logging_config,
    setup_logging,
 )
-from nextcloud_mcp_server.observability.metrics import (
-    get_metrics_handler,
-    setup_metrics,
-)
+from nextcloud_mcp_server.observability.metrics import setup_metrics
 from nextcloud_mcp_server.observability.middleware import ObservabilityMiddleware
 from nextcloud_mcp_server.observability.tracing import setup_tracing

@@ -30,6 +27,5 @@ __all__ = [
    "get_uvicorn_logging_config",
    "setup_metrics",
    "setup_tracing",
-    "get_metrics_handler",
    "ObservabilityMiddleware",
 ]
@@ -17,15 +17,11 @@ and resource usage. Metrics are organized by category:
 import logging

 from prometheus_client import (
-    CONTENT_TYPE_LATEST,
-    REGISTRY,
    Counter,
    Gauge,
    Histogram,
-    generate_latest,
+    start_http_server,
 )
-from starlette.requests import Request
-from starlette.responses import Response

 logger = logging.getLogger(__name__)

@@ -220,29 +216,32 @@ dependency_check_duration_seconds = Histogram(
 # =============================================================================


-def setup_metrics() -> None:
+def setup_metrics(port: int = 9090) -> None:
    """
-    Initialize Prometheus metrics collection.
+    Initialize Prometheus metrics collection and start HTTP server.

-    This function should be called once during application startup.
-    It currently doesn't require any initialization beyond module-level
-    metric definitions, but is provided for consistency and future extensibility.
-    """
-    logger.info("Prometheus metrics initialized")
-
-
-async def get_metrics_handler(request: Request) -> Response:
-    """
-    HTTP handler for the /metrics endpoint.
+    Starts a dedicated HTTP server on the specified port to serve metrics.
+    This server runs in a separate thread and is isolated from the main application.

    Args:
-        request: Starlette request object (unused, but required by signature)
+        port: Port to serve metrics on (default: 9090)

-    Returns:
-        Response containing Prometheus metrics in text format
+    Note:
+        Metrics endpoint (/metrics) is ONLY accessible on this dedicated port,
+        not on the main application HTTP port. No bind address is passed, so the
+        server listens on all interfaces; restrict access to this port externally.
    """
-    metrics_data = generate_latest(REGISTRY)
-    return Response(content=metrics_data, media_type=CONTENT_TYPE_LATEST)
+    try:
+        start_http_server(port)
+        logger.info(f"Prometheus metrics server started on port {port}")
+    except OSError as e:
+        if "Address already in use" in str(e):
+            logger.warning(
+                f"Metrics port {port} already in use (metrics server likely already running)"
+            )
+        else:
+            logger.error(f"Failed to start metrics server on port {port}: {e}")
+            raise


 # =============================================================================