fix(mcp): Move function-local imports to the top of modules

2025-12-26 10:05:27 -06:00
parent b841407f07
commit 056414752e
41 changed files with 85 additions and 152 deletions
@@ -15,6 +15,7 @@ import time
 from importlib.metadata import version
 from typing import Any

+import httpx
 from starlette.requests import Request
 from starlette.responses import JSONResponse

@@ -530,8 +531,6 @@ async def get_installed_apps(request: Request) -> JSONResponse:
        )

    try:
-        import httpx
-
        # Get Bearer token from request
        token = extract_bearer_token(request)
        if not token:
@@ -602,8 +601,6 @@ async def list_webhooks(request: Request) -> JSONResponse:
        )

    try:
-        import httpx
-
        from nextcloud_mcp_server.client.webhooks import WebhooksClient

        # Get Bearer token from request
@@ -669,8 +666,6 @@ async def create_webhook(request: Request) -> JSONResponse:
        )

    try:
-        import httpx
-
        from nextcloud_mcp_server.client.webhooks import WebhooksClient

        # Parse request body
@@ -747,8 +742,6 @@ async def delete_webhook(request: Request) -> JSONResponse:
        )

    try:
-        import httpx
-
        from nextcloud_mcp_server.client.webhooks import WebhooksClient

        # Get webhook_id from path parameter
@@ -1,5 +1,7 @@
 from __future__ import annotations

+import base64
+import json
 import logging
 import os
 import time
@@ -11,13 +13,13 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Optional, cast
 from urllib.parse import urlparse

+import anyio
 from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor

 if TYPE_CHECKING:
    from nextcloud_mcp_server.auth.storage import RefreshTokenStorage


-import anyio
 import click
 import httpx
 from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
@@ -384,8 +386,6 @@ class BasicAuthMiddleware:

            if auth_header.startswith(b"Basic "):
                try:
-                    import base64
-
                    # Decode base64(username:password)
                    encoded = auth_header[6:]  # Skip "Basic "
                    decoded = base64.b64decode(encoded).decode("utf-8")
@@ -1200,8 +1200,6 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
                "OAuth credentials not configured - attempting Dynamic Client Registration..."
            )

-            import anyio
-
            async def setup_multi_user_basic_dcr():
                """Setup DCR for multi-user BasicAuth background operations."""
                # Construct registration endpoint directly from nextcloud_host
@@ -1288,7 +1286,6 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
    if mode in (AuthMode.OAUTH_SINGLE_AUDIENCE, AuthMode.OAUTH_TOKEN_EXCHANGE):
        logger.info("Configuring MCP server for OAuth mode")
        # Asynchronously get the OAuth configuration
-        import anyio

        (
            nextcloud_host,
@@ -1626,7 +1623,6 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =

        # Start background vector sync tasks (ADR-007)
        # Scanner runs at server-level (once), not per-session
-        import anyio as anyio_module

        # Re-use settings from outer scope (already validated)
        # Note: enable_offline_access_for_sync, encryption_key, and refresh_token_storage
@@ -1666,11 +1662,11 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
                ) from e

            # Initialize shared state
-            send_stream, receive_stream = anyio_module.create_memory_object_stream(
+            send_stream, receive_stream = anyio.create_memory_object_stream(
                max_buffer_size=settings.vector_sync_queue_max_size
            )
-            shutdown_event = anyio_module.Event()
-            scanner_wake_event = anyio_module.Event()
+            shutdown_event = anyio.Event()
+            scanner_wake_event = anyio.Event()

            # Store in app state for access from routes (ADR-007)
            app.state.document_send_stream = send_stream
@@ -1697,7 +1693,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
                    break

            # Start background tasks using anyio TaskGroup
-            async with anyio_module.create_task_group() as tg:
+            async with anyio.create_task_group() as tg:
                # Start scanner task
                await tg.start(
                    scanner_task,
@@ -1828,11 +1824,11 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
                    ) from e

                # Initialize shared state
-                send_stream, receive_stream = anyio_module.create_memory_object_stream(
+                send_stream, receive_stream = anyio.create_memory_object_stream(
                    max_buffer_size=settings.vector_sync_queue_max_size
                )
-                shutdown_event = anyio_module.Event()
-                scanner_wake_event = anyio_module.Event()
+                shutdown_event = anyio.Event()
+                scanner_wake_event = anyio.Event()

                # User state tracking for user manager
                user_states: dict = {}
@@ -1869,7 +1865,7 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
                use_basic_auth = not oauth_enabled

                # Start background tasks using anyio TaskGroup
-                async with anyio_module.create_task_group() as tg:
+                async with anyio.create_task_group() as tg:
                    # Start user manager task (supervises per-user scanners)
                    await tg.start(
                        user_manager_task,
@@ -2076,7 +2072,6 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
        This is a temporary endpoint for testing webhook schemas and payloads.
        It logs the full payload and returns 200 OK immediately.
        """
-        import json

        try:
            payload = await request.json()
@@ -2467,8 +2462,6 @@ def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None =
                # Starlette caches the body internally, so it's safe to read here
                body = await request.body()
                try:
-                    import json
-
                    data = json.loads(body)
                    # Check if this is an initialize request
                    if data.get("method") == "initialize":
@@ -8,6 +8,7 @@ import hashlib
 import logging
 import os
 import secrets
+import time
 from base64 import urlsafe_b64encode
 from urllib.parse import urlencode

@@ -381,8 +382,6 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
    refresh_expires_in = token_data.get("refresh_expires_in")
    refresh_expires_at = None
    if refresh_expires_in:
-        import time
-
        refresh_expires_at = int(time.time()) + refresh_expires_in
        logger.info(
            f"Refresh token expires in {refresh_expires_in}s (at timestamp {refresh_expires_at})"
@@ -8,6 +8,7 @@ Handles OAuth flows with Keycloak as the identity provider, including:
 - Integration with RefreshTokenStorage
 """

+import base64
 import hashlib
 import logging
 import os
@@ -155,7 +156,6 @@ class KeycloakOAuthClient:
        Returns:
            Tuple of (code_verifier, code_challenge)
        """
-        import base64

        # Generate code verifier (43-128 characters)
        code_verifier = secrets.token_urlsafe(32)
@@ -23,6 +23,7 @@ import hashlib
 import logging
 import os
 import secrets
+import time
 from base64 import urlsafe_b64encode
 from urllib.parse import urlencode

@@ -521,8 +522,6 @@ async def oauth_callback_nextcloud(request: Request):
        refresh_expires_in = token_data.get("refresh_expires_in")
        refresh_expires_at = None
        if refresh_expires_in:
-            import time
-
            refresh_expires_at = int(time.time()) + refresh_expires_in
            logger.info(f"  refresh_expires_in: {refresh_expires_in}s")
            logger.info(f"  refresh_expires_at: {refresh_expires_at}")
@@ -9,6 +9,7 @@ import functools
 import logging
 from typing import Callable

+import jwt
 from mcp.server.fastmcp import Context
 from mcp.shared.exceptions import McpError
 from mcp.types import ErrorData
@@ -78,8 +79,6 @@ def require_provisioning(func: Callable) -> Callable:
        user_id = None
        if hasattr(ctx, "authorization") and ctx.authorization:
            try:
-                import jwt
-
                token = ctx.authorization.token
                payload = jwt.decode(token, options={"verify_signature": False})
                user_id = payload.get("sub")
@@ -163,8 +162,6 @@ def require_provisioning_or_suggest(func: Callable) -> Callable:
                # Get user_id from authorization token
                user_id = None
                if hasattr(ctx, "authorization") and ctx.authorization:
-                    import jwt
-
                    token = ctx.authorization.token
                    payload = jwt.decode(token, options={"verify_signature": False})
                    user_id = payload.get("sub")
@@ -28,6 +28,7 @@ Sensitive data (tokens, secrets) is encrypted at rest using Fernet symmetric enc
 import json
 import logging
 import os
+import socket
 import time
 from pathlib import Path
 from typing import Any, Optional
@@ -830,7 +831,6 @@ class RefreshTokenStorage:
            resource_id: Resource identifier
            auth_method: Authentication method used
        """
-        import socket

        hostname = socket.gethostname()
        timestamp = int(time.time())
@@ -9,6 +9,7 @@ For OAuth mode: Requires browser-based OAuth login to establish session.

 import logging
 import os
+import traceback
 from pathlib import Path
 from typing import Any

@@ -385,8 +386,6 @@ async def _get_user_info(request: Request) -> dict[str, Any]:
        return user_context

    except Exception as e:
-        import traceback
-
        logger.error(f"Error retrieving user info: {e}")
        logger.error(f"Traceback: {traceback.format_exc()}")
        return {
@@ -15,6 +15,7 @@ import logging
 import time
 from pathlib import Path

+import anyio
 import numpy as np
 from jinja2 import Environment, FileSystemLoader
 from starlette.authentication import requires
@@ -396,8 +397,6 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
            coords = pca.fit_transform(vectors)
            return coords, pca

-        import anyio
-
        with trace_operation(
            "vector_viz.pca_compute",
            attributes={
@@ -1,6 +1,7 @@
 import logging
 import logging.config
 import os
+import socket
 from dataclasses import dataclass
 from enum import Enum
 from typing import Any, Optional
@@ -337,7 +338,6 @@ class Settings:
        Returns:
            Collection name string
        """
-        import socket

        # Use explicit override if user configured non-default value
        if self.qdrant_collection != "nextcloud_content":
@@ -9,6 +9,7 @@ See ADR-020 for detailed architecture and deployment mode documentation.
 """

 import logging
+import os
 from dataclasses import dataclass
 from enum import Enum

@@ -240,8 +241,6 @@ def detect_auth_mode(settings: Settings) -> AuthMode:
    Raises:
        ValueError: If explicit deployment_mode is invalid or conflicts with detected mode
    """
-    import logging
-    import os

    logger = logging.getLogger(__name__)

@@ -6,6 +6,8 @@ import tempfile
 from collections.abc import Awaitable, Callable
 from typing import Any, Optional

+import anyio
+
 # NOTE: Do NOT call pymupdf.layout.activate() here!
 # It changes the behavior of pymupdf4llm.to_markdown() when page_chunks=True,
 # causing it to return a string instead of a list[dict].
@@ -95,7 +97,6 @@ class PyMuPDFProcessor(DocumentProcessor):
        Raises:
            ProcessorError: If PDF processing fails
        """
-        import anyio

        try:
            if progress_callback:
@@ -3,6 +3,7 @@
 import logging
 from typing import Any

+import anyio
 from fastembed import SparseTextEmbedding

 logger = logging.getLogger(__name__)
@@ -67,7 +68,6 @@ class BM25SparseEmbeddingProvider:
        Returns:
            Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
        """
-        import anyio

        # Run CPU-bound BM25 encoding in thread pool
        return await anyio.to_thread.run_sync(lambda: self.encode(text))  # type: ignore[attr-defined]
@@ -82,7 +82,6 @@ class BM25SparseEmbeddingProvider:
        Returns:
            List of dictionaries with 'indices' and 'values' for each text
        """
-        import anyio

        # Run CPU-bound BM25 encoding in thread pool to avoid blocking event loop
        sparse_embeddings = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
@@ -6,6 +6,7 @@ provides CLI integration.
 """

 import logging
+import sqlite3
 from pathlib import Path

 from alembic.config import Config
@@ -98,7 +99,6 @@ def get_current_revision(database_path: str | Path | None = None) -> str | None:
    Returns:
        Current revision ID or None if not versioned
    """
-    import sqlite3

    if database_path is None:
        database_path = "/app/data/tokens.db"
@@ -14,7 +14,9 @@ and resource usage. Metrics are organized by category:
 - External Dependency Health Metrics
 """

+import functools
 import logging
+import time

 from prometheus_client import (
    Counter,
@@ -423,8 +425,6 @@ def instrument_tool(func):
    Returns:
        Wrapped function with metrics and tracing instrumentation
    """
-    import functools
-    import time

    from nextcloud_mcp_server.observability.tracing import trace_operation

@@ -1,9 +1,16 @@
 """Base interfaces and data structures for search algorithms."""

+import logging
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any, Protocol, runtime_checkable

+from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+

@runtime_checkable
 class NextcloudClientProtocol(Protocol):
@@ -78,13 +85,6 @@ async def get_indexed_doc_types(user_id: str) -> set[str]:
        >>> if "note" in types:
        ...     # Search notes
    """
-    import logging
-
-    from qdrant_client.models import FieldCondition, Filter, MatchValue
-
-    from nextcloud_mcp_server.config import get_settings
-    from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
-    from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

    logger = logging.getLogger(__name__)
    settings = get_settings()
@@ -7,6 +7,9 @@ position markers for better visualization and understanding of search results.
 import logging
 from dataclasses import dataclass

+import pymupdf
+import pymupdf4llm
+
 from nextcloud_mcp_server.client import NextcloudClient

 logger = logging.getLogger(__name__)
@@ -549,8 +552,6 @@ async def _fetch_document_text(
                    # Extract text from PDF using PyMuPDF
                    # IMPORTANT: Use pymupdf4llm.to_markdown() to match indexing extraction
                    # This ensures character offsets align between indexed chunks and retrieval
-                    import pymupdf
-                    import pymupdf4llm

                    logger.debug(f"Extracting text from PDF: {file_path}")
                    pdf_doc = pymupdf.open(stream=file_content, filetype="pdf")
@@ -10,6 +10,9 @@ varies between indexing and rendering.

 import logging
 import re
+import shutil
+import tempfile
+from pathlib import Path
 from typing import Optional

 import pymupdf
@@ -77,8 +80,6 @@ class PDFHighlighter:
            Tuple of (full_text, page_boundaries) where page_boundaries is a list of:
            {"page": 1, "start_offset": 0, "end_offset": 1234}
        """
-        import tempfile
-        from pathlib import Path

        page_boundaries = []
        text_parts = []
@@ -110,7 +111,6 @@ class PDFHighlighter:
        full_text = "".join(text_parts)

        # Clean up temp directory and extracted images
-        import shutil

        try:
            shutil.rmtree(temp_dir)
@@ -590,8 +590,6 @@ class PDFHighlighter:
        Returns:
            Tuple of (png_bytes, page_number, highlight_count) or None if failed
        """
-        import tempfile
-        from pathlib import Path

        temp_pdf_path = None
        try:
@@ -12,6 +12,7 @@ from typing import Optional
 from urllib.parse import urlencode

 import httpx
+import jwt
 from mcp.server.auth.middleware.auth_context import get_access_token
 from mcp.server.auth.provider import AccessToken
 from mcp.server.fastmcp import Context
@@ -53,8 +54,6 @@ async def extract_user_id_from_token(ctx: Context) -> str:
    # Try JWT decode first
    if is_jwt:
        try:
-            import jwt
-
            payload = jwt.decode(token, options={"verify_signature": False})
            user_id = payload.get("sub", "unknown")
            logger.info(f"  ✓ JWT decode successful: user_id={user_id}")
@@ -1,6 +1,7 @@
 """Semantic search MCP tools using vector database."""

 import logging
+import os

 import anyio
 from httpx import RequestError
@@ -656,7 +657,6 @@ def configure_semantic_tools(mcp: FastMCP):
        This is useful for determining when vector indexing is complete
        after creating or updating content across all indexed apps.
        """
-        import os

        # Check if vector sync is enabled
        vector_sync_enabled = (
@@ -1,3 +1,4 @@
+import base64
 import logging

 from mcp.server.fastmcp import Context, FastMCP
@@ -120,7 +121,6 @@ def configure_webdav_tools(mcp: FastMCP):
                pass

        # For binary files, return metadata and base64 encoded content
-        import base64

        return {
            "path": path,
@@ -156,8 +156,6 @@ def configure_webdav_tools(mcp: FastMCP):

        # Handle base64 encoded content
        if content_type and "base64" in content_type.lower():
-            import base64
-
            content_bytes = base64.b64decode(content)
            content_type = content_type.replace(";base64", "")
        else:
@@ -3,6 +3,7 @@
 import logging
 from dataclasses import dataclass

+import anyio
 from langchain_text_splitters import RecursiveCharacterTextSplitter

 logger = logging.getLogger(__name__)
@@ -68,7 +69,6 @@ class DocumentChunker:
        Returns:
            List of chunks with their character positions in the original content
        """
-        import anyio

        # Handle empty content - return single empty chunk for backward compatibility
        if not content:
@@ -1,6 +1,7 @@
 """HTML to Markdown conversion utilities for vector sync."""

 import logging
+import re

 from markdownify import markdownify as md

@@ -43,7 +44,6 @@ def html_to_markdown(html_content: str | None) -> str:
    except Exception as e:
        logger.warning(f"Failed to convert HTML to Markdown: {e}")
        # Fallback: strip all HTML tags as a last resort
-        import re

        text = re.sub(r"<[^>]+>", " ", html_content)
        return " ".join(text.split())  # Normalize whitespace
@@ -3,6 +3,7 @@
 Processes documents from stream: fetches content, generates embeddings, stores in Qdrant.
 """

+import base64
 import logging
 import time
 import uuid
@@ -585,8 +586,6 @@ async def _index_document(
                "vector_sync.pdf_size": len(content_bytes),
            },
        ):
-            import base64
-
            from nextcloud_mcp_server.search.pdf_highlighter import PDFHighlighter

            # Build chunk data for batch processing
@@ -5,6 +5,7 @@ Periodically scans enabled users' content and queues changed documents for proce

 import logging
 import os
+import random
 import time
 from dataclasses import dataclass

@@ -167,7 +168,6 @@ async def scan_user_documents(
        nc_client: Authenticated Nextcloud client
        initial_sync: If True, send all documents (first-time sync)
    """
-    import random

    scan_id = random.randint(1000, 9999)
    logger.info(