Compare commits
17 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a0576aa9a2 | |||
| 4a6c60113b | |||
| a0cb1ac9fe | |||
| de4f1032aa | |||
| 178be5da6d | |||
| 61d8c851c9 | |||
| a8c63c8379 | |||
| 3147180ccd | |||
| 380578dd2e | |||
| 10c5557aea | |||
| 7772b1ac2e | |||
| 0513bec105 | |||
| 4e89e92b65 | |||
| af96378cb6 | |||
| c5da11aa4c | |||
| 5e4667a643 | |||
| 093ac5b5ba |
@@ -25,7 +25,7 @@ jobs:
|
||||
github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
|
||||
changelog_increment_filename: body.md
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@6da8fa9354ddfdc4aeace5fc48d7f679b5214090 # v2.4.1
|
||||
uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
|
||||
with:
|
||||
body_path: "body.md"
|
||||
tag_name: v${{ env.REVISION }}
|
||||
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
||||
|
||||
- name: Install Helm
|
||||
uses: azure/setup-helm@v4.3.0
|
||||
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
|
||||
with:
|
||||
version: v3.16.0
|
||||
|
||||
|
||||
@@ -1,3 +1,25 @@
|
||||
## v0.29.2 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Set default strategy to Recreate
|
||||
|
||||
## v0.29.1 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **observability**: isolate metrics endpoint to dedicated port
|
||||
|
||||
## v0.29.0 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: Add observability support with ServiceMonitor and Grafana dashboard
|
||||
|
||||
### Fix
|
||||
|
||||
- **readiness**: Only check external Qdrant in network mode
|
||||
|
||||
## v0.28.0 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
dependencies:
|
||||
- name: qdrant
|
||||
repository: https://qdrant.github.io/qdrant-helm
|
||||
version: 0.9.0
|
||||
version: 1.15.5
|
||||
- name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
version: 1.33.0
|
||||
digest: sha256:d2a0d0e347db47dc89c607d61251aeb0b7a39eddaa2d8137526f29cf625c900c
|
||||
generated: "2025-11-09T07:48:54.477365384+01:00"
|
||||
version: 1.34.0
|
||||
digest: sha256:d51c97d05be2614b751c0dd7267ef7dc959eff5ebef859c5f895c5c554b7a874
|
||||
generated: "2025-11-09T17:08:02.86648061Z"
|
||||
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: nextcloud-mcp-server
|
||||
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
|
||||
type: application
|
||||
version: 0.28.0
|
||||
appVersion: "0.28.0"
|
||||
version: 0.29.2
|
||||
appVersion: "0.29.2"
|
||||
keywords:
|
||||
- nextcloud
|
||||
- mcp
|
||||
@@ -23,10 +23,10 @@ sources:
|
||||
icon: https://raw.githubusercontent.com/nextcloud/server/master/core/img/logo/logo.svg
|
||||
dependencies:
|
||||
- name: qdrant
|
||||
version: "0.9.0"
|
||||
version: "1.15.5"
|
||||
repository: https://qdrant.github.io/qdrant-helm
|
||||
condition: qdrant.networkMode.deploySubchart
|
||||
- name: ollama
|
||||
version: "1.33.0"
|
||||
version: "1.34.0"
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
condition: ollama.enabled
|
||||
|
||||
@@ -0,0 +1,90 @@
|
||||
# Grafana Dashboards
|
||||
|
||||
This directory contains example Grafana dashboards for monitoring the Nextcloud MCP Server.
|
||||
|
||||
## Dashboards
|
||||
|
||||
### nextcloud-mcp-server.json
|
||||
|
||||
Comprehensive dashboard with the following panels:
|
||||
|
||||
- **Request Rate**: HTTP requests per second by method and endpoint
|
||||
- **Error Rate**: Percentage of 5xx errors
|
||||
- **Request Latency**: P50 and P95 latency by endpoint
|
||||
- **Top MCP Tools**: Most frequently called tools
|
||||
- **Nextcloud API Latency**: API call latency by app (notes, calendar, etc.)
|
||||
- **Vector Sync Queue**: Queue size for background document processing
|
||||
|
||||
## Importing to Grafana
|
||||
|
||||
### Manual Import
|
||||
|
||||
1. Open Grafana UI
|
||||
2. Navigate to Dashboards → Import
|
||||
3. Upload `nextcloud-mcp-server.json`
|
||||
4. Select your Prometheus data source
|
||||
5. Click "Import"
|
||||
|
||||
### Automated Import (Kubernetes)
|
||||
|
||||
If using the Grafana Operator or kube-prometheus-stack, you can create a ConfigMap:
|
||||
|
||||
```bash
|
||||
kubectl create configmap nextcloud-mcp-dashboards \
|
||||
--from-file=nextcloud-mcp-server.json \
|
||||
-n monitoring
|
||||
|
||||
# Add label for Grafana sidecar to discover
|
||||
kubectl label configmap nextcloud-mcp-dashboards \
|
||||
grafana_dashboard=1 \
|
||||
-n monitoring
|
||||
```
|
||||
|
||||
Or add to your Helm values:
|
||||
|
||||
```yaml
|
||||
# values.yaml for kube-prometheus-stack
|
||||
grafana:
|
||||
dashboardProviders:
|
||||
dashboardproviders.yaml:
|
||||
apiVersion: 1
|
||||
providers:
|
||||
- name: 'nextcloud-mcp'
|
||||
orgId: 1
|
||||
folder: 'Nextcloud MCP'
|
||||
type: file
|
||||
disableDeletion: false
|
||||
editable: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards/nextcloud-mcp
|
||||
|
||||
dashboardsConfigMaps:
|
||||
nextcloud-mcp: nextcloud-mcp-dashboards
|
||||
```
|
||||
|
||||
## Dashboard Variables
|
||||
|
||||
The dashboard includes two variables:
|
||||
|
||||
- **Data Source**: Select your Prometheus data source
|
||||
- **Namespace**: Filter metrics by Kubernetes namespace
|
||||
|
||||
## Customization
|
||||
|
||||
You can customize the dashboard by:
|
||||
|
||||
1. Adjusting refresh rate (default: 30s)
|
||||
2. Modifying time range (default: last 6 hours)
|
||||
3. Adding new panels for specific metrics
|
||||
4. Adjusting thresholds in existing panels
|
||||
|
||||
## Metrics Reference
|
||||
|
||||
All metrics are documented in `/docs/observability.md`. Key metric prefixes:
|
||||
|
||||
- `mcp_http_*` - HTTP server metrics
|
||||
- `mcp_tool_*` - MCP tool invocation metrics
|
||||
- `mcp_nextcloud_api_*` - Nextcloud API call metrics
|
||||
- `mcp_oauth_*` - OAuth token validation metrics
|
||||
- `mcp_vector_sync_*` - Vector database sync metrics
|
||||
- `mcp_db_*` - Database operation metrics
|
||||
@@ -0,0 +1,630 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(rate(mcp_http_requests_total{namespace=\"$namespace\"}[5m])) by (method, endpoint)",
|
||||
"legendFormat": "{{method}} {{endpoint}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Request Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "line"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"max": 100,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "sum(rate(mcp_http_requests_total{status_code=~\"5..\", namespace=\"$namespace\"}[5m])) / sum(rate(mcp_http_requests_total{namespace=\"$namespace\"}[5m])) * 100",
|
||||
"legendFormat": "Error Rate",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Error Rate (%)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum(rate(mcp_http_request_duration_seconds_bucket{namespace=\"$namespace\"}[5m])) by (le, endpoint))",
|
||||
"legendFormat": "{{endpoint}} (p95)",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.50, sum(rate(mcp_http_request_duration_seconds_bucket{namespace=\"$namespace\"}[5m])) by (le, endpoint))",
|
||||
"legendFormat": "{{endpoint}} (p50)",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Request Latency (P50/P95)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "topk(10, sum(rate(mcp_tool_calls_total{namespace=\"$namespace\"}[5m])) by (tool_name))",
|
||||
"legendFormat": "{{tool_name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Top MCP Tools by Volume",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "max"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "histogram_quantile(0.95, sum(rate(mcp_nextcloud_api_duration_seconds_bucket{namespace=\"$namespace\"}[5m])) by (le, app))",
|
||||
"legendFormat": "{{app}} (p95)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Nextcloud API Latency by App",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"tooltip": false,
|
||||
"viz": false,
|
||||
"legend": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": ["mean", "lastNotNull"],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"expr": "mcp_vector_sync_queue_size{namespace=\"$namespace\"}",
|
||||
"legendFormat": "Queue Size",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Vector Sync Queue Size",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 38,
|
||||
"style": "dark",
|
||||
"tags": ["nextcloud", "mcp", "observability"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "Prometheus",
|
||||
"value": "Prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Data Source",
|
||||
"multi": false,
|
||||
"name": "datasource",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "default",
|
||||
"value": "default"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${datasource}"
|
||||
},
|
||||
"definition": "label_values(mcp_http_requests_total, namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Namespace",
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(mcp_http_requests_total, namespace)",
|
||||
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Nextcloud MCP Server",
|
||||
"uid": "nextcloud-mcp-server",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@@ -5,6 +5,8 @@ metadata:
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
spec:
|
||||
strategy:
|
||||
type: Recreate
|
||||
{{- if not .Values.autoscaling.enabled }}
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
{{- end }}
|
||||
@@ -56,6 +58,11 @@ spec:
|
||||
- name: http
|
||||
containerPort: {{ include "nextcloud-mcp-server.port" . }}
|
||||
protocol: TCP
|
||||
{{- if .Values.observability.metrics.enabled }}
|
||||
- name: metrics
|
||||
containerPort: {{ .Values.observability.metrics.port }}
|
||||
protocol: TCP
|
||||
{{- end }}
|
||||
env:
|
||||
# Nextcloud connection
|
||||
- name: NEXTCLOUD_HOST
|
||||
@@ -200,6 +207,27 @@ spec:
|
||||
value: {{ .Values.openai.baseUrl | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
# Observability
|
||||
- name: METRICS_ENABLED
|
||||
value: {{ .Values.observability.metrics.enabled | quote }}
|
||||
- name: METRICS_PORT
|
||||
value: {{ .Values.observability.metrics.port | quote }}
|
||||
{{- if .Values.observability.tracing.enabled }}
|
||||
- name: OTEL_ENABLED
|
||||
value: "true"
|
||||
- name: OTEL_EXPORTER_OTLP_ENDPOINT
|
||||
value: {{ .Values.observability.tracing.endpoint | quote }}
|
||||
- name: OTEL_SERVICE_NAME
|
||||
value: {{ .Values.observability.tracing.serviceName | quote }}
|
||||
- name: OTEL_TRACES_SAMPLER_ARG
|
||||
value: {{ .Values.observability.tracing.samplingRate | quote }}
|
||||
{{- end }}
|
||||
- name: LOG_FORMAT
|
||||
value: {{ .Values.observability.logging.format | quote }}
|
||||
- name: LOG_LEVEL
|
||||
value: {{ .Values.observability.logging.level | quote }}
|
||||
- name: LOG_INCLUDE_TRACE_CONTEXT
|
||||
value: {{ .Values.observability.logging.includeTraceContext | quote }}
|
||||
{{- with .Values.extraEnv }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
{{- if and .Values.observability.metrics.enabled .Values.prometheusRule.enabled }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.prometheusRule.labels }}
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
groups:
|
||||
- name: nextcloud-mcp-server.critical
|
||||
interval: 30s
|
||||
rules:
|
||||
- alert: NextcloudMCPServerDown
|
||||
expr: up{job="{{ include "nextcloud-mcp-server.fullname" . }}"} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Nextcloud MCP Server is down"
|
||||
description: "{{ `{{` }} $labels.pod {{ `}}` }} has been down for more than 5 minutes."
|
||||
|
||||
- alert: NextcloudMCPHighErrorRate
|
||||
expr: |
|
||||
sum(rate(mcp_http_requests_total{status_code=~"5..", job="{{ include "nextcloud-mcp-server.fullname" . }}"}[5m]))
|
||||
/ sum(rate(mcp_http_requests_total{job="{{ include "nextcloud-mcp-server.fullname" . }}"}[5m])) > 0.05
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "High error rate on Nextcloud MCP Server"
|
||||
description: "Error rate is {{ `{{` }} printf \"%.2f%%\" (mul $value 100) {{ `}}` }} (threshold: 5%)"
|
||||
|
||||
- alert: NextcloudMCPHighLatency
|
||||
expr: |
|
||||
histogram_quantile(0.95,
|
||||
sum(rate(mcp_http_request_duration_seconds_bucket{job="{{ include "nextcloud-mcp-server.fullname" . }}"}[5m])) by (le, endpoint)
|
||||
) > 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "High latency on Nextcloud MCP Server"
|
||||
description: "P95 latency is {{ `{{` }} printf \"%.2fs\" $value {{ `}}` }} on {{ `{{` }} $labels.endpoint {{ `}}` }} (threshold: 1s)"
|
||||
|
||||
- alert: NextcloudMCPDependencyDown
|
||||
expr: mcp_dependency_health{job="{{ include "nextcloud-mcp-server.fullname" . }}"} == 0
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
annotations:
|
||||
summary: "Nextcloud MCP dependency is down"
|
||||
description: "Dependency {{ `{{` }} $labels.dependency {{ `}}` }} has been down for more than 2 minutes."
|
||||
|
||||
- name: nextcloud-mcp-server.warning
|
||||
interval: 30s
|
||||
rules:
|
||||
- alert: NextcloudMCPTokenValidationErrors
|
||||
expr: |
|
||||
sum(rate(mcp_oauth_token_validations_total{result="error", job="{{ include "nextcloud-mcp-server.fullname" . }}"}[10m]))
|
||||
/ sum(rate(mcp_oauth_token_validations_total{job="{{ include "nextcloud-mcp-server.fullname" . }}"}[10m])) > 0.01
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "High token validation error rate"
|
||||
description: "Token validation error rate is {{ `{{` }} printf \"%.2f%%\" (mul $value 100) {{ `}}` }} (threshold: 1%)"
|
||||
|
||||
- alert: NextcloudMCPVectorSyncQueueHigh
|
||||
expr: mcp_vector_sync_queue_size{job="{{ include "nextcloud-mcp-server.fullname" . }}"} > 100
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Vector sync queue is high"
|
||||
description: "Vector sync queue size is {{ `{{` }} $value {{ `}}` }} (threshold: 100)"
|
||||
|
||||
- alert: NextcloudMCPQdrantSlowQueries
|
||||
expr: |
|
||||
histogram_quantile(0.95,
|
||||
sum(rate(mcp_db_operation_duration_seconds_bucket{db="qdrant", job="{{ include "nextcloud-mcp-server.fullname" . }}"}[10m])) by (le)
|
||||
) > 0.5
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Qdrant queries are slow"
|
||||
description: "P95 Qdrant query latency is {{ `{{` }} printf \"%.2fs\" $value {{ `}}` }} (threshold: 0.5s)"
|
||||
{{- end }}
|
||||
@@ -15,5 +15,11 @@ spec:
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: http
|
||||
{{- if .Values.observability.metrics.enabled }}
|
||||
- port: {{ .Values.observability.metrics.port }}
|
||||
targetPort: metrics
|
||||
protocol: TCP
|
||||
name: metrics
|
||||
{{- end }}
|
||||
selector:
|
||||
{{- include "nextcloud-mcp-server.selectorLabels" . | nindent 4 }}
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
{{- if and .Values.observability.metrics.enabled .Values.serviceMonitor.enabled }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceMonitor.labels }}
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "nextcloud-mcp-server.selectorLabels" . | nindent 6 }}
|
||||
endpoints:
|
||||
- port: metrics
|
||||
path: {{ .Values.observability.metrics.path }}
|
||||
interval: {{ .Values.serviceMonitor.interval }}
|
||||
scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
|
||||
scheme: http
|
||||
relabelings:
|
||||
# Add namespace label
|
||||
- sourceLabels: [__meta_kubernetes_namespace]
|
||||
targetLabel: namespace
|
||||
# Add pod label
|
||||
- sourceLabels: [__meta_kubernetes_pod_name]
|
||||
targetLabel: pod
|
||||
# Add service label
|
||||
- sourceLabels: [__meta_kubernetes_service_name]
|
||||
targetLabel: service
|
||||
{{- end }}
|
||||
@@ -168,6 +168,43 @@ securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
|
||||
# Observability Configuration
|
||||
observability:
|
||||
# Prometheus metrics
|
||||
metrics:
|
||||
enabled: true
|
||||
port: 9090
|
||||
path: /metrics
|
||||
|
||||
# OpenTelemetry tracing
|
||||
tracing:
|
||||
enabled: false
|
||||
endpoint: "" # e.g., "http://opentelemetry-collector:4317"
|
||||
serviceName: "nextcloud-mcp-server"
|
||||
samplingRate: 1.0
|
||||
|
||||
# Logging configuration
|
||||
logging:
|
||||
format: json # "json" or "text"
|
||||
level: INFO
|
||||
includeTraceContext: true
|
||||
|
||||
# Prometheus ServiceMonitor (requires Prometheus Operator)
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
labels: {}
|
||||
# Additional labels for ServiceMonitor (e.g., for Prometheus selector)
|
||||
# Example: { prometheus: kube-prometheus }
|
||||
|
||||
# Prometheus alert rules (requires Prometheus Operator)
|
||||
prometheusRule:
|
||||
enabled: false
|
||||
labels: {}
|
||||
# Additional labels for PrometheusRule (e.g., for Prometheus selector)
|
||||
# Example: { prometheus: kube-prometheus }
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8000
|
||||
|
||||
+14
-12
@@ -39,7 +39,6 @@ from nextcloud_mcp_server.context import get_client as get_nextcloud_client
|
||||
from nextcloud_mcp_server.document_processors import get_registry
|
||||
from nextcloud_mcp_server.observability import (
|
||||
ObservabilityMiddleware,
|
||||
get_metrics_handler,
|
||||
get_uvicorn_logging_config,
|
||||
setup_metrics,
|
||||
setup_tracing,
|
||||
@@ -786,8 +785,10 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
|
||||
# Setup Prometheus metrics (always enabled by default)
|
||||
if settings.metrics_enabled:
|
||||
setup_metrics()
|
||||
logger.info("Prometheus metrics enabled")
|
||||
setup_metrics(port=settings.metrics_port)
|
||||
logger.info(
|
||||
f"Prometheus metrics enabled on dedicated port {settings.metrics_port}"
|
||||
)
|
||||
|
||||
# Setup OpenTelemetry tracing (optional)
|
||||
if settings.tracing_enabled:
|
||||
@@ -1172,13 +1173,15 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
checks["auth_configured"] = "error: credentials not set"
|
||||
is_ready = False
|
||||
|
||||
# Check Qdrant status if vector sync is enabled
|
||||
# Check Qdrant status if using network mode (external Qdrant service)
|
||||
# In-memory and persistent modes use embedded Qdrant, no external service to check
|
||||
vector_sync_enabled = (
|
||||
os.getenv("VECTOR_SYNC_ENABLED", "false").lower() == "true"
|
||||
)
|
||||
if vector_sync_enabled:
|
||||
qdrant_url = os.getenv("QDRANT_URL") # Only set in network mode
|
||||
|
||||
if vector_sync_enabled and qdrant_url:
|
||||
try:
|
||||
qdrant_url = os.getenv("QDRANT_URL", "http://qdrant:6333")
|
||||
async with httpx.AsyncClient(timeout=2.0) as client:
|
||||
response = await client.get(f"{qdrant_url}/readyz")
|
||||
if response.status_code == 200:
|
||||
@@ -1189,6 +1192,9 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
except Exception as e:
|
||||
checks["qdrant"] = f"error: {str(e)}"
|
||||
is_ready = False
|
||||
elif vector_sync_enabled:
|
||||
# Using embedded Qdrant (memory or persistent mode)
|
||||
checks["qdrant"] = "embedded"
|
||||
|
||||
status_code = 200 if is_ready else 503
|
||||
return JSONResponse(
|
||||
@@ -1207,12 +1213,8 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
routes.append(Route("/health/ready", health_ready, methods=["GET"]))
|
||||
logger.info("Health check endpoints enabled: /health/live, /health/ready")
|
||||
|
||||
# Add metrics endpoint (if metrics are enabled)
|
||||
if settings.metrics_enabled:
|
||||
routes.append(Route("/metrics", get_metrics_handler, methods=["GET"]))
|
||||
logger.info(
|
||||
f"Prometheus metrics endpoint enabled: /metrics (port: {settings.metrics_port if hasattr(settings, 'metrics_port') else 'default'})"
|
||||
)
|
||||
# Note: Metrics endpoint is NOT exposed on main HTTP port for security reasons.
|
||||
# Metrics are served on dedicated port via setup_metrics() (default: 9090)
|
||||
|
||||
if oauth_enabled:
|
||||
# Import OAuth routes (ADR-004 Progressive Consent)
|
||||
|
||||
@@ -18,10 +18,7 @@ from nextcloud_mcp_server.observability.logging_config import (
|
||||
get_uvicorn_logging_config,
|
||||
setup_logging,
|
||||
)
|
||||
from nextcloud_mcp_server.observability.metrics import (
|
||||
get_metrics_handler,
|
||||
setup_metrics,
|
||||
)
|
||||
from nextcloud_mcp_server.observability.metrics import setup_metrics
|
||||
from nextcloud_mcp_server.observability.middleware import ObservabilityMiddleware
|
||||
from nextcloud_mcp_server.observability.tracing import setup_tracing
|
||||
|
||||
@@ -30,6 +27,5 @@ __all__ = [
|
||||
"get_uvicorn_logging_config",
|
||||
"setup_metrics",
|
||||
"setup_tracing",
|
||||
"get_metrics_handler",
|
||||
"ObservabilityMiddleware",
|
||||
]
|
||||
|
||||
@@ -17,15 +17,11 @@ and resource usage. Metrics are organized by category:
|
||||
import logging
|
||||
|
||||
from prometheus_client import (
|
||||
CONTENT_TYPE_LATEST,
|
||||
REGISTRY,
|
||||
Counter,
|
||||
Gauge,
|
||||
Histogram,
|
||||
generate_latest,
|
||||
start_http_server,
|
||||
)
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -220,29 +216,32 @@ dependency_check_duration_seconds = Histogram(
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def setup_metrics() -> None:
|
||||
def setup_metrics(port: int = 9090) -> None:
|
||||
"""
|
||||
Initialize Prometheus metrics collection.
|
||||
Initialize Prometheus metrics collection and start HTTP server.
|
||||
|
||||
This function should be called once during application startup.
|
||||
It currently doesn't require any initialization beyond module-level
|
||||
metric definitions, but is provided for consistency and future extensibility.
|
||||
"""
|
||||
logger.info("Prometheus metrics initialized")
|
||||
|
||||
|
||||
async def get_metrics_handler(request: Request) -> Response:
|
||||
"""
|
||||
HTTP handler for the /metrics endpoint.
|
||||
Starts a dedicated HTTP server on the specified port to serve metrics.
|
||||
This server runs in a separate thread and is isolated from the main application.
|
||||
|
||||
Args:
|
||||
request: Starlette request object (unused, but required by signature)
|
||||
port: Port to serve metrics on (default: 9090)
|
||||
|
||||
Returns:
|
||||
Response containing Prometheus metrics in text format
|
||||
Note:
|
||||
Metrics endpoint (/metrics) is ONLY accessible on this dedicated port,
|
||||
not on the main application HTTP port. This is a security best practice
|
||||
to prevent external exposure of metrics.
|
||||
"""
|
||||
metrics_data = generate_latest(REGISTRY)
|
||||
return Response(content=metrics_data, media_type=CONTENT_TYPE_LATEST)
|
||||
try:
|
||||
start_http_server(port)
|
||||
logger.info(f"Prometheus metrics server started on port {port}")
|
||||
except OSError as e:
|
||||
if "Address already in use" in str(e):
|
||||
logger.warning(
|
||||
f"Metrics port {port} already in use (metrics server likely already running)"
|
||||
)
|
||||
else:
|
||||
logger.error(f"Failed to start metrics server on port {port}: {e}")
|
||||
raise
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "nextcloud-mcp-server"
|
||||
version = "0.28.0"
|
||||
version = "0.29.2"
|
||||
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
|
||||
authors = [
|
||||
{name = "Chris Coutinho", email = "chris@coutinho.io"}
|
||||
|
||||
Reference in New Issue
Block a user