From 371d0c93a5017d910be29e8b118a6b36a7152076 Mon Sep 17 00:00:00 2001
From: Chris Coutinho <chris@coutinho.io>
Date: Sat, 18 Oct 2025 17:21:17 +0200
Subject: [PATCH] test: Update oauth benchmark tests

---
 tests/load/README_OAUTH.md    | 94 +++++++++++++++++++++++------------
 tests/load/oauth_benchmark.py |  7 +++
 2 files changed, 68 insertions(+), 33 deletions(-)

diff --git a/tests/load/README_OAUTH.md b/tests/load/README_OAUTH.md
index fdcab00..94a6716 100644
--- a/tests/load/README_OAUTH.md
+++ b/tests/load/README_OAUTH.md
@@ -142,24 +142,35 @@ uv run python -m tests.load.oauth_benchmark -u 2 -d 30 --verbose
 
 | Option | Short | Default | Description |
 |--------|-------|---------|-------------|
-| `--users` | `-u` | 2 | Number of concurrent users (max 4 with default config) |
+| `--users` | `-u` | 2 | Number of concurrent users (dynamically created) |
 | `--duration` | `-d` | 30.0 | Test duration in seconds |
 | `--warmup` | `-w` | 5.0 | Warmup period before metrics collection (seconds) |
 | `--url` | | `http://127.0.0.1:8001/mcp` | MCP OAuth server URL |
 | `--output` | `-o` | None | JSON output file path |
 | `--workload` | | `mixed` | Workload type: mixed, sharing, collaboration, baseline |
+| `--user-prefix` | | `loadtest` | Prefix for dynamically created usernames |
+| `--cleanup/--no-cleanup` | | `--cleanup` | Delete dynamically created users after the benchmark finishes |
+| `--browser` | | `chromium` | Playwright browser: firefox, chromium, webkit |
+| `--headed` | | False | Run browser in headed mode (visible window) |
 | `--verbose` | `-v` | False | Enable verbose logging |
 
-## Default Test Users
+## Test User Creation
 
-The framework includes 4 pre-configured test users:
+The framework **dynamically creates test users** on-demand with OAuth authentication:
 
-| Username | Display Name | Groups | Role |
-|----------|--------------|--------|------|
-| alice | Alice Anderson | owners | Owner - full permissions |
-| bob | Bob Brown | viewers | Viewer - read-only |
-| charlie | Charlie Chen | editors | Editor - read/write |
-| diana | Diana Davis | (none) | No special permissions |
+- **Naming**: Users are created with the pattern `{prefix}_user_{n}` (default: `loadtest_user_1`, `loadtest_user_2`, etc.)
+- **Customization**: Use `--user-prefix` to change the prefix (e.g., `--user-prefix mytest` → `mytest_user_1`)
+- **Scalability**: No fixed limit on user count - create as many concurrent users as your system can handle
+- **Credentials**: Each user gets a randomly generated secure password
+- **OAuth Tokens**: All users authenticate via automated OAuth flow using Playwright
+- **Cleanup**: Users are automatically deleted after the benchmark (disable with `--no-cleanup`)
+
+**Example**: Running `--users 5` creates:
+- `loadtest_user_1` (Display: Load Test User 1, Email: loadtest_user_1@benchmark.local)
+- `loadtest_user_2` (Display: Load Test User 2, Email: loadtest_user_2@benchmark.local)
+- `loadtest_user_3` (Display: Load Test User 3, Email: loadtest_user_3@benchmark.local)
+- `loadtest_user_4` (Display: Load Test User 4, Email: loadtest_user_4@benchmark.local)
+- `loadtest_user_5` (Display: Load Test User 5, Email: loadtest_user_5@benchmark.local)
 
 ## Metrics Output
 
@@ -171,34 +182,35 @@ OAUTH MULTI-USER BENCHMARK RESULTS
 ================================================================================
 
 Duration: 120.45s
-Total Users: 4
-Total Workflows Executed: 247
-Total Baseline Operations: 531
+Total Users: 5
+Total Workflows Executed: 312
+Total Baseline Operations: 678
 
 --------------------------------------------------------------------------------
 WORKFLOW STATISTICS
 --------------------------------------------------------------------------------
 Workflow                         Total  Success     Rate        P50        P95
 --------------------------------------------------------------------------------
-note_share                          89       87    97.8%   0.2341s   0.4782s
-collaborative_edit                  52       48    92.3%   0.5123s   0.9234s
-file_share                          23       23   100.0%   0.3456s   0.6123s
+note_share                         112      109    97.3%   0.2341s   0.4782s
+collaborative_edit                  65       61    93.8%   0.5123s   0.9234s
+file_share                          29       29   100.0%   0.3456s   0.6123s
 
 --------------------------------------------------------------------------------
 PER-USER STATISTICS
 --------------------------------------------------------------------------------
 User                  Total Ops    Success   Errors     Rate        P50
 --------------------------------------------------------------------------------
-alice                        234        229        5    97.9%   0.2456s
-bob                          198        195        3    98.5%   0.2123s
-charlie                      187        183        4    97.9%   0.2345s
-diana                        159        157        2    98.7%   0.2234s
+loadtest_user_1              289        283        6    97.9%   0.2456s
+loadtest_user_2              245        241        4    98.4%   0.2123s
+loadtest_user_3              231        226        5    97.8%   0.2345s
+loadtest_user_4              198        195        3    98.5%   0.2234s
+loadtest_user_5              187        184        3    98.4%   0.2189s
 
 --------------------------------------------------------------------------------
 BASELINE OPERATIONS
 --------------------------------------------------------------------------------
-Total Operations: 531
-Success Rate: 98.1%
+Total Operations: 678
+Success Rate: 98.2%
 Latency: min=0.0234s, p50=0.1234s, p95=0.3456s, max=0.8123s
 ================================================================================
 ```
@@ -209,16 +221,16 @@ Latency: min=0.0234s, p50=0.1234s, p95=0.3456s, max=0.8123s
 {
   "summary": {
     "duration": 120.45,
-    "total_workflows": 247,
-    "total_baseline_ops": 531,
-    "total_users": 4
+    "total_workflows": 312,
+    "total_baseline_ops": 678,
+    "total_users": 5
   },
   "workflows": {
     "note_share": {
-      "total_executions": 89,
-      "successful_executions": 87,
-      "failed_executions": 2,
-      "success_rate": 97.8,
+      "total_executions": 112,
+      "successful_executions": 109,
+      "failed_executions": 3,
+      "success_rate": 97.3,
       "latency": {
         "min": 0.1234,
         "max": 0.8765,
@@ -237,15 +249,19 @@ Latency: min=0.0234s, p50=0.1234s, p95=0.3456s, max=0.8123s
     }
   },
   "users": {
-    "alice": {
-      "total_operations": 234,
-      "successful_operations": 229,
-      "failed_operations": 5,
+    "loadtest_user_1": {
+      "total_operations": 289,
+      "successful_operations": 283,
+      "failed_operations": 6,
       "success_rate": 97.9,
       "latency": {...},
       "operations_breakdown": {...},
       "errors_breakdown": {...}
-    }
+    },
+    "loadtest_user_2": {...},
+    "loadtest_user_3": {...},
+    "loadtest_user_4": {...},
+    "loadtest_user_5": {...}
   },
   "baseline": {...}
 }
@@ -473,6 +489,18 @@ uv run python -m tests.load.cleanup_loadtest_users
 - Ensure user count doesn't exceed configured limits
 - Check that user creation succeeded in previous steps
 
+### CancelledError During Benchmark
+**Symptom**: An error message like `'CancelledError' object has no attribute 'username'` appears in the logs
+
+**Cause**: Cancelling async tasks (during benchmark shutdown or after an error) can cause race conditions in error handling
+
+**Solution**: This has been mitigated with defensive error handling. The worker now:
+- Catches `asyncio.CancelledError` specifically before general exceptions
+- Logs cancellation gracefully without attempting to access potentially invalid state
+- Re-raises the exception so that the proper cleanup chain can run
+
+If you still see this error, it most likely occurred during shutdown and is harmless. The benchmark results should still be valid.
+
 ### High Error Rates
 - Increase delay between operations (`await asyncio.sleep()` in worker)
 - Check OAuth token validity
diff --git a/tests/load/oauth_benchmark.py b/tests/load/oauth_benchmark.py
index a9c1056..56505ad 100644
--- a/tests/load/oauth_benchmark.py
+++ b/tests/load/oauth_benchmark.py
@@ -263,6 +263,13 @@ async def oauth_benchmark_worker(
             f"Worker for {user_wrapper.username} completed {operation_count} operations"
         )
 
+    except asyncio.CancelledError:
+        # Handle task cancellation gracefully (e.g., during benchmark shutdown)
+        logger.info(
+            f"Worker for {user_wrapper.username} was cancelled "
+            f"(completed {operation_count} operations)"
+        )
+        raise  # Re-raise to allow proper cleanup
     except Exception as e:
         logger.error(f"Worker {user_wrapper.username} error: {e}", exc_info=True)