-
Notifications
You must be signed in to change notification settings - Fork 145
MCP connection liveness health checks #1112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: claude/health-checks-identity
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,129 @@ | ||
| // Health checks for MCP connections, liveness-only: a connection's credential is | ||
| // probed by dialing the server and listing tools (the same path tool discovery | ||
| // uses). A live token reads healthy; a revoked/wrong token reads expired. MCP | ||
| // has no usable identity source, so there is no identity field and no operation | ||
| // picker - only the alive/expired signal. The connect-modal "Validate key" path | ||
| // (connections.validate) runs the same probe on an unsaved credential. | ||
| // | ||
| // The upstream is a real in-process MCP server (the plugin's own test helper) | ||
| // gated on a bearer token, so revoking the token mid-scenario reproduces the | ||
| // "dev token expired" transition on a saved connection. | ||
| import { randomBytes } from "node:crypto"; | ||
|
|
||
| import { Effect } from "effect"; | ||
| import { expect } from "@effect/vitest"; | ||
| import type { HttpApiClient } from "effect/unstable/httpapi"; | ||
| import { composePluginApi } from "@executor-js/api/server"; | ||
| import { mcpHttpPlugin } from "@executor-js/plugin-mcp/api"; | ||
| import { makeEchoMcpServer, serveMcpServer } from "@executor-js/plugin-mcp/testing"; | ||
| import { variable } from "@executor-js/sdk/http-auth"; | ||
| import { AuthTemplateSlug, ConnectionName, IntegrationSlug } from "@executor-js/sdk/shared"; | ||
|
|
||
| import { scenario } from "../src/scenario"; | ||
| import { Api, Target } from "../src/services"; | ||
|
|
||
| const api = composePluginApi([mcpHttpPlugin()] as const); | ||
| type Client = HttpApiClient.ForApi<typeof api>; | ||
|
|
||
| const newSlug = (prefix: string) => | ||
| IntegrationSlug.make(`${prefix}-${randomBytes(4).toString("hex")}`); | ||
|
|
||
| scenario( | ||
| "Health checks · MCP liveness reports healthy, then expired when the token is revoked", | ||
| {}, | ||
| Effect.scoped( | ||
| Effect.gen(function* () { | ||
| const target = yield* Target; | ||
| const { client: makeClient } = yield* Api; | ||
| const identity = yield* target.newIdentity(); | ||
| const client: Client = yield* makeClient(api, identity); | ||
| const goodToken = `mcp_${randomBytes(8).toString("hex")}`; | ||
| const slug = newSlug("hc-mcp"); | ||
| const name = ConnectionName.make("main"); | ||
|
|
||
| // A real MCP server gated on the bearer token. `live` flips off to | ||
| // reproduce a revoked credential against an already-saved connection. | ||
| let live = true; | ||
| const server = yield* serveMcpServer(() => makeEchoMcpServer({ name: "liveness-mcp" }), { | ||
| auth: { | ||
| validateAuthorization: (authorization) => | ||
| Effect.succeed(live && authorization === `Bearer ${goodToken}`), | ||
| }, | ||
| }); | ||
|
|
||
| yield* Effect.ensuring( | ||
| Effect.gen(function* () { | ||
| yield* client.mcp.addServer({ | ||
| payload: { | ||
| transport: "remote", | ||
| name: "Liveness MCP", | ||
| endpoint: server.url, | ||
| slug: String(slug), | ||
| // Pin streamable-http so the probe's failure is the server's 401 | ||
| // (no auto SSE fallback to muddy the classification). | ||
| remoteTransport: "streamable-http", | ||
| authenticationTemplate: [ | ||
| { | ||
| slug: "bearer", | ||
| type: "apiKey", | ||
| headers: { Authorization: ["Bearer ", variable("token")] }, | ||
| }, | ||
| ], | ||
| }, | ||
| }); | ||
|
|
||
| yield* client.connections.create({ | ||
| payload: { | ||
| owner: "org", | ||
| name, | ||
| integration: slug, | ||
| template: AuthTemplateSlug.make("bearer"), | ||
| value: goodToken, | ||
| }, | ||
| }); | ||
|
|
||
| // Saved connection with the live token: alive. | ||
| const healthy = yield* client.connections.checkHealth({ | ||
| params: { owner: "org", integration: slug, name }, | ||
| }); | ||
| expect(healthy.status, "a live MCP credential is healthy").toBe("healthy"); | ||
|
|
||
| // Key-first validate (unsaved credential) runs the same probe. | ||
| const validated = yield* client.connections.validate({ | ||
| payload: { | ||
| owner: "org", | ||
| integration: slug, | ||
| template: AuthTemplateSlug.make("bearer"), | ||
| value: goodToken, | ||
| }, | ||
| }); | ||
| expect(validated.status, "validating a live key is healthy").toBe("healthy"); | ||
| const rejected = yield* client.connections.validate({ | ||
| payload: { | ||
| owner: "org", | ||
| integration: slug, | ||
| template: AuthTemplateSlug.make("bearer"), | ||
| value: "wrong-token", | ||
| }, | ||
| }); | ||
| expect(rejected.status, "validating a rejected key is expired").toBe("expired"); | ||
|
|
||
| // The upstream revokes the saved token: the same connection now expired. | ||
| live = false; | ||
| const expired = yield* client.connections.checkHealth({ | ||
| params: { owner: "org", integration: slug, name }, | ||
| }); | ||
| expect(expired.status, "a revoked MCP credential reads expired").toBe("expired"); | ||
| // No identity is ever derived for MCP (manual label only). | ||
| expect(expired.identity, "MCP surfaces no derived identity").toBeUndefined(); | ||
| }), | ||
| Effect.gen(function* () { | ||
| yield* client.connections | ||
| .remove({ params: { owner: "org", integration: slug, name } }) | ||
| .pipe(Effect.ignore); | ||
| yield* client.mcp.removeServer({ params: { slug } }).pipe(Effect.ignore); | ||
| }), | ||
| ); | ||
| }), | ||
| ), | ||
| ); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| // --------------------------------------------------------------------------- | ||
| // Extract the HTTP status from an MCP SDK transport error. The SDK surfaces | ||
| // transport failures two ways: a `StreamableHTTPError` subclass carrying a | ||
| // numeric `code`, and an SSE POST failure whose message embeds `(HTTP nnn)`. | ||
| // Shared by the invoke path (classifies tool-call failures) and the connect | ||
| // path (so a 401/403 during the handshake reaches the liveness health check). | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| import { Option, Schema } from "effect"; | ||
|
|
||
| import { StreamableHTTPError } from "@modelcontextprotocol/sdk/client/streamableHttp.js"; | ||
|
|
||
| const SsePostErrorCause = Schema.Struct({ message: Schema.String }); | ||
| const decodeSsePostErrorCause = Schema.decodeUnknownOption(SsePostErrorCause); | ||
|
|
||
| // Matches the SDK's SSEClientTransport POST-failure message (sse.js); re-verify | ||
| // on SDK bumps. A format drift just yields undefined (generic error, no crash). | ||
| const statusFromSsePostError = (cause: unknown): number | undefined => | ||
| Option.match(decodeSsePostErrorCause(cause), { | ||
| onNone: () => undefined, | ||
| onSome: ({ message }) => { | ||
| const match = /^Error POSTing to endpoint \(HTTP ([1-5][0-9]{2})\):/.exec(message); | ||
| if (!match) return undefined; | ||
| return Number(match[1]); | ||
| }, | ||
| }); | ||
|
|
||
| const statusFromStreamableHttpError = (cause: unknown): number | undefined => { | ||
| // oxlint-disable-next-line executor/no-instanceof-tagged-error -- boundary: MCP SDK exposes transport HTTP failures as this Error subclass; protocol errors can carry the same numeric code | ||
| if (!(cause instanceof StreamableHTTPError)) return undefined; | ||
| const code = cause.code; | ||
| return code !== undefined && code >= 100 && code <= 599 ? code : undefined; | ||
| }; | ||
|
|
||
| export const httpStatusFromCause = (cause: unknown): number | undefined => | ||
| statusFromStreamableHttpError(cause) ?? statusFromSsePostError(cause); |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -15,6 +15,7 @@ import { | |||||||||||||||||||||||||||
| tool, | ||||||||||||||||||||||||||||
| ToolResult, | ||||||||||||||||||||||||||||
| type AuthMethodDescriptor, | ||||||||||||||||||||||||||||
| type HealthCheckResult, | ||||||||||||||||||||||||||||
| type Integration, | ||||||||||||||||||||||||||||
| type IntegrationConfig, | ||||||||||||||||||||||||||||
| type IntegrationRecord, | ||||||||||||||||||||||||||||
|
|
@@ -64,6 +65,21 @@ import { | |||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| const MCP_PLUGIN_ID = "mcp" as const; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| /** Classify a failed liveness probe. An auth wall (OAuth re-authorization, or a | ||||||||||||||||||||||||||||
| * 401/403 surfaced into the connect message) means the credential is expired; | ||||||||||||||||||||||||||||
| * anything else (server down, wrong transport) is degraded, not a credential | ||||||||||||||||||||||||||||
| * problem. */ | ||||||||||||||||||||||||||||
| const mcpLivenessFailureStatus = (message: string): "expired" | "degraded" => { | ||||||||||||||||||||||||||||
| const lower = message.toLowerCase(); | ||||||||||||||||||||||||||||
| const authWalled = | ||||||||||||||||||||||||||||
| lower.includes("oauth re-authorization") || | ||||||||||||||||||||||||||||
| lower.includes("(http 401)") || | ||||||||||||||||||||||||||||
| lower.includes("(http 403)") || | ||||||||||||||||||||||||||||
| lower.includes("unauthorized") || | ||||||||||||||||||||||||||||
| lower.includes("forbidden"); | ||||||||||||||||||||||||||||
| return authWalled ? "expired" : "degraded"; | ||||||||||||||||||||||||||||
| }; | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| const legacyOAuthClientSlugCandidate = (value: string): string | null => { | ||||||||||||||||||||||||||||
| const slug = value | ||||||||||||||||||||||||||||
| .trim() | ||||||||||||||||||||||||||||
|
|
@@ -1165,6 +1181,51 @@ export const mcpPlugin = definePlugin((options?: McpPluginOptions) => { | |||||||||||||||||||||||||||
| return out; | ||||||||||||||||||||||||||||
| }), | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| // Liveness-only health check. MCP has no usable identity source (no | ||||||||||||||||||||||||||||
| // id_token/userinfo, no standard whoami), so this answers "is this | ||||||||||||||||||||||||||||
| // credential still alive?" by dialing the server and listing tools (the same | ||||||||||||||||||||||||||||
| // path resolveTools uses); identity stays the user-supplied connection label. | ||||||||||||||||||||||||||||
| // Only checkHealth is implemented (no candidates/describe/set), so the | ||||||||||||||||||||||||||||
| // operation/identity editor stays hidden while the status dot + "Check now" | ||||||||||||||||||||||||||||
| // light up. | ||||||||||||||||||||||||||||
| checkHealth: ({ credential }) => | ||||||||||||||||||||||||||||
| Effect.gen(function* () { | ||||||||||||||||||||||||||||
| const parsed = parseMcpIntegrationConfig(credential.config); | ||||||||||||||||||||||||||||
| if (!parsed) { | ||||||||||||||||||||||||||||
| return { status: "unknown" as const, checkedAt: Date.now() } satisfies HealthCheckResult; | ||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||
| const connector = yield* buildConnectorInput( | ||||||||||||||||||||||||||||
| parsed, | ||||||||||||||||||||||||||||
| credential.values, | ||||||||||||||||||||||||||||
| credential.template === null ? null : String(credential.template), | ||||||||||||||||||||||||||||
| allowStdio, | ||||||||||||||||||||||||||||
| ).pipe(Effect.map((ci) => createMcpConnector(ci))); | ||||||||||||||||||||||||||||
|
Comment on lines
+1197
to
+1202
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| return yield* discoverTools(connector).pipe( | ||||||||||||||||||||||||||||
| Effect.map( | ||||||||||||||||||||||||||||
| () => | ||||||||||||||||||||||||||||
| ({ status: "healthy" as const, checkedAt: Date.now() }) satisfies HealthCheckResult, | ||||||||||||||||||||||||||||
| ), | ||||||||||||||||||||||||||||
| Effect.catchTag("McpToolDiscoveryError", (error) => | ||||||||||||||||||||||||||||
| Effect.succeed({ | ||||||||||||||||||||||||||||
| status: mcpLivenessFailureStatus(error.message), | ||||||||||||||||||||||||||||
| checkedAt: Date.now(), | ||||||||||||||||||||||||||||
| detail: error.message, | ||||||||||||||||||||||||||||
| } satisfies HealthCheckResult), | ||||||||||||||||||||||||||||
| ), | ||||||||||||||||||||||||||||
| ); | ||||||||||||||||||||||||||||
|
Comment on lines
+1204
to
+1216
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||||||||||||||||||||
| }).pipe( | ||||||||||||||||||||||||||||
| // buildConnectorInput rejects (e.g. stdio disabled / missing config). | ||||||||||||||||||||||||||||
| Effect.catchTag("McpConnectionError", (error) => | ||||||||||||||||||||||||||||
| Effect.succeed({ | ||||||||||||||||||||||||||||
| status: mcpLivenessFailureStatus(error.message), | ||||||||||||||||||||||||||||
| checkedAt: Date.now(), | ||||||||||||||||||||||||||||
| detail: error.message, | ||||||||||||||||||||||||||||
| } satisfies HealthCheckResult), | ||||||||||||||||||||||||||||
| ), | ||||||||||||||||||||||||||||
| Effect.withSpan("mcp.plugin.check_health"), | ||||||||||||||||||||||||||||
| ), | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| describeAuthMethods: describeMcpAuthMethods, | ||||||||||||||||||||||||||||
| describeIntegrationDisplay: describeMcpIntegrationDisplay, | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"unauthorized"/"forbidden"fallbacks can produce false positivesThe broad
lower.includes("unauthorized")andlower.includes("forbidden")checks can match non-auth error text (e.g. an upstream tool description, a server error body, or a configuration message that uses these words). A misconfigured-but-alive server that emits "operation forbidden" in its error body would be mis-reported as"expired"instead of"degraded". Scoping these to known patterns (or removing them in favor of only HTTP-status matching) reduces false positives without losing coverage, since the HTTP-status suffix fromconnection.tsand the OAuth re-authorization string already cover the primary auth-failure paths.