From 1327086b8851279e735bdb35f70b0eec38fc0514 Mon Sep 17 00:00:00 2001 From: Rhys Sullivan Date: Tue, 23 Jun 2026 15:20:00 -0700 Subject: [PATCH] feat: MCP connection liveness health checks Give MCP connections a liveness probe. `checkHealth` dials the server and lists its tools (the same connect path tool discovery uses): a credential that authenticates and gets a tool list reads healthy; a 401/403 (or an OAuth re-authorization signal) reads expired; any other connection/discovery failure reads degraded. The connect-modal "Validate key" path runs the same probe on an unsaved credential. MCP gets liveness ONLY: there is no usable identity source (no id_token, no userinfo, no whoami convention across servers), so no identity is derived and no operation/identity editor is shown - the connection's name stays the user's label. The plugin implements only `checkHealth`, so the editor self-hides while the generic status dot + "Check now" still render. Factors the HTTP-status extraction out of invoke into a shared http-status helper and surfaces the upstream status in connect errors so the probe can classify a 401/403 as expired rather than a generic degraded. Covered by e2e: a saved MCP connection reads healthy, then expired once the upstream revokes the token; validate reports healthy for a live key and expired for a rejected one; no identity is ever derived. --- e2e/scenarios/health-checks-mcp.test.ts | 129 ++++++++++++++++++++ packages/plugins/mcp/src/sdk/connection.ts | 15 ++- packages/plugins/mcp/src/sdk/http-status.ts | 36 ++++++ packages/plugins/mcp/src/sdk/invoke.ts | 27 +--- packages/plugins/mcp/src/sdk/plugin.ts | 61 +++++++++ 5 files changed, 240 insertions(+), 28 deletions(-) create mode 100644 e2e/scenarios/health-checks-mcp.test.ts create mode 100644 packages/plugins/mcp/src/sdk/http-status.ts diff --git a/e2e/scenarios/health-checks-mcp.test.ts b/e2e/scenarios/health-checks-mcp.test.ts new file mode 100644 index 000000000..a38728276 --- /dev/null +++ b/e2e/scenarios/health-checks-mcp.test.ts @@ -0,0 +1,129 @@ +// Health checks for MCP connections, liveness-only: a connection's credential is +// probed by dialing the server and listing tools (the same path tool discovery +// uses). A live token reads healthy; a revoked/wrong token reads expired. MCP +// has no usable identity source, so there is no identity field and no operation +// picker - only the alive/expired signal. The connect-modal "Validate key" path +// (connections.validate) runs the same probe on an unsaved credential. +// +// The upstream is a real in-process MCP server (the plugin's own test helper) +// gated on a bearer token, so revoking the token mid-scenario reproduces the +// "dev token expired" transition on a saved connection. +import { randomBytes } from "node:crypto"; + +import { Effect } from "effect"; +import { expect } from "@effect/vitest"; +import type { HttpApiClient } from "effect/unstable/httpapi"; +import { composePluginApi } from "@executor-js/api/server"; +import { mcpHttpPlugin } from "@executor-js/plugin-mcp/api"; +import { makeEchoMcpServer, serveMcpServer } from "@executor-js/plugin-mcp/testing"; +import { variable } from "@executor-js/sdk/http-auth"; +import { AuthTemplateSlug, ConnectionName, IntegrationSlug } from "@executor-js/sdk/shared"; + +import { scenario } from "../src/scenario"; +import { Api, Target } from "../src/services"; + +const api = composePluginApi([mcpHttpPlugin()] as const); +type Client = HttpApiClient.ForApi; + +const newSlug = (prefix: string) => + IntegrationSlug.make(`${prefix}-${randomBytes(4).toString("hex")}`); + +scenario( + "Health checks · MCP liveness reports healthy, then expired when the token is revoked", + {}, + Effect.scoped( + Effect.gen(function* () { + const target = yield* Target; + const { client: makeClient } = yield* Api; + const identity = yield* target.newIdentity(); + const client: Client = yield* makeClient(api, identity); + const goodToken = `mcp_${randomBytes(8).toString("hex")}`; + const slug = newSlug("hc-mcp"); + const name = ConnectionName.make("main"); + + // A real MCP server gated on the bearer token. `live` flips off to + // reproduce a revoked credential against an already-saved connection. + let live = true; + const server = yield* serveMcpServer(() => makeEchoMcpServer({ name: "liveness-mcp" }), { + auth: { + validateAuthorization: (authorization) => + Effect.succeed(live && authorization === `Bearer ${goodToken}`), + }, + }); + + yield* Effect.ensuring( + Effect.gen(function* () { + yield* client.mcp.addServer({ + payload: { + transport: "remote", + name: "Liveness MCP", + endpoint: server.url, + slug: String(slug), + // Pin streamable-http so the probe's failure is the server's 401 + // (no auto SSE fallback to muddy the classification). + remoteTransport: "streamable-http", + authenticationTemplate: [ + { + slug: "bearer", + type: "apiKey", + headers: { Authorization: ["Bearer ", variable("token")] }, + }, + ], + }, + }); + + yield* client.connections.create({ + payload: { + owner: "org", + name, + integration: slug, + template: AuthTemplateSlug.make("bearer"), + value: goodToken, + }, + }); + + // Saved connection with the live token: alive. + const healthy = yield* client.connections.checkHealth({ + params: { owner: "org", integration: slug, name }, + }); + expect(healthy.status, "a live MCP credential is healthy").toBe("healthy"); + + // Key-first validate (unsaved credential) runs the same probe. + const validated = yield* client.connections.validate({ + payload: { + owner: "org", + integration: slug, + template: AuthTemplateSlug.make("bearer"), + value: goodToken, + }, + }); + expect(validated.status, "validating a live key is healthy").toBe("healthy"); + const rejected = yield* client.connections.validate({ + payload: { + owner: "org", + integration: slug, + template: AuthTemplateSlug.make("bearer"), + value: "wrong-token", + }, + }); + expect(rejected.status, "validating a rejected key is expired").toBe("expired"); + + // The upstream revokes the saved token: the same connection now expired. + live = false; + const expired = yield* client.connections.checkHealth({ + params: { owner: "org", integration: slug, name }, + }); + expect(expired.status, "a revoked MCP credential reads expired").toBe("expired"); + // No identity is ever derived for MCP (manual label only). + expect(expired.identity, "MCP surfaces no derived identity").toBeUndefined(); + }), + Effect.gen(function* () { + yield* client.connections + .remove({ params: { owner: "org", integration: slug, name } }) + .pipe(Effect.ignore); + yield* client.mcp.removeServer({ params: { slug } }).pipe(Effect.ignore); + }), + ); + }), + ), +); diff --git a/packages/plugins/mcp/src/sdk/connection.ts b/packages/plugins/mcp/src/sdk/connection.ts index 1e1e2567b..9eb6e55cd 100644 --- a/packages/plugins/mcp/src/sdk/connection.ts +++ b/packages/plugins/mcp/src/sdk/connection.ts @@ -17,6 +17,7 @@ import { HttpClient, HttpClientRequest } from "effect/unstable/http"; import type { McpRemoteIntegrationConfig, McpStdioIntegrationConfig } from "./types"; import { McpConnectionError, McpOAuthReauthorizationRequired } from "./errors"; +import { httpStatusFromCause } from "./http-status"; // --------------------------------------------------------------------------- // Connection type @@ -198,8 +199,18 @@ const connectClient = (input: { yield* Effect.tryPromise({ try: () => client.connect(transportInstance), - catch: (cause) => - connectionFailure(input.transport, `Failed connecting via ${input.transport}`, cause), + catch: (cause) => { + // Surface the handshake HTTP status (e.g. 401/403) in the message so the + // liveness health check can classify a rejected credential as expired + // rather than a generic connection failure. + const status = httpStatusFromCause(cause); + const suffix = status === undefined ? "" : ` (HTTP ${status})`; + return connectionFailure( + input.transport, + `Failed connecting via ${input.transport}${suffix}`, + cause, + ); + }, }).pipe( Effect.withSpan("plugin.mcp.connection.handshake", { attributes: { "plugin.mcp.transport": input.transport }, diff --git a/packages/plugins/mcp/src/sdk/http-status.ts b/packages/plugins/mcp/src/sdk/http-status.ts new file mode 100644 index 000000000..366549993 --- /dev/null +++ b/packages/plugins/mcp/src/sdk/http-status.ts @@ -0,0 +1,36 @@ +// --------------------------------------------------------------------------- +// Extract the HTTP status from an MCP SDK transport error. The SDK surfaces +// transport failures two ways: a `StreamableHTTPError` subclass carrying a +// numeric `code`, and an SSE POST failure whose message embeds `(HTTP nnn)`. +// Shared by the invoke path (classifies tool-call failures) and the connect +// path (so a 401/403 during the handshake reaches the liveness health check). +// --------------------------------------------------------------------------- + +import { Option, Schema } from "effect"; + +import { StreamableHTTPError } from "@modelcontextprotocol/sdk/client/streamableHttp.js"; + +const SsePostErrorCause = Schema.Struct({ message: Schema.String }); +const decodeSsePostErrorCause = Schema.decodeUnknownOption(SsePostErrorCause); + +// Matches the SDK's SSEClientTransport POST-failure message (sse.js); re-verify +// on SDK bumps. A format drift just yields undefined (generic error, no crash). +const statusFromSsePostError = (cause: unknown): number | undefined => + Option.match(decodeSsePostErrorCause(cause), { + onNone: () => undefined, + onSome: ({ message }) => { + const match = /^Error POSTing to endpoint \(HTTP ([1-5][0-9]{2})\):/.exec(message); + if (!match) return undefined; + return Number(match[1]); + }, + }); + +const statusFromStreamableHttpError = (cause: unknown): number | undefined => { + // oxlint-disable-next-line executor/no-instanceof-tagged-error -- boundary: MCP SDK exposes transport HTTP failures as this Error subclass; protocol errors can carry the same numeric code + if (!(cause instanceof StreamableHTTPError)) return undefined; + const code = cause.code; + return code !== undefined && code >= 100 && code <= 599 ? code : undefined; +}; + +export const httpStatusFromCause = (cause: unknown): number | undefined => + statusFromStreamableHttpError(cause) ?? statusFromSsePostError(cause); diff --git a/packages/plugins/mcp/src/sdk/invoke.ts b/packages/plugins/mcp/src/sdk/invoke.ts index 7f0f6c597..46130328e 100644 --- a/packages/plugins/mcp/src/sdk/invoke.ts +++ b/packages/plugins/mcp/src/sdk/invoke.ts @@ -13,7 +13,6 @@ import { Cause, Effect, Exit, Option, Predicate, Schema } from "effect"; -import { StreamableHTTPError } from "@modelcontextprotocol/sdk/client/streamableHttp.js"; import { ElicitRequestSchema } from "@modelcontextprotocol/sdk/types.js"; import { @@ -26,6 +25,7 @@ import { import { McpConnectionError, McpInvocationError, McpOAuthReauthorizationRequired } from "./errors"; import type { McpConnection, McpConnector } from "./connection"; +import { httpStatusFromCause } from "./http-status"; // --------------------------------------------------------------------------- // Helpers @@ -37,31 +37,6 @@ const decodeArgsRecord = Schema.decodeUnknownOption(ArgsRecord); const argsRecord = (value: unknown): Record => Option.getOrElse(decodeArgsRecord(value), () => ({})); -const SsePostErrorCause = Schema.Struct({ message: Schema.String }); -const decodeSsePostErrorCause = Schema.decodeUnknownOption(SsePostErrorCause); - -// Matches the SDK's SSEClientTransport POST-failure message (sse.js); re-verify -// on SDK bumps. A format drift just yields undefined (generic error, no crash). -const statusFromSsePostError = (cause: unknown): number | undefined => - Option.match(decodeSsePostErrorCause(cause), { - onNone: () => undefined, - onSome: ({ message }) => { - const match = /^Error POSTing to endpoint \(HTTP ([1-5][0-9]{2})\):/.exec(message); - if (!match) return undefined; - return Number(match[1]); - }, - }); - -const statusFromStreamableHttpError = (cause: unknown): number | undefined => { - // oxlint-disable-next-line executor/no-instanceof-tagged-error -- boundary: MCP SDK exposes transport HTTP failures as this Error subclass; protocol errors can carry the same numeric code - if (!(cause instanceof StreamableHTTPError)) return undefined; - const code = cause.code; - return code !== undefined && code >= 100 && code <= 599 ? code : undefined; -}; - -const httpStatusFromCause = (cause: unknown): number | undefined => - statusFromStreamableHttpError(cause) ?? statusFromSsePostError(cause); - // --------------------------------------------------------------------------- // Elicitation bridge — decode incoming MCP ElicitRequest, route through // the host's elicit function, marshal the response back to MCP shape. diff --git a/packages/plugins/mcp/src/sdk/plugin.ts b/packages/plugins/mcp/src/sdk/plugin.ts index d676efc86..e6357db57 100644 --- a/packages/plugins/mcp/src/sdk/plugin.ts +++ b/packages/plugins/mcp/src/sdk/plugin.ts @@ -15,6 +15,7 @@ import { tool, ToolResult, type AuthMethodDescriptor, + type HealthCheckResult, type Integration, type IntegrationConfig, type IntegrationRecord, @@ -64,6 +65,21 @@ import { const MCP_PLUGIN_ID = "mcp" as const; +/** Classify a failed liveness probe. An auth wall (OAuth re-authorization, or a + * 401/403 surfaced into the connect message) means the credential is expired; + * anything else (server down, wrong transport) is degraded, not a credential + * problem. */ +const mcpLivenessFailureStatus = (message: string): "expired" | "degraded" => { + const lower = message.toLowerCase(); + const authWalled = + lower.includes("oauth re-authorization") || + lower.includes("(http 401)") || + lower.includes("(http 403)") || + lower.includes("unauthorized") || + lower.includes("forbidden"); + return authWalled ? "expired" : "degraded"; +}; + const legacyOAuthClientSlugCandidate = (value: string): string | null => { const slug = value .trim() @@ -1165,6 +1181,51 @@ export const mcpPlugin = definePlugin((options?: McpPluginOptions) => { return out; }), + // Liveness-only health check. MCP has no usable identity source (no + // id_token/userinfo, no standard whoami), so this answers "is this + // credential still alive?" by dialing the server and listing tools (the same + // path resolveTools uses); identity stays the user-supplied connection label. + // Only checkHealth is implemented (no candidates/describe/set), so the + // operation/identity editor stays hidden while the status dot + "Check now" + // light up. + checkHealth: ({ credential }) => + Effect.gen(function* () { + const parsed = parseMcpIntegrationConfig(credential.config); + if (!parsed) { + return { status: "unknown" as const, checkedAt: Date.now() } satisfies HealthCheckResult; + } + const connector = yield* buildConnectorInput( + parsed, + credential.values, + credential.template === null ? null : String(credential.template), + allowStdio, + ).pipe(Effect.map((ci) => createMcpConnector(ci))); + + return yield* discoverTools(connector).pipe( + Effect.map( + () => + ({ status: "healthy" as const, checkedAt: Date.now() }) satisfies HealthCheckResult, + ), + Effect.catchTag("McpToolDiscoveryError", (error) => + Effect.succeed({ + status: mcpLivenessFailureStatus(error.message), + checkedAt: Date.now(), + detail: error.message, + } satisfies HealthCheckResult), + ), + ); + }).pipe( + // buildConnectorInput rejects (e.g. stdio disabled / missing config). + Effect.catchTag("McpConnectionError", (error) => + Effect.succeed({ + status: mcpLivenessFailureStatus(error.message), + checkedAt: Date.now(), + detail: error.message, + } satisfies HealthCheckResult), + ), + Effect.withSpan("mcp.plugin.check_health"), + ), + describeAuthMethods: describeMcpAuthMethods, describeIntegrationDisplay: describeMcpIntegrationDisplay,