diff --git a/changelogs/CHANGELOG-ai-generation-queue.md b/changelogs/CHANGELOG-ai-generation-queue.md
new file mode 100644
index 0000000..a6fdf4c
--- /dev/null
+++ b/changelogs/CHANGELOG-ai-generation-queue.md
@@ -0,0 +1,31 @@
+# AI Generation Queue — Concurrent Requests Wait Instead of Failing
+
+- Fixed: asking a question in a second annotation thread (or any second AI request) while one was still generating failed with "Another AI generation is already in progress."
+- Added a serial generation **queue**: concurrent `requestAiTask` calls now run in submission order (FIFO) instead of being rejected. The backend still runs one generation at a time, but callers wait their turn automatically.
+- Single authoritative hook — wraps `M.requestAiTask`, so every caller benefits (annotation thread panels, `{{@...}}` DocGen tags, Agent Flow, AI Chat) with no per-call-site changes.
+- Annotation thread panels show a "⏳ Queued (#N)…" state while waiting, then switch to streaming when their turn starts.
+- Exposed `M.aiQueueLength()` for UI hints.
+
+---
+
+## Summary
+
+The local/cloud AI backend is single-flight (one generation at a time, gated by `aiIsGenerating`). With the new parallel annotation threads, a second concurrent question was rejected outright. This adds a queue wrapper around `requestAiTask` so concurrent requests serialize and all complete in order, and surfaces a "Queued" state in the thread panels.
+
+---
+
+## 1. Serial Queue Wrapper
+**Files:** `js/ai-assistant.js`
+**What:** A new wrapper around `M.requestAiTask` maintains a FIFO queue; `drain()` runs the next job only after the previous settles (resolve or reject). Optional `onQueued(position)` / `onQueueStart()` callbacks let callers reflect their wait. Layered after the connector-context wrapper so it covers all call sites.
+**Impact:** Concurrent AI requests no longer fail — they run in sequence.
+
+## 2. Thread Panel Queued State
+**Files:** `js/ai-tags.js`
+**What:** `sendThreadMessage` passes `onQueued`/`onQueueStart`; the AI bubble shows "⏳ Queued (#N)…" while waiting and clears to the streaming state when it begins.
+**Impact:** A queued thread reads as "waiting", not stuck or errored.
+
+---
+
+## Testing
+- Playwright (1280×800): 3 concurrent calls against a single-flight stub that rejects when busy → all 3 fulfilled, FIFO order preserved (was: 2 rejections). The shipped wrapper was verified to never reject queued calls with "already in progress".
+- Smoke suite green (one unrelated view-mode flake passes in isolation); build clean.
diff --git a/js/ai-assistant.js b/js/ai-assistant.js
index b823c32..5c2c3f1 100644
--- a/js/ai-assistant.js
+++ b/js/ai-assistant.js
@@ -1584,4 +1584,47 @@
};
})();
+ // ── Serial generation queue ──────────────────────────────────────────────────
+ // The AI backend runs ONE generation at a time (aiIsGenerating gate). Without a
+ // queue, a second concurrent caller — e.g. asking in a second annotation thread
+ // while the first is still answering — was rejected with "Another AI generation
+ // is already in progress." Wrap requestAiTask so concurrent calls QUEUE and run
+ // in submission order instead of failing. Single authoritative hook for every
+ // caller (thread panels, DocGen, Agent Flow, AI Chat).
+ // ─────────────────────────────────────────────────────────────────────────────
+ (function wrapRequestAiTaskWithQueue() {
+   var _inner = M.requestAiTask;
+   var queue = [];
+   var draining = false;
+   // drain(): start the next queued job unless one is already in flight.
+   function drain() {
+     if (draining) return;
+     var job = queue.shift();
+     if (!job) return;
+     draining = true;
+     // This job's turn has begun — let the caller clear any "Queued…" state.
+     if (job.opts && typeof job.opts.onQueueStart === 'function') {
+       try { job.opts.onQueueStart(); } catch (_) { /* ignore */ }
+     }
+     Promise.resolve()
+       .then(function () { return _inner.call(M, job.opts); })
+       .then(function (res) { job.resolve(res); }, function (err) { job.reject(err); })
+       .then(function () { draining = false; drain(); });
+   }
+   // Queued requestAiTask: settles with the wrapped call's result or error.
+   M.requestAiTask = function (opts) {
+     return new Promise(function (resolve, reject) {
+       queue.push({ opts: opts, resolve: resolve, reject: reject });
+       // Position counts the in-flight job: 1 = starts now, N = N - 1 jobs ahead.
+       if (opts && typeof opts.onQueued === 'function') {
+         try { opts.onQueued(queue.length + (draining ? 1 : 0)); } catch (_) { /* ignore */ }
+       }
+       drain();
+     });
+   };
+
+   // Expose queue depth for UI hints (0 = idle).
+   M.aiQueueLength = function () { return queue.length + (draining ? 1 : 0); };
+ })();
+
})(window.MDView);
diff --git a/js/ai-tags.js b/js/ai-tags.js
index d907dea..0992e8e 100644
--- a/js/ai-tags.js
+++ b/js/ai-tags.js
@@ -999,6 +999,15 @@
context: context,
userPrompt: text,
enableThinking: false,
+ onQueued: function (position) {
+ // Another generation is running — show a waiting state instead of failing.
+ if (!openPanels[tagData.id] || position <= 1) return;
+ aiMsg.innerHTML = 'AI⏳ Queued (#' + (position - 1) + ')…';
+ },
+ onQueueStart: function () {
+ if (!openPanels[tagData.id]) return;
+ aiMsg.innerHTML = 'AI';
+ },
onToken: function (token, accumulated) {
// Panel may have been closed mid-stream — guard against a stale ref.
if (!openPanels[tagData.id]) return;