Textagent · ijbo · Jun 23, 2026 · Jun 23, 2026
diff --git a/changelogs/CHANGELOG-ai-generation-queue.md b/changelogs/CHANGELOG-ai-generation-queue.md
@@ -0,0 +1,31 @@
+# AI Generation Queue — Concurrent Requests Wait Instead of Failing
+
+- Fixed: asking a question in a second annotation thread (or any second AI request) while one was still generating failed with "Another AI generation is already in progress."
+- Added a serial generation **queue**: concurrent `requestAiTask` calls now run in submission order (FIFO) instead of being rejected. The backend still runs one generation at a time, but callers wait their turn automatically.
+- Single authoritative hook — wraps `M.requestAiTask`, so every caller benefits (annotation thread panels, `{{@...}}` DocGen tags, Agent Flow, AI Chat) with no per-call-site changes.
+- Annotation thread panels show a "⏳ Queued (#N)…" state while waiting, then switch to streaming when their turn starts.
+- Exposed `M.aiQueueLength()` (running + waiting requests; 0 = idle) for UI hints.
+
+---
+
+## Summary
+
+The local/cloud AI backend is single-flight (one generation at a time, gated by `aiIsGenerating`). With the new parallel annotation threads, a second concurrent question was rejected outright. This adds a queue wrapper around `requestAiTask` so concurrent requests serialize and all complete in order, and surfaces a "Queued" state in the thread panels.
+
+---
+
+## 1. Serial Queue Wrapper
+**Files:** `js/ai-assistant.js`
+**What:** A new wrapper around `M.requestAiTask` maintains a FIFO queue; `drain()` runs the next job only after the previous settles (resolve or reject). Optional `onQueued(position)` / `onQueueStart()` callbacks let callers reflect their wait. Layered after the connector-context wrapper so it covers all call sites.
+**Impact:** Concurrent AI requests no longer fail — they run in sequence.
+
+## 2. Thread Panel Queued State
+**Files:** `js/ai-tags.js`
+**What:** `sendThreadMessage` passes `onQueued`/`onQueueStart`; the AI bubble shows "⏳ Queued (#N)…" while waiting and clears to the streaming state when it begins.
+**Impact:** A queued thread reads as "waiting", not stuck or errored.
+
+---
+
+## Testing
+- Playwright (1280×800): 3 concurrent calls against a single-flight stub that rejects when busy → all 3 fulfilled, FIFO order preserved (was: 2 rejections). The shipped wrapper was verified to never reject queued calls with "already in progress".
+- Smoke suite green (one unrelated view-mode flake passes in isolation); build clean.
diff --git a/js/ai-assistant.js b/js/ai-assistant.js
@@ -1584,4 +1584,47 @@
     };
   })();
 
+  // ── Serial generation queue ──────────────────────────────────────────────────
+  // The AI backend runs ONE generation at a time (aiIsGenerating gate), so a second
+  // concurrent caller was rejected with "Another AI generation is already in
+  // progress." Wrap requestAiTask so concurrent calls QUEUE and run in submission
+  // order — one hook for every caller (thread panels, DocGen, Agent Flow, AI Chat).
+  //   opts.onQueued(position): fired on submit; 1 = starts now, 2 = first waiter, …
+  //   opts.onQueueStart(): fired when the request leaves the queue and starts.
+  // ─────────────────────────────────────────────────────────────────────────────
+  (function wrapRequestAiTaskWithQueue() {
+    var _inner = M.requestAiTask;
+    var queue = [];        // waiting jobs: { opts, resolve, reject }
+    var draining = false;  // true while one job is inside _inner
+
+    function drain() {
+      if (draining) return;
+      var job = queue.shift();
+      if (!job) return;
+      draining = true;
+      // Tell the caller its turn has come (e.g. so it can clear a "Queued…" label).
+      if (job.opts && typeof job.opts.onQueueStart === 'function') {
+        try { job.opts.onQueueStart(); } catch (_) { /* ignore */ }
+      }
+      // _inner runs inside .then() so a synchronous throw becomes a rejection too.
+      Promise.resolve()
+        .then(function () { return _inner.call(M, job.opts); })
+        .then(function (res) { job.resolve(res); }, function (err) { job.reject(err); })
+        .then(function () { draining = false; drain(); });
+    }
+
+    M.requestAiTask = function (opts) {
+      return new Promise(function (resolve, reject) {
+        queue.push({ opts: opts, resolve: resolve, reject: reject });
+        // Count the running job too, else "starts now" and "first waiter" both read 1.
+        if (opts && typeof opts.onQueued === 'function') {
+          try { opts.onQueued(queue.length + (draining ? 1 : 0)); } catch (_) { /* ignore */ }
+        }
+        drain();
+      });
+    };
+    // Expose queue depth for UI hints (0 = idle).
+    M.aiQueueLength = function () { return queue.length + (draining ? 1 : 0); };
+  })();
+
 })(window.MDView);
diff --git a/js/ai-tags.js b/js/ai-tags.js
@@ -999,6 +999,15 @@
                 context: context,
                 userPrompt: text,
                 enableThinking: false,
+                onQueued: function (position) {
+                    // Another generation is running — show a waiting state instead of failing.
+                    if (!openPanels[tagData.id] || position <= 1) return;
+                    aiMsg.innerHTML = '<span class="ai-tag-msg-label">AI</span><span style="opacity:0.7">⏳ Queued (#' + (position - 1) + ')…</span>';
+                },
+                onQueueStart: function () {
+                    if (!openPanels[tagData.id]) return;
+                    aiMsg.innerHTML = '<span class="ai-tag-msg-label">AI</span>';
+                },
                 onToken: function (token, accumulated) {
                     // Panel may have been closed mid-stream — guard against a stale ref.
                     if (!openPanels[tagData.id]) return;