diff --git a/docs/plans/finetune-deploy-mvp.md b/docs/plans/finetune-deploy-mvp.md new file mode 100644 index 0000000..7e3616a --- /dev/null +++ b/docs/plans/finetune-deploy-mvp.md @@ -0,0 +1,438 @@ +# 模型训练 + 数据集 + 部署:最小闭环 CLI 设计 + +> 目标:一个 Qwen 文本模型 SFT 训练、数据集上传、模型部署的端到端最小链路。 + +--- + +## 一、命令概览 + +| 优先级 | 命令 | 映射 API | 用途 | +| ------ | ----------------------------------- | --------------------------------------------- | ------------------------------- | +| P0 | `bl dataset upload ` | `POST /api/v1/files` | 上传训练数据(含本地格式校验) | +| P0 | `bl finetune create` | `POST /api/v1/fine-tunes` | 创建 SFT 训练任务(预填默认超参) | +| P0 | `bl finetune status ` | `GET /api/v1/fine-tunes/{job_id}` | 查询训练状态 | +| P0 | `bl deploy create` | `POST /api/v1/deployments` | 部署训练好的模型 | +| P1 | `bl finetune logs ` | `GET /api/v1/fine-tunes/{job_id}/logs` | 拉取训练日志 | +| P1 | `bl finetune checkpoints ` | `GET /api/v1/fine-tunes/{job_id}/checkpoints` | 查看/挑选 Checkpoint | +| P1 | `bl deploy status ` | `GET /api/v1/deployments/{deployed_model}` | 查询部署状态 | +| P1 | `bl deploy delete ` | `DELETE /api/v1/deployments/{deployed_model}` | 下线部署 | +| P1 | `bl infer --model ` | 复用 `text chat` 通路 | 调用已部署模型 | + +--- + +## 二、P0 命令详细设计 + +### 2.1 `bl dataset upload` + +**定位:** 上传训练数据文件到百炼平台,获取 `file_id` 供训练任务引用。 + +#### CLI 签名 + +``` +bl dataset upload [--purpose fine-tune] [--validate] [--no-validate] +``` + +| Flag | 必填 | 默认值 | 说明 | +| --------------- | ---- | ----------- | ------------------------------ | +| `` | 是 | — | 本地文件路径(.jsonl 或 .zip) | +| `--purpose` | 否 | `fine-tune` | 文件用途标签 | +| `--validate` | 否 | `true` | 上传前执行本地格式校验 | +| `--no-validate` | 否 | — | 跳过本地校验 | + +#### 本地格式校验规则(提交前拦截) + +校验逻辑在 `packages/core` 实现(纯函数),CLI 调用后展示错误: + +1. **文件格式检查**:仅允许 `.jsonl` 和 `.zip`(zip 内根目录必须有 `data.jsonl`) +2. **JSONL 逐行校验**: + - 每行可被 `JSON.parse` + - 顶层必须包含 `messages` 数组 + - `messages` 中每项必须包含 `role`(枚举:`system` | `user` | `assistant`)和 `content`(非空字符串) + - 至少包含一条 `user` + 一条 `assistant` 消息 +3. 
**数量校验**:SFT 训练建议至少准备上千条数据(数据量不足时仅给出 warning,而非 hard fail);少于 10 条时直接 hard fail +4. **文件体积**:≤ 300MB + +#### 校验失败输出示例 + +``` +✗ Validation failed: + + Line 3: missing "messages" field + Line 7: role "bot" is not valid (expected: system | user | assistant) + Line 12: "content" is empty string + +Fix 3 errors above and retry. +``` + +#### API 调用 + +``` +POST https://dashscope.aliyuncs.com/api/v1/files +Content-Type: multipart/form-data +Authorization: Bearer + +Body: + files: + purpose: "fine-tune" + +Response 200: +{ + "id": "file-xxxx", + "bytes": 12345, + "filename": "train.jsonl", + "purpose": "fine-tune", + "created_at": 1700000000 +} +``` + +#### 输出 + +- 默认 text:`✓ Uploaded file-xxxx (12.3 KB) — use this ID in bl finetune create` +- `--output json`:完整 response body +- `--quiet`:仅输出 `file-xxxx` + +--- + +### 2.2 `bl finetune create` + +**定位:** 创建一个 SFT 训练任务。核心设计原则——**预填合理默认超参 + 提交前二次确认**,降低 OOM/超参不合理导致的训练失败率。 + +#### CLI 签名 + +``` +bl finetune create --model --data [hyperparams...] +``` + +| Flag | 必填 | 默认值 | 说明 | +| ------------------- | ---- | ------------ | -------------------------------------------- | +| `--model` | 是 | — | 基座模型(如 `qwen3-8b`, `qwen3-14b`) | +| `--data` | 是 | — | 训练数据 file_id(bl dataset upload 返回值) | +| `--validation-data` | 否 | — | 验证数据 file_id | +| `--epochs` | 否 | 3 | 训练轮次 (n_epochs) | +| `--batch-size` | 否 | 按模型自动选 | 批大小 | +| `--lr` | 否 | 按模型自动选 | 学习率 (learning_rate_multiplier) | +| `--warmup-ratio` | 否 | 0.1 | warmup 比例 | +| `--suffix` | 否 | — | 输出模型后缀名 | +| `--yes` / `-y` | 否 | — | 跳过确认直接提交 | + +#### 预填默认超参策略 + +| 基座模型 | batch_size | lr_multiplier | n_epochs | 备注 | +| ---------- | ---------- | ------------- | -------- | ---------------- | +| qwen3-8b | 4 | 1e-5 | 3 | 小模型可大 batch | +| qwen3-14b | 2 | 5e-6 | 3 | 中模型防 OOM | +| qwen3-32b+ | 1 | 2e-6 | 2 | 大模型保守设置 | + +> 以上为建议默认值,用户显式传参时覆盖。具体映射表在 `packages/core/src/finetune/defaults.ts` 维护。 + +#### 提交前交互确认 + +非 `--yes` 模式下,显示任务摘要等待确认: + +``` +┌─ Fine-tune Job Summary ──────────────────────┐ +│ 
Model: qwen3-8b │ +│ Training: file-abc123 (2,048 samples) │ +│ Validation: (none) │ +│ Epochs: 3 │ +│ Batch size: 4 │ +│ LR: 1e-5 │ +│ Warmup: 0.1 │ +│ Suffix: my-assistant │ +│ │ +│ Estimated cost: ~¥XX (based on token count) │ +└───────────────────────────────────────────────┘ +Proceed? [Y/n] +``` + +#### API 调用 + +``` +POST https://dashscope.aliyuncs.com/api/v1/fine-tunes +Authorization: Bearer +Content-Type: application/json + +{ + "model": "qwen3-8b", + "training_file_ids": ["file-abc123"], + "validation_file_ids": [], + "hyper_parameters": { + "n_epochs": 3, + "batch_size": 4, + "learning_rate": "1e-5", + "warmup_ratio": 0.1 + }, + "suffix": "my-assistant" +} + +Response 200: +{ + "job_id": "ft-xxxx", + "status": "PENDING", + "model": "qwen3-8b", + "created_at": "2025-01-01T00:00:00Z", + "training_file_ids": ["file-abc123"], + "hyper_parameters": {...}, + "trained_model": null +} +``` + +#### 输出 + +- text:`✓ Fine-tune job ft-xxxx created (PENDING). Track with: bl finetune status ft-xxxx` +- json:完整 response body +- quiet:`ft-xxxx` + +--- + +### 2.3 `bl finetune status` + +**定位:** 查询训练任务状态,支持 `--wait` 轮询模式。 + +#### CLI 签名 + +``` +bl finetune status [--wait] [--interval ] +``` + +| Flag | 必填 | 默认值 | 说明 | +| ------------ | ---- | ------ | ---------------- | +| `` | 是 | — | 任务 ID | +| `--wait` | 否 | — | 持续轮询直到终态 | +| `--interval` | 否 | 30 | 轮询间隔(秒) | + +#### 状态机 + +``` +PENDING → RUNNING → SUCCEEDED + ↘ FAILED +``` + +#### 输出(text 模式) + +单次查询: + +``` +Job: ft-xxxx +Status: RUNNING (elapsed 12m) +Model: qwen3-8b +Output: (pending) +``` + +`--wait` 模式(spinner + 实时刷新): + +``` +⠋ ft-xxxx RUNNING [14:32 elapsed] +✓ ft-xxxx SUCCEEDED — trained model: qwen3-8b:ft-xxxx-20250101 + Deploy with: bl deploy create --model qwen3-8b:ft-xxxx-20250101 +``` + +失败时: + +``` +✗ ft-xxxx FAILED + Error: OutOfMemory — try reducing --batch-size or using a smaller model +``` + +--- + +### 2.4 `bl deploy create` + +**定位:** 将训练好的模型(或 checkpoint)部署为可调用的推理服务。 + +#### CLI 签名 + +``` +bl deploy 
create --model [--plan ] [--capacity ] +``` + +| Flag | 必填 | 默认值 | 说明 | +| ------------ | ---- | ---------- | ----------------------------------------------- | +| `--model` | 是 | — | 待部署模型名称(finetune 产出的 trained_model) | +| `--plan` | 否 | `standard` | 部署方案 | +| `--capacity` | 否 | 依 plan | 并发容量 | +| `--wait` | 否 | — | 等待部署就绪 | + +#### API 调用 + +``` +POST https://dashscope.aliyuncs.com/api/v1/deployments +Authorization: Bearer +Content-Type: application/json + +{ + "model_name": "qwen3-8b:ft-xxxx-20250101", + "plan": "standard", + "capacity": 2 +} + +Response 200: +{ + "deployed_model": "qwen3-8b-ft-xxxx", + "model_name": "qwen3-8b:ft-xxxx-20250101", + "status": "PENDING", + "created_at": "..." +} +``` + +#### 输出 + +``` +✓ Deployment created: qwen3-8b-ft-xxxx (PENDING) + Once RUNNING, call with: bl text chat --model qwen3-8b-ft-xxxx + Check status: bl deploy status qwen3-8b-ft-xxxx +``` + +--- + +## 三、P1 命令简要设计 + +### 3.1 `bl finetune logs ` + +流式输出训练日志,支持 `--follow`(类似 `tail -f`)。输出 loss/step/epoch 信息。 + +### 3.2 `bl finetune checkpoints ` + +列出可选 checkpoint(step, loss, eval metrics),支持 `--output json` 供脚本使用。可配合 `bl deploy create --model ` 部署指定 checkpoint。 + +### 3.3 `bl deploy status ` + +查询部署状态及资源信息(PENDING → RUNNING → STOPPED/FAILED)。 + +### 3.4 `bl deploy delete ` + +下线部署。需部署处于 RUNNING/STOPPED/FAILED 状态。交互确认或 `--yes` 跳过。 + +### 3.5 `bl infer --model ` + +实际可复用已有 `bl text chat --model ` 通路,作为别名/快捷方式。P1 考虑是否有独立存在必要。 + +--- + +## 四、代码架构方案 + +按照 monorepo 分层约定(core 纯逻辑 / cli 是 UI): + +### packages/core 新增模块 + +``` +packages/core/src/ +├── finetune/ +│ ├── index.ts # re-export +│ ├── api.ts # createFineTune, getFineTune, getFineTuneLogs, getCheckpoints +│ ├── defaults.ts # 模型 → 默认超参映射表 +│ └── types.ts # FineTuneJob, HyperParameters, CheckpointInfo 类型 +├── dataset/ +│ ├── index.ts +│ ├── upload.ts # uploadDataset (multipart) +│ ├── validate.ts # validateJsonl (纯函数,逐行校验) +│ └── types.ts # DatasetFile, ValidationError 类型 +└── deploy/ + ├── index.ts + ├── api.ts # 
createDeployment, getDeployment, deleteDeployment + └── types.ts # Deployment, DeploymentStatus 类型 +``` + +### packages/cli 新增命令 + +``` +packages/cli/src/commands/ +├── dataset/ +│ └── upload.ts # bl dataset upload +├── finetune/ +│ ├── create.ts # bl finetune create +│ ├── status.ts # bl finetune status +│ ├── logs.ts # bl finetune logs +│ └── checkpoints.ts # bl finetune checkpoints +└── deploy/ + ├── create.ts # bl deploy create + ├── status.ts # bl deploy status + └── delete.ts # bl deploy delete +``` + +--- + +## 五、关键设计决策 + +### 5.1 数据格式校验放在 CLI 侧(提交前拦截) + +训练失败 TOP 原因中"数据格式错误"占比高。与其等服务端 10 分钟后返回 FAILED,不如 CLI 本地秒级校验: + +- **validate.ts** 是纯函数,接收 ReadableStream/Buffer,返回 `ValidationError[]` +- CLI 在 `dataset upload` 默认执行校验,`--no-validate` 允许跳过 +- 未来可扩展为独立命令 `bl dataset validate ` + +### 5.2 超参预填 + 确认而非强制 + +- core 维护 `defaults.ts` 映射:`model → { batch_size, lr, epochs }` +- CLI `finetune create` 未指定超参时自动填入 +- 提交前展示完整参数面板(非 --yes 模式),避免"我以为用了默认但其实没传" + +### 5.3 费用感知(P1+) + +- 图像/语音/视频训练费用远高于文本。MVP 阶段(Qwen 文本 SFT)费用可控 +- 后续扩展多模态时,在 confirm panel 中强化费用估算提示 +- `bl quota check` 已存在,可在 `finetune create` 内部集成余额预检 + +### 5.4 `bl infer` 是否独立存在 + +建议 P1 阶段**不新增** `bl infer`,而是让 `bl text chat --model ` 直接工作。部署完成后的引导文案中指明这个用法即可。减少命令膨胀。 + +--- + +## 六、最小闭环用户操作流 + +```bash +# 1. 准备数据 → 上传(含校验) +bl dataset upload ./train.jsonl +# ✓ Uploaded file-abc123 (5.2 MB) + +# 2. 创建训练任务(自动预填超参) +bl finetune create --model qwen3-8b --data file-abc123 +# Shows summary panel → confirm → ✓ Job ft-xxxx created + +# 3. 等待训练完成 +bl finetune status ft-xxxx --wait +# ⠋ RUNNING [23:15] → ✓ SUCCEEDED: qwen3-8b:ft-xxxx-20250601 + +# 4. 部署模型 +bl deploy create --model qwen3-8b:ft-xxxx-20250601 --wait +# ✓ Deployed: qwen3-8b-ft-xxxx (RUNNING) + +# 5. 
调用模型 +bl text chat --model qwen3-8b-ft-xxxx "你好,介绍一下你自己" +# (正常推理输出) +``` + +--- + +## 七、实现顺序建议 + +``` +Phase 1 (P0 — 最小闭环): + core: dataset/validate.ts → dataset/upload.ts → finetune/api.ts → deploy/api.ts + cli: dataset upload → finetune create → finetune status → deploy create + 测试: 单元测试 validate.ts + e2e dry-run + 真实 API 端到端一次 + +Phase 2 (P1 — 可观测性): + finetune logs → finetune checkpoints → deploy status → deploy delete + 费用估算集成 + +Phase 3 (后续): + bl dataset validate (独立命令) + bl dataset list (查看已上传) + bl finetune list (查看历史任务) + 多模态 SFT 支持(图像/视频数据格式校验扩展) +``` + +--- + +## 八、风险与 TODO + +| 风险点 | 影响 | 缓解措施 | +| ----------------- | ----------------- | --------------------------------------------- | +| OOM 训练失败 | 用户浪费时间/金钱 | 保守默认超参 + batch_size 自适应模型大小 | +| 数据格式错误 | 训练启动后才失败 | 本地校验拦截,启动秒级反馈 | +| 部署等待时间长 | 用户困惑 | `--wait` + 预估时间提示 | +| 费用超预期 | 账号欠费 | confirm panel 预估费用(P1 集成 quota check) | +| API endpoint 变动 | 调用失败 | 端点集中管理在 core/client/endpoints.ts | diff --git a/packages/cli/src/commands/advisor/recommend.ts b/packages/cli/src/commands/advisor/recommend.ts index 46ee0a8..121c15a 100644 --- a/packages/cli/src/commands/advisor/recommend.ts +++ b/packages/cli/src/commands/advisor/recommend.ts @@ -29,41 +29,41 @@ function formatContextWindow(tokens: number): string { } const MODALITY_LABELS: Record = { - Text: "文本", - Image: "图片", - Video: "视频", - Audio: "音频", + Text: "Text", + Image: "Image", + Video: "Video", + Audio: "Audio", }; const CAPABILITY_LABELS: Record = { - TG: "文本生成", - VU: "视觉理解", - IG: "图像生成", - VG: "视频生成", - TTS: "语音合成", - ASR: "语音识别", - Reasoning: "推理", + TG: "Text Gen", + VU: "Vision", + IG: "Image Gen", + VG: "Video Gen", + TTS: "Text-to-Speech", + ASR: "Speech-to-Text", + Reasoning: "Reasoning", }; const BUDGET_LABELS: Record = { - low: "低成本优先", - medium: "适中", - high: "高投入", + low: "Cost-Effective", + medium: "Balanced", + high: "High Investment", }; const QUALITY_LABELS: Record = { - flagship: "旗舰优先", - balanced: "均衡", - "cost-optimized": "性价比优先", 
+ flagship: "Flagship", + balanced: "Balanced", + "cost-optimized": "Value", }; const PREFERENCE_MODE_LABELS: Record = { - scoped: "限定范围", - comparison: "对比评估", - alternative: "替代推荐", + scoped: "Scoped", + comparison: "Comparison", + alternative: "Alternative", }; function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { const colorize = noColor ? new Chalk({ level: 0 }) : chalk; const lines: string[] = []; - lines.push(colorize.cyan.bold("需求理解")); + lines.push(colorize.cyan.bold("Intent Analysis")); if (intent.taskSummary) { lines.push(""); @@ -72,7 +72,7 @@ function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { if (intent.scenarioHints.length) { lines.push(""); - lines.push(`${colorize.dim("场景特征")} ${intent.scenarioHints.join(" · ")}`); + lines.push(`${colorize.dim("Scenario")} ${intent.scenarioHints.join(" · ")}`); } const inputLabels = intent.inputModality.map((mod) => MODALITY_LABELS[mod] ?? mod); @@ -80,40 +80,40 @@ function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { if (inputLabels.length || outputLabels.length) { lines.push(""); const parts: string[] = []; - if (inputLabels.length) parts.push(`${colorize.dim("输入")} ${inputLabels.join(", ")}`); - if (outputLabels.length) parts.push(`${colorize.dim("输出")} ${outputLabels.join(", ")}`); + if (inputLabels.length) parts.push(`${colorize.dim("Input")} ${inputLabels.join(", ")}`); + if (outputLabels.length) parts.push(`${colorize.dim("Output")} ${outputLabels.join(", ")}`); lines.push(parts.join(" ")); } const capLabels = intent.requiredCapabilities.map((cap) => CAPABILITY_LABELS[cap] ?? cap); if (capLabels.length) { - lines.push(`${colorize.dim("所需能力")} ${capLabels.join(", ")}`); + lines.push(`${colorize.dim("Capabilities")} ${capLabels.join(", ")}`); } const budgetLabel = BUDGET_LABELS[intent.budget] ?? intent.budget; const qualityLabel = QUALITY_LABELS[intent.qualityPreference] ?? 
intent.qualityPreference; lines.push(""); lines.push( - `${colorize.dim("预算倾向")} ${budgetLabel} ${colorize.dim("质量偏好")} ${qualityLabel}`, + `${colorize.dim("Budget")} ${budgetLabel} ${colorize.dim("Quality")} ${qualityLabel}`, ); const preference = intent.modelPreference; if (preference && preference.mode !== "unconstrained") { lines.push(""); const modeLabel = PREFERENCE_MODE_LABELS[preference.mode] ?? preference.mode; - const prefParts = [colorize.dim("推荐模式") + ` ${colorize.yellow(modeLabel)}`]; + const prefParts = [colorize.dim("Mode") + ` ${colorize.yellow(modeLabel)}`]; if (preference.targets?.length) { - prefParts.push(colorize.dim("目标") + ` ${preference.targets.join(", ")}`); + prefParts.push(colorize.dim("Targets") + ` ${preference.targets.join(", ")}`); } if (preference.excludes?.length) { - prefParts.push(colorize.dim("排除") + ` ${preference.excludes.join(", ")}`); + prefParts.push(colorize.dim("Excludes") + ` ${preference.excludes.join(", ")}`); } lines.push(prefParts.join(" ")); } if (intent.segments?.length) { lines.push(""); - lines.push(colorize.dim("任务拆解")); + lines.push(colorize.dim("Pipeline")); for (const [idx, segment] of intent.segments.entries()) { const outMods = segment.outputModality.map((mod) => MODALITY_LABELS[mod] ?? mod).join(", "); lines.push( @@ -131,19 +131,19 @@ function formatIntentSummary(intent: IntentProfile, noColor: boolean): string { }); } -const RECOMMEND_LABELS = ["最佳推荐", "次优选择", "备选参考"]; +const RECOMMEND_LABELS = ["Best Pick", "Runner-Up", "Alternative"]; function renderCard(rec: RecommendedModel, index: number, colorize: ChalkInstance): string { const labelColors = [colorize.green.bold, colorize.blue.bold, colorize.magenta.bold]; const colorFn = labelColors[index] ?? colorize.white.bold; - const label = RECOMMEND_LABELS[index] ?? `推荐 #${index + 1}`; + const label = RECOMMEND_LABELS[index] ?? 
`#${index + 1}`; const lines: string[] = []; - lines.push(colorFn(`⬢ 推荐 #${index + 1} — ${label}`)); + lines.push(colorFn(`⬢ #${index + 1} — ${label}`)); lines.push(""); lines.push(`${colorize.bold(rec.name)} ${colorize.dim(`(${rec.model})`)}`); lines.push(""); - lines.push(`${colorize.cyan("推荐理由")} ${rec.reason}`); + lines.push(`${colorize.cyan("Why")} ${rec.reason}`); if (rec.highlights.length) { lines.push(""); @@ -153,8 +153,8 @@ function renderCard(rec: RecommendedModel, index: number, colorize: ChalkInstanc } const meta: string[] = []; - if (rec.contextWindow) meta.push(`上下文 ${formatContextWindow(rec.contextWindow)}`); - if (rec.maxOutputTokens) meta.push(`最大输出 ${formatContextWindow(rec.maxOutputTokens)}`); + if (rec.contextWindow) meta.push(`Context ${formatContextWindow(rec.contextWindow)}`); + if (rec.maxOutputTokens) meta.push(`Max Output ${formatContextWindow(rec.maxOutputTokens)}`); if (meta.length) { lines.push(""); lines.push(colorize.dim(meta.join(" · "))); @@ -163,7 +163,7 @@ function renderCard(rec: RecommendedModel, index: number, colorize: ChalkInstanc const docLink = buildDocLink(rec.docUrl); if (docLink) { lines.push(""); - lines.push(colorize.dim(`文档 ${docLink}`)); + lines.push(colorize.dim(`Docs ${docLink}`)); } return boxen(lines.join("\n"), { @@ -183,7 +183,7 @@ function formatSingleResult(results: RecommendedModel[], noColor: boolean): stri function formatPipelineResult(summary: string, steps: PipelineStep[], noColor: boolean): string { const colorize = noColor ? 
new Chalk({ level: 0 }) : chalk; const lines: string[] = []; - lines.push(` ${colorize.yellow.bold("⚡ 组合方案")} ${summary}`); + lines.push(` ${colorize.yellow.bold("⚡ Pipeline")} ${summary}`); for (const [stepIdx, { step, recommendations, warnings }] of steps.entries()) { lines.push(""); @@ -247,14 +247,14 @@ export default defineCommand({ if (!userInput.trim()) { if (isInteractive({ nonInteractive: config.nonInteractive })) { - const hint = await promptText({ message: "描述你的需求:" }); + const hint = await promptText({ message: "Describe your requirement:" }); if (!hint) { - process.stderr.write("已取消。\n"); + process.stderr.write("Cancelled.\n"); process.exit(1); } userInput = hint; } else { - failIfMissing("message", 'bl advisor recommend "你的需求"'); + failIfMissing("message", 'bl advisor recommend "your requirement"'); } } @@ -262,16 +262,16 @@ export default defineCommand({ const format = detectOutputFormat(config.output); const modelsOptions: GetModelsOptions = { - onPrepareStart: () => process.stderr.write("初始化中...\n"), + onPrepareStart: () => process.stderr.write("Initializing model data...\n"), }; - process.stderr.write("正在分析需求...\n"); + process.stderr.write("Analyzing your request...\n"); const [allModels, intent] = await Promise.all([ getModels(config, modelsOptions), analyzeIntent(config, userInput), ]); if (intent.confidence === 0) { - process.stderr.write("需求分析超时,使用默认参数继续...\n"); + process.stderr.write("Intent analysis timed out, using defaults...\n"); } else { process.stderr.write("\n"); } @@ -297,7 +297,7 @@ export default defineCommand({ } // Stage 3: LLM Ranking - const spinner = createSpinner("正在推荐最佳模型..."); + const spinner = createSpinner("Recommending best models..."); spinner.start(); const result = await rankModels(config, candidates, intent, userInput, top); @@ -305,12 +305,31 @@ export default defineCommand({ spinner.stop(); if (isEmptyResult(result)) { - emitBare("暂无满足该需求的模型。"); + emitBare("No suitable models found for this request."); return; } if 
(format !== "text") { - emitResult(result, format); + emitResult( + { + intent: { + taskSummary: intent.taskSummary, + scenarioHints: intent.scenarioHints, + complexity: intent.complexity, + inputModality: intent.inputModality, + outputModality: intent.outputModality, + requiredCapabilities: intent.requiredCapabilities, + budget: intent.budget, + qualityPreference: intent.qualityPreference, + modelPreference: + intent.modelPreference?.mode !== "unconstrained" ? intent.modelPreference : undefined, + segments: intent.segments, + }, + result, + candidates: candidates.length, + }, + format, + ); return; } diff --git a/packages/cli/src/commands/quota/history.ts b/packages/cli/src/commands/quota/history.ts index 703e77b..f3035db 100644 --- a/packages/cli/src/commands/quota/history.ts +++ b/packages/cli/src/commands/quota/history.ts @@ -159,11 +159,6 @@ export default defineCommand({ throw err; } - if (format === "json") { - emitResult(result, format); - return; - } - const resp = extractResponseData(result as Record); let records = (resp.records as LimitApplicationItem[]) ?? []; const total = (resp.items as number) ?? records.length; @@ -172,6 +167,16 @@ export default defineCommand({ records = records.filter((r) => r.deployedModel === modelFilter); } + if (format === "json") { + const items = records.map((r) => ({ + model: r.deployedModel, + tokenLimit: r.usageLimit, + appliedAt: formatDateTime(r.gmtCreate), + })); + emitResult({ records: items, total: modelFilter ? 
records.length : total }, format); + return; + } + if (records.length === 0) { process.stdout.write("No quota change history found.\n"); return; diff --git a/packages/cli/src/commands/quota/list.ts b/packages/cli/src/commands/quota/list.ts index f448d2d..e0eb7c4 100644 --- a/packages/cli/src/commands/quota/list.ts +++ b/packages/cli/src/commands/quota/list.ts @@ -218,7 +218,25 @@ export default defineCommand({ } if (format === "json") { - emitResult(models, format); + const items = models.map((m) => { + const qpm = m.qpmInfo; + const modelDefault = qpm?.["model-default"]; + const userSpec = qpm?.["user-spec"]; + + const defaultRPM = calculateRPM(modelDefault); + const defaultTPM = calculateTPM(modelDefault); + const currentRPM = calculateRPM(userSpec, modelDefault?.count_limit_period) || defaultRPM; + const currentTPM = calculateTPM(userSpec, modelDefault?.usage_limit_period) || defaultTPM; + const maxTPM = defaultTPM * 2; + + return { + model: m.model, + rpm: currentRPM > 0 ? currentRPM : null, + tpm: currentTPM > 0 ? currentTPM : null, + maxTPM: maxTPM > 0 ? maxTPM : null, + }; + }); + emitResult(items, format); return; } diff --git a/packages/cli/src/commands/usage/free.ts b/packages/cli/src/commands/usage/free.ts index bdc8837..b0d63b5 100644 --- a/packages/cli/src/commands/usage/free.ts +++ b/packages/cli/src/commands/usage/free.ts @@ -297,11 +297,6 @@ export default defineCommand({ }), ]); - if (format === "json") { - emitResult(quotaResult, format); - return; - } - const allQuotas = extractQuotas(quotaResult); let quotas = modelFlag ? allQuotas @@ -322,14 +317,44 @@ export default defineCommand({ quotas.sort((a, b) => (a.quotaValidityPeriod ?? 0) - (b.quotaValidityPeriod ?? 
0)); } + const stopStatuses = extractFreeTierOnlyStatuses(stopResult); + const stopMap = new Map(stopStatuses.map((status) => [status.model, status.freeTierOnly])); + + if (format === "json") { + const items = quotas.map((quota) => { + const hasQuota = quota.quotaInitTotal != null && quota.quotaTotal != null; + const used = hasQuota ? quota.quotaInitTotal - quota.quotaTotal : 0; + const stopStatus = stopMap.get(quota.model); + const autoStop = + quota.quotaStatus === "UNKNOWN" + ? "unsupported" + : stopStatus === true + ? true + : stopStatus === false + ? false + : null; + return { + model: quota.model, + type: typeMap.get(quota.model) || null, + remaining: hasQuota ? quota.quotaTotal : null, + total: hasQuota ? quota.quotaInitTotal : null, + usagePercent: + hasQuota && quota.quotaInitTotal > 0 + ? Math.round((used / quota.quotaInitTotal) * 1000) / 10 + : null, + expires: quota.quotaValidityPeriod ? formatDate(quota.quotaValidityPeriod) : null, + autoStop, + }; + }); + emitResult(items, format); + return; + } + if (quotas.length === 0) { process.stdout.write("No free-tier quota found.\n"); return; } - const stopStatuses = extractFreeTierOnlyStatuses(stopResult); - const stopMap = new Map(stopStatuses.map((status) => [status.model, status.freeTierOnly])); - printTable(quotas, stopMap, typeMap, config.noColor); }, }); diff --git a/packages/cli/src/commands/usage/stats.ts b/packages/cli/src/commands/usage/stats.ts index 903b2fc..db3e9fa 100644 --- a/packages/cli/src/commands/usage/stats.ts +++ b/packages/cli/src/commands/usage/stats.ts @@ -375,16 +375,31 @@ export default defineCommand({ ); const allItems: ModelStatisticItem[] = []; - const jsonResults: unknown[] = []; for (const result of results) { if (!result) continue; - jsonResults.push(result); const listData = extractListData(result); allItems.push(...listData.list); } if (format === "json") { - emitResult(jsonResults.length === 1 ? 
jsonResults[0] : jsonResults, format); + const items = allItems.map((item) => { + const usage = resolveUsageMap(item); + const clean: Record = { + model: item.model, + successfulCalls: item.callSuccessCount ?? 0, + }; + for (const [key, val] of Object.entries(usage)) { + clean[key] = val; + } + return clean; + }); + emitResult( + { + period: { start: formatDate(startTime), end: formatDate(endTime), days: daysFlag }, + items, + }, + format, + ); return; } @@ -411,12 +426,37 @@ export default defineCommand({ process.exit(1); } + const stat = extractOverviewData(result); + if (format === "json") { - emitResult(result, format); + if (!stat) { + emitResult( + { + period: { start: formatDate(startTime), end: formatDate(endTime), days: daysFlag }, + modelsCalled: 0, + successfulCalls: 0, + }, + format, + ); + return; + } + emitResult( + { + period: { start: formatDate(startTime), end: formatDate(endTime), days: daysFlag }, + modelsCalled: stat.modelCount ?? 0, + successfulCalls: stat.callSuccessCount ?? 0, + usages: (stat.usages ?? []).map((u) => ({ + key: u.key, + value: u.value, + unit: u.unit, + label: USAGE_KEY_LABELS[u.key]?.en ?? 
u.key, + })), + }, + format, + ); return; } - const stat = extractOverviewData(result); if (!stat) { process.stdout.write("No usage data found.\n"); return; diff --git a/packages/cli/src/commands/workspace/list.ts b/packages/cli/src/commands/workspace/list.ts index a944151..8df978d 100644 --- a/packages/cli/src/commands/workspace/list.ts +++ b/packages/cli/src/commands/workspace/list.ts @@ -113,21 +113,30 @@ export default defineCommand({ data: {}, }); - if (format === "json") { - emitResult(result, format); - return; - } - const resp = extractResponseData(result as Record); const dataArr = resp.data as Record[] | undefined; if (!Array.isArray(dataArr) || dataArr.length === 0) { - process.stdout.write("No workspace found.\n"); + if (format === "json") { + emitResult([], format); + } else { + process.stdout.write("No workspace found.\n"); + } return; } let workspaces = dataArr as unknown as WorkspaceInfo[]; if (limit > 0) workspaces = workspaces.slice(0, limit); + if (format === "json") { + const items = workspaces.map((ws) => ({ + workspaceId: ws.workspaceId, + name: ws.agentName, + default: ws.defaultAgent, + })); + emitResult(items, format); + return; + } + printTable(workspaces, config.noColor); }, }); diff --git a/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts b/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts index cac6328..7f6724e 100644 --- a/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts +++ b/packages/cli/tests/e2e/advisor-recommend.e2e.test.ts @@ -2,21 +2,21 @@ import { describe, expect, test } from "vite-plus/test"; import { isDashScopeE2EReady, parseStdoutJson, runCli } from "./helpers.ts"; describe("e2e: advisor recommend", () => { - test("advisor 分组展示子命令帮助且成功退出", async () => { + test("advisor shows subcommand groups and exits successfully", async () => { const { stdout, stderr, exitCode } = await runCli(["advisor"]); expect(exitCode, stderr).toBe(0); expect(`${stdout}\n${stderr}`).toMatch(/advisor|recommend/i); }); - test("advisor 
recommend --help 正常退出", async () => { + test("advisor recommend --help exits successfully", async () => { const { stderr, exitCode } = await runCli(["advisor", "recommend", "--help"]); expect(exitCode, stderr).toBe(0); expect(stderr).toMatch(/recommend|--message|dry-run/i); }); }); -describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", () => { - test("advisor recommend 缺少 --message 时打印帮助并退出 (0)", async () => { +describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend (DashScope)", () => { + test("advisor recommend without --message prints help and exits", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", @@ -26,13 +26,13 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(`${stdout}\n${stderr}`).toMatch(/--message|Usage:/i); }); - test("advisor recommend --dry-run 输出意图分析和候选列表", async () => { + test("advisor recommend --dry-run outputs intent analysis and candidates", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "我想做一个能理解图片的客服机器人", + "I want to build a customer service bot that understands images", "--non-interactive", "--output", "json", @@ -44,7 +44,7 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", candidateCount?: number; candidates?: Array<{ model?: string; score?: number }>; }>(stdout); - expect(data.userInput).toBe("我想做一个能理解图片的客服机器人"); + expect(data.userInput).toBe("I want to build a customer service bot that understands images"); expect(data.intent?.requiredCapabilities).toContain("VU"); expect(data.intent?.inputModality).toContain("Image"); expect(data.candidateCount).toBeGreaterThan(0); @@ -52,40 +52,44 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(data.candidates?.[0]?.score).toBeGreaterThan(0); }, 60_000); - test("advisor recommend 完整推荐流程返回结果", async () => { + test("advisor recommend full flow returns 
results", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--message", - "低成本高并发的在线客服", + "low-cost high-concurrency online customer service", "--non-interactive", "--output", "json", ]); expect(exitCode, stderr).toBe(0); const data = parseStdoutJson<{ - type?: string; - recommendations?: Array<{ - model?: string; - name?: string; - reason?: string; - }>; + intent?: { taskSummary?: string }; + result?: { + type?: string; + recommendations?: Array<{ + model?: string; + name?: string; + reason?: string; + }>; + }; + candidates?: number; }>(stdout); - expect(data.type).toBe("single"); - expect(data.recommendations?.length).toBeGreaterThan(0); - expect(data.recommendations?.[0]?.model).toBeDefined(); - expect(data.recommendations?.[0]?.reason).toBeDefined(); + expect(data.result?.type).toBe("single"); + expect(data.result?.recommendations?.length).toBeGreaterThan(0); + expect(data.result?.recommendations?.[0]?.model).toBeDefined(); + expect(data.result?.recommendations?.[0]?.reason).toBeDefined(); }, 120_000); - // ---- 模型偏好:正例 ---- + // ---- Model preference: positive cases ---- - test("scoped 偏好 — 限定系列时 intent 含 modelPreference.mode=scoped", async () => { + test("scoped preference — intent contains modelPreference.mode=scoped when family is specified", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "deepseek系列中哪个模型最适合用来进行快速推理", + "Which model in the deepseek family is best for fast reasoning?", "--non-interactive", "--output", "json", @@ -103,13 +107,13 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", ).toBe(true); }, 60_000); - test("comparison 偏好 — 对比模型时 intent 含 modelPreference.mode=comparison", async () => { + test("comparison preference — intent contains modelPreference.mode=comparison when comparing models", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", 
"--message", - "qwen-max和deepseek-v3哪个更适合做代码生成", + "Which is better for code generation, qwen-max or deepseek-v3?", "--non-interactive", "--output", "json", @@ -122,13 +126,13 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(data.intent?.modelPreference?.targets?.length).toBeGreaterThanOrEqual(2); }, 60_000); - test("excludes 偏好 — 排除模型时 intent 识别出 modelPreference", async () => { + test("excludes preference — intent detects modelPreference when excluding models", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "不要qwen,推荐一个适合文本生成的模型", + "Not qwen, recommend a model suitable for text generation", "--non-interactive", "--output", "json", @@ -147,15 +151,15 @@ describe.skipIf(!isDashScopeE2EReady())("e2e: advisor recommend(DashScope)", expect(hasExcludes).toBe(true); }, 60_000); - // ---- 模型偏好:反例 ---- + // ---- Model preference: negative cases ---- - test("无偏好 — 普通需求查询时 intent 不含 modelPreference 或 mode=unconstrained", async () => { + test("no preference — intent has no modelPreference or mode=unconstrained for generic queries", async () => { const { stdout, stderr, exitCode } = await runCli([ "advisor", "recommend", "--dry-run", "--message", - "我要做一个能理解图片的客服机器人", + "I want to build a customer service bot that understands images", "--non-interactive", "--output", "json", diff --git a/packages/cli/tests/e2e/quota.e2e.test.ts b/packages/cli/tests/e2e/quota.e2e.test.ts index f12e6de..e2d3f6b 100644 --- a/packages/cli/tests/e2e/quota.e2e.test.ts +++ b/packages/cli/tests/e2e/quota.e2e.test.ts @@ -139,14 +139,19 @@ describe.skipIf(!isConsoleE2EReady())("e2e: quota(Console)", () => { expect(stderr).toContain("no matching models found"); }); - test("quota list JSON 输出包含 qpmInfo", async () => { + test("quota list JSON 输出包含 model/rpm/tpm/maxTPM", async () => { const { stdout, stderr, exitCode } = await runCli(["quota", "list", "--output", "json"]); expect(exitCode, 
stderr).toBe(0); - const data = parseStdoutJson>(stdout); + const data = + parseStdoutJson< + Array<{ model?: string; rpm?: number | null; tpm?: number | null; maxTPM?: number | null }> + >(stdout); expect(Array.isArray(data)).toBe(true); expect(data.length).toBeGreaterThan(0); expect(data[0].model).toBeTypeOf("string"); - expect(data[0].qpmInfo).toBeDefined(); + expect(data[0].rpm).toBeTypeOf("number"); + expect(data[0].tpm).toBeTypeOf("number"); + expect(data[0].maxTPM).toBeTypeOf("number"); }); test("quota request --dry-run 输出请求参数", async () => { diff --git a/packages/cli/tests/e2e/usage-free.e2e.test.ts b/packages/cli/tests/e2e/usage-free.e2e.test.ts index 589239b..4de0bd6 100644 --- a/packages/cli/tests/e2e/usage-free.e2e.test.ts +++ b/packages/cli/tests/e2e/usage-free.e2e.test.ts @@ -133,12 +133,21 @@ describe.skipIf(!isConsoleE2EReady())("e2e: usage free(Console)", () => { "json", ]); expect(exitCode, stderr).toBe(0); - const data = parseStdoutJson<{ - code?: string; - successResponse?: boolean; - }>(stdout); - expect(data.code).toBe("200"); - expect(data.successResponse).toBe(true); + const data = parseStdoutJson< + Array<{ + model?: string; + type?: string | null; + remaining?: number | null; + total?: number | null; + usagePercent?: number | null; + expires?: string | null; + autoStop?: boolean | string | null; + }> + >(stdout); + expect(Array.isArray(data)).toBe(true); + expect(data.length).toBeGreaterThan(0); + expect(data[0].model).toBe("qwen3-max"); + expect(data[0].type).toBeTypeOf("string"); }); test("usage free --model 单模型文本输出包含表头", async () => { @@ -276,7 +285,9 @@ describe.skipIf(!isConsoleE2EReady())("e2e: usage free(Console)", () => { "json", ]); expect(exitCode, stderr).toBe(0); - const data = parseStdoutJson<{ code?: string }>(stdout); - expect(data.code).toBe("200"); + const data = parseStdoutJson>(stdout); + expect(Array.isArray(data)).toBe(true); + expect(data.length).toBeGreaterThan(0); + expect(data[0].model).toBe("qwen3-max"); }); }); 
diff --git a/packages/cli/tests/e2e/usage-stats.e2e.test.ts b/packages/cli/tests/e2e/usage-stats.e2e.test.ts index 2e2ac89..ceab4b6 100644 --- a/packages/cli/tests/e2e/usage-stats.e2e.test.ts +++ b/packages/cli/tests/e2e/usage-stats.e2e.test.ts @@ -169,11 +169,17 @@ describe.skipIf(!isConsoleE2EReady())("e2e: usage stats(Console)", () => { ]); expect(exitCode, stderr).toBe(0); const data = parseStdoutJson<{ - code?: string; - successResponse?: boolean; + period?: { start?: string; end?: string; days?: number }; + modelsCalled?: number; + successfulCalls?: number; + usages?: Array<{ key?: string; value?: number }>; }>(stdout); - expect(data.code).toBe("200"); - expect(data.successResponse).toBe(true); + expect(data.period).toBeDefined(); + expect(data.period?.start).toBeTypeOf("string"); + expect(data.period?.end).toBeTypeOf("string"); + expect(data.period?.days).toBeTypeOf("number"); + expect(data.modelsCalled).toBeTypeOf("number"); + expect(data.successfulCalls).toBeTypeOf("number"); }); test("usage stats 概览文本输出包含英文标签", async () => { diff --git a/packages/core/src/advisor/constants/prompts.ts b/packages/core/src/advisor/constants/prompts.ts index 9d2f04d..2930f92 100644 --- a/packages/core/src/advisor/constants/prompts.ts +++ b/packages/core/src/advisor/constants/prompts.ts @@ -1,196 +1,181 @@ -export const INTENT_MODEL = "qwen-turbo"; +export const INTENT_MODEL = "qwen-flash"; export const RANKING_MODEL = "qwen3.6-flash"; -export const RANKING_MODEL_FAST = "qwen-turbo"; - -export const INTENT_SYSTEM_PROMPT = `你是一个意图分析器。根据用户的需求描述,先理解用户场景,再提取结构化信息。 - -## 分析步骤 -1. 用一句话总结用户的核心需求(taskSummary),要体现具体场景而非泛泛描述 -2. 推断场景特征(scenarioHints),例如:["需要低延迟","面向C端用户","高并发","对话式交互","离线批处理","需要精准度"] -3. 基于场景特征推断 budget 和 qualityPreference - - 只在用户明确表达或场景强烈暗示时偏离默认值 - - 用户明确说"低成本"、"便宜"、"省钱" → budget:"low" - - 用户明确说"最好的"、"高精度"、"不计成本" → qualityPreference:"flagship" - - 场景本身有强约束时才推断:如"日均百万请求的客服" → budget:"low"(高并发=成本敏感) - - 其他情况保持 budget:"medium", qualityPreference:"balanced" -4. 
提取模态、能力、特性等结构化字段 - -## 示例 - -用户: "做一个低成本高并发的在线客服" -→ budget:"low", qualityPreference:"cost-optimized"(用户明确说了低成本) - -用户: "法律合同审查,要求高精准度" -→ budget:"medium", qualityPreference:"flagship"(用户明确要求高精准度,但没提预算) - -用户: "我要做一个能理解图片的客服机器人" -→ budget:"medium", qualityPreference:"balanced"(用户没提成本和质量要求,不过度推断) - -用户: "帮我选一个写代码的模型" -→ budget:"medium", qualityPreference:"balanced"(通用需求,无明确倾向) - -用户: "预算有限,做个简单的文本摘要功能" -→ budget:"low", qualityPreference:"cost-optimized"(用户说了预算有限) - -用户: "企业级知识库问答,准确率是第一优先级" -→ budget:"high", qualityPreference:"flagship"(企业级+准确率第一=愿投入高成本) - -用户: "个人学习项目,试试AI生成图片" -→ budget:"low", qualityPreference:"cost-optimized"(个人学习=成本敏感) - -用户: "做一个Agent自动根据用户意图生成动画片" -→ budget:"medium", qualityPreference:"balanced"(复杂pipeline,但没明确成本/质量约束) - -## 模型偏好识别 -分析用户是否提到了特定的模型、模型系列或厂商,据此判断推荐模式: -- 用户未提到任何模型/系列/厂商 → mode:"unconstrained",不填 targets -- 用户限定了范围(如"deepseek系列哪个好"、"通义千问的模型推荐"、"开源的推理模型") → mode:"scoped",targets:["deepseek"] 或 ["通义千问"] -- 用户要对比特定模型(如"wan2.6和wan2.7哪个好"、"qwen-max和deepseek-v3对比"、"qwen-max适合做法律分析吗") → mode:"comparison",targets:["wan2.6","wan2.7"] - - 单模型评估也算 comparison,targets 只填一个 -- 用户以某模型为参照找替代(如"有没有类似qwen-max但更便宜的") → mode:"alternative",targets:["qwen-max"] -- 用户明确排除某些模型/系列(如"除了qwen还有什么好的") → excludes:["qwen"],mode 根据其他条件判断 -- targets 填写用户原文中的模型/系列名称,保持原文写法 - -## 输出字段 -- taskSummary: 一句话场景理解(必须具体,禁止"用户想用AI做某事"这种废话) -- scenarioHints: 推断的场景特征数组 -- complexity: "single"(单一模型可完成)或 "pipeline"(需要多个模型协同) -- segments: 仅 pipeline 时填写,每步包含 step/inputModality/outputModality/requiredCapabilities。 - - step 必须是一句话描述该步骤在用户任务中解决的具体问题,例如"解析天气预报数据,生成适合视频制作的场景描述文本",禁止用编号或泛化的模态标签 - - segments 必须形成模态链路:每步的 inputModality 应包含上一步的 outputModality,确保上下游数据可以衔接 -- inputModality: 用户输入涉及的模态 ["Text","Image","Video","Audio"] -- outputModality: 期望输出的模态 -- requiredCapabilities: 需要的能力。可选代码(必须严格使用,不要自创): - TG=文本生成, Reasoning=推理, VU=视觉理解, IG=图像生成, VG=视频生成, - TTS=语音合成, ASR=语音识别, Realtime-ASR=实时语音识别, - Realtime-Text-to-Speech=实时语音合成, Realtime-Audio-Translate=实时音频翻译, - 
Realtime-Omni=实时全模态, Multimodal-Omni=全模态, ME=多模态嵌入, - TR=翻译, 3D-generation=3D生成 -- requiredFeatures: 需要的特性 (function-calling, web-search, structured-outputs, prefix-completion) -- budget: "low"/"medium"/"high"(基于场景推断,不要默认 medium) +export const RANKING_MODEL_FAST = "qwen-flash"; + +export const INTENT_SYSTEM_PROMPT = `You are an intent analyzer. Given the user's requirement, understand the scenario first, then extract structured information. + +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. All text fields (taskSummary, scenarioHints) must be in English. + +## Analysis Steps +1. Summarize the user's core need in one sentence (taskSummary) — be specific about the scenario, not generic +2. Infer scenario hints (scenarioHints), e.g.: ["low-latency", "consumer-facing", "high-concurrency", "conversational", "offline-batch", "high-precision"] +3. Infer budget and qualityPreference from scenario hints + - Only deviate from defaults when the user explicitly states or the scenario strongly implies + - User says "low cost", "cheap", "save money" → budget:"low" + - User says "best", "high precision", "cost no object" → qualityPreference:"flagship" + - Infer from scenario constraints only when strong: e.g. "1M requests/day customer service" → budget:"low" (high concurrency = cost-sensitive) + - Otherwise keep budget:"medium", qualityPreference:"balanced" +4. Extract modalities, capabilities, features etc. + +## Model preference detection +Analyze whether the user mentioned specific models, model families, or vendors: +- No models/families/vendors mentioned → mode:"unconstrained", no targets +- User scoped the range (e.g. "recommend from the deepseek family", "open-source reasoning models") → mode:"scoped", targets:["deepseek"] +- User wants to compare specific models (e.g. 
"compare wan2.6 and wan2.7", "is qwen-max good for legal analysis") → mode:"comparison", targets:["wan2.6","wan2.7"] + - Single model evaluation is also comparison with one target +- User wants alternatives to a reference model (e.g. "something like qwen-max but cheaper") → mode:"alternative", targets:["qwen-max"] +- User explicitly excludes certain models/families (e.g. "good models besides qwen") → excludes:["qwen"], mode determined by other signals +- targets should capture the model/family names as the user wrote them + +## Output fields +- taskSummary: one-sentence scenario understanding (must be specific, never generic like "user wants AI") +- scenarioHints: array of inferred scenario features +- complexity: "single" or "pipeline" +- segments: only for pipeline, each with step/inputModality/outputModality/requiredCapabilities + - step must describe the specific problem this step solves in the user's task, no numbered or generic modal labels + - segments must form a modality chain: each step's inputModality should cover the previous step's outputModality +- inputModality: user input modalities ["Text","Image","Video","Audio"] +- outputModality: expected output modalities +- requiredCapabilities: capability codes (use strictly from the list, don't invent): + TG=Text Generation, Reasoning=Reasoning, VU=Vision Understanding, IG=Image Generation, VG=Video Generation, + TTS=Text-to-Speech, ASR=Speech-to-Text, Realtime-ASR=Realtime Speech-to-Text, + Realtime-Text-to-Speech=Realtime Text-to-Speech, Realtime-Audio-Translate=Realtime Audio Translation, + Realtime-Omni=Realtime Omni-modal, Multimodal-Omni=Multimodal Omni, ME=Multimodal Embedding, + TR=Translation, 3D-generation=3D Generation +- requiredFeatures: required features (function-calling, web-search, structured-outputs, prefix-completion) +- budget: "low"/"medium"/"high" - contextNeed: "standard"/"large"/"extra-large" -- qualityPreference: "flagship"/"balanced"/"cost-optimized"(基于场景推断,不要默认 balanced) -- 
modelPreference: { mode, targets?, excludes? }(见上方"模型偏好识别") +- qualityPreference: "flagship"/"balanced"/"cost-optimized" +- modelPreference: { mode, targets?, excludes? } -只输出 JSON,不要有其他文字。`; +Output only JSON, no other text.`; -export const SINGLE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型推荐顾问。从以下候选模型中选出最佳推荐。 +export const SINGLE_SYSTEM_PROMPT = `You are a model recommendation advisor for Alibaba Cloud Model Studio. From the candidate models below, select the best recommendations. -## 背景 -系统已根据用户意图预筛选了候选模型,你只需从中精选并排序。 -意图分析中包含 budget 和 qualityPreference 字段,这代表了用户的实际需求层次。 +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. Every field — reason, highlights, step, summary — must be written in English. -## 推荐策略 +## Background +The system has pre-filtered candidate models based on intent analysis. Your job is to rank and pick from these candidates. +The intent includes budget and qualityPreference fields representing the user's actual needs. -推荐 3 个不同档次的模型,但排序必须反映用户的真实需求: +## Recommendation Strategy -- 推荐 #1(最佳推荐):根据 budget 和 qualityPreference 判断哪个档次最适合用户,把那个档次的最佳模型放在第一位 -- 推荐 #2(次优选择):另一个档次中值得考虑的模型,说明与 #1 相比的 tradeoff -- 推荐 #3(备选参考):第三个视角的选择,说明适用场景差异 +Recommend 3 models at different tiers, but ordering must reflect the user's true needs: -关键原则: -- budget:"low" / qualityPreference:"cost-optimized" → 推荐 #1 应该是性价比最高的模型,而非旗舰模型 -- budget:"high" / qualityPreference:"flagship" → 推荐 #1 应该是能力最强的旗舰模型 -- budget:"medium" / qualityPreference:"balanced" → 推荐 #1 应该是综合匹配度最高的模型,不预设档次偏好 +- #1 (Best Pick): Based on budget and qualityPreference, pick the best-fitting tier and put its top model first +- #2 (Runner-Up): A worthy consideration from another tier, explaining tradeoffs vs #1 +- #3 (Alternative): A third-perspective choice, explaining scenario differences -每个推荐都必须说明该模型为什么适合(或作为备选为什么值得考虑),理由必须关联用户的具体需求。 +Key principles: +- budget:"low" / qualityPreference:"cost-optimized" → #1 should be the best value model, not a flagship +- 
budget:"high" / qualityPreference:"flagship" → #1 should be the most capable flagship model +- budget:"medium" / qualityPreference:"balanced" → #1 should be the best all-around match -## 规则 -- 只能推荐候选列表中的模型,严禁推荐列表外的模型 -- 严禁使用泛泛的推荐理由(如"性能强大"、"综合能力好"、"效果不错"),每条 reason 必须说明该模型解决用户任务中的什么具体问题 -- 三个推荐的理由不允许雷同,每个必须从不同维度论证 -- 有定价信息时:结合 budget 字段权衡,把最符合用户预算的放在最前面 -- 有家族信息时:避免推荐同一家族的多个模型,优先推荐稳定版本 -- 有版本标签时:优先推荐 stable/latest 版本,除非用户明确需要特定版本 -- 没有增强字段的模型:按能力和描述排序即可,不因缺少信息而降权 -- 如果没有合适的模型,返回空数组 -- 如果你认为该需求实际需要多模型协同完成(pipeline),可以输出 type:"pipeline" 格式 -- 输出严格 JSON,不要输出其他内容 +Each recommendation must explain why the model fits (or as an alternative, why it's worth considering), with reasoning tied to the user's specific needs. -## 输出格式 +## Rules +- Only recommend models from the candidate list — never recommend outside it +- No generic reasons ("powerful", "good performance", "effective"). Each reason must describe how the model solves a specific aspect of the user's task +- All three recommendations must have distinct reasoning angles, not duplicate reasons +- When pricing is available: factor in budget, put the most budget-friendly option first +- When family info is available: avoid recommending multiple models from the same family, prefer stable versions +- When version tags are available: prefer stable/latest versions unless the user explicitly needs a specific version +- Models without enriched fields: rank by capability and description — don't penalize for missing info +- If no model fits, return an empty array +- If you believe the task actually requires multi-model collaboration (pipeline), you may output type:"pipeline" format +- Output strict JSON, no other text -单一任务: -{"type":"single","recommendations":[{"model":"模型ID","reason":"推荐理由","highlights":["亮点"]}]} +## Output Format -复合任务(仅当你确信需要多模型协同时): -{"type":"pipeline","summary":"一句话方案描述","steps":[{"step":"步骤描述","recommendations":[{"model":"模型ID","reason":"选择理由","highlights":["亮点"]}]}]}`; +Single task: 
+{"type":"single","recommendations":[{"model":"model ID","reason":"recommendation reason","highlights":["key highlights"]}]} -export const PIPELINE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型推荐顾问。用户需求已被拆解为多步骤流水线,请为每步选出最佳模型。 +Pipeline (only when confident multi-model is needed): +{"type":"pipeline","summary":"one-line solution description","steps":[{"step":"step description","recommendations":[{"model":"model ID","reason":"reason for choosing","highlights":["highlights"]}]}]}`; -## 背景 -系统已根据各步骤需求预筛选了候选模型。 -意图分析中包含 budget 和 qualityPreference 字段,这代表了用户的实际需求层次。 +export const PIPELINE_SYSTEM_PROMPT = `You are a model recommendation advisor for Alibaba Cloud Model Studio. The user's need has been decomposed into multi-step pipeline. Select the best model for each step. -## 推荐策略 +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. Every field — reason, highlights, step, summary — must be written in English. -每步推荐 3 个不同档次的模型,但排序必须反映用户的真实需求: +## Background +The system has pre-filtered candidate models for each step's requirements. +The intent includes budget and qualityPreference fields representing the user's actual needs. 
-- 推荐 #1(最佳推荐):根据 budget 和 qualityPreference 判断哪个档次最适合用户,把那个档次的最佳模型放在第一位 -- 推荐 #2(次优选择):另一个档次中值得考虑的模型,说明 tradeoff -- 推荐 #3(备选参考):第三个视角的选择,说明适用场景差异 +## Recommendation Strategy -关键原则: -- budget:"low" / qualityPreference:"cost-optimized" → 推荐 #1 应该是性价比最高的模型 -- budget:"high" / qualityPreference:"flagship" → 推荐 #1 应该是能力最强的旗舰模型 -- budget:"medium" / qualityPreference:"balanced" → 推荐 #1 应该是综合匹配度最高的模型 +Recommend 3 models at different tiers per step, ordering by user needs: -## 规则 -- 只能推荐候选列表中的模型 -- 每步推荐多个模型,按优先级排序,每个推荐给出简短理由和关键亮点 -- step 字段必须用一句话描述该步骤在用户任务中解决的具体问题,禁止用编号或泛化的模态标签(如"输出: Text") -- 严禁使用泛泛的推荐理由,每条 reason 必须说明该模型在这一步解决用户任务中的什么具体问题 -- 有定价信息时:结合 budget 字段权衡,把最符合用户预算的放在最前面 -- 有家族信息时:避免在相邻步骤使用同一家族的不同规格模型,除非确实需要 -- 没有增强字段的模型:按能力和描述排序即可,不因缺少信息而降权 -- 相邻步骤的模型必须模态兼容:上一步模型的输出模态必须被下一步模型的输入模态支持 -- 如果你认为该需求其实单模型可以完成,可以输出 type:"single" 格式 -- 输出严格 JSON +- #1 (Best Pick): Based on budget and qualityPreference, pick the best-fitting tier and put its top model first +- #2 (Runner-Up): A worthy consideration from another tier, explaining tradeoffs +- #3 (Alternative): A third-perspective choice -## 输出格式 +Key principles: +- budget:"low" / qualityPreference:"cost-optimized" → #1 should be the best value model +- budget:"high" / qualityPreference:"flagship" → #1 should be the most capable flagship model +- budget:"medium" / qualityPreference:"balanced" → #1 should be the best all-around match -{"type":"pipeline","summary":"一句话方案描述","steps":[{"step":"该步骤在用户任务中解决的具体问题","recommendations":[{"model":"模型ID","reason":"该模型如何解决这一步的具体问题","highlights":["亮点"]}]}]} +## Rules +- Only recommend models from the candidate list +- Each step recommends multiple models sorted by priority, each with brief reason and key highlights +- The "step" field must describe the specific problem this step solves in the user's task — no numbered or generic modal labels (e.g. "Output: Text") +- No generic reasons. 
Each reason must describe how the model solves a specific aspect of the user's task at this step +- When pricing is available: factor in budget, put the most budget-friendly option first +- When family info is available: avoid using different tiers of the same family in adjacent steps unless truly needed +- Models without enriched fields: rank by capability and description — don't penalize for missing info +- Adjacent steps must be modality-compatible: the previous step's output modalities must be supported as input modalities by the next step +- If you believe the task can be done with a single model, output type:"single" format +- Output strict JSON -或者(如果你认为单模型即可): -{"type":"single","recommendations":[{"model":"模型ID","reason":"推荐理由","highlights":["亮点"]}]}`; +## Output Format -export const COMPARISON_SYSTEM_PROMPT = `你是阿里云百炼平台的模型对比顾问。用户想对比特定模型,请根据使用场景进行对比分析。 +{"type":"pipeline","summary":"one-line solution description","steps":[{"step":"specific problem this step solves in the user's task","recommendations":[{"model":"model ID","reason":"how this model solves the specific problem at this step","highlights":["highlights"]}]}]} -## 背景 -用户指定了要对比的模型,系统已将这些模型和相关候选预筛选到列表中。 -意图分析中的 modelPreference.targets 是用户要对比的模型。 +Or (if single model suffices): +{"type":"single","recommendations":[{"model":"model ID","reason":"recommendation reason","highlights": +["key highlights"]}]}`; -## 对比策略 -- 用户指定的模型必须全部出现在推荐结果中,按适合程度排序 -- 每个模型的 reason 必须是对比性的,说明该模型相对于其他对比模型的优势和劣势 -- 如果候选中有比用户指定的更合适的模型,可以额外推荐,但用户指定的必须优先包含 -- 单模型评估场景(targets 只有一个):评估该模型是否适合用户需求,同时推荐更优的替代 +export const COMPARISON_SYSTEM_PROMPT = `You are a model comparison advisor for Alibaba Cloud Model Studio. The user wants to compare specific models — analyze them against the use case. -## 规则 -- 只能推荐候选列表中的模型 -- reason 必须包含对比视角:该模型相比其他模型在哪些方面更好/更差 -- highlights 突出各模型的差异化特点 -- 输出严格 JSON,不要输出其他内容 +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. 
Every field — reason, highlights — must be written in English. -## 输出格式 -{"type":"single","recommendations":[{"model":"模型ID","reason":"对比分析理由","highlights":["差异化亮点"]}]}`; +## Background +The user specified models to compare. The system has pre-filtered these models and related candidates into the list. +The intent's modelPreference.targets are the models to compare. -export const ALTERNATIVE_SYSTEM_PROMPT = `你是阿里云百炼平台的模型替代顾问。用户以某个模型为参照,寻找替代方案。 +## Comparison Strategy +- All user-specified models must appear in the results, sorted by suitability +- Each model's reason must be comparative: describe strengths and weaknesses relative to other models being compared +- If candidates contain better fits than what the user specified, they can be additionally recommended, but user-specified models take priority +- Single-model evaluation (one target): evaluate if the model fits, and recommend better alternatives -## 背景 -用户以某个模型为参照点,想找到在特定维度上更优的替代方案(如更便宜、更快、更强)。 -意图分析中的 modelPreference.targets 是参照模型。 +## Rules +- Only recommend models from the candidate list +- reason must include comparative perspective: how this model is better/worse compared to others +- highlights should emphasize differentiating characteristics +- Output strict JSON -## 替代策略 -- 推荐 #1:如果参照模型在候选中,先评估它是否满足用户需求,给出其基本定位 -- 推荐 #2~#3:推荐替代方案,reason 必须说明相比参照模型在用户关注维度上的 tradeoff -- 关注用户提到的替代维度(如"更便宜"→重点对比定价,"更强"→重点对比能力) - -## 规则 -- 只能推荐候选列表中的模型 -- 参照模型必须包含在结果中(如果在候选列表中) -- 替代推荐的 reason 必须说明与参照模型的具体差异 -- 避免推荐和参照模型同系列的其他版本(除非确实有显著差异) -- 输出严格 JSON,不要输出其他内容 +## Output Format +{"type":"single","recommendations":[{"model":"model ID","reason":"comparative analysis","highlights":["differentiators"]}]}`; -## 输出格式 -{"type":"single","recommendations":[{"model":"模型ID","reason":"替代分析理由","highlights":["差异化亮点"]}]}`; +export const ALTERNATIVE_SYSTEM_PROMPT = `You are a model alternative advisor for Alibaba Cloud Model Studio. The user has a reference model and wants to find alternatives. 
+ +CRITICAL: You MUST respond entirely in English. Do not use any Chinese characters anywhere in your response. Every field — reason, highlights — must be written in English. + +## Background +The user has a reference model and wants to find alternatives that are better in specific dimensions (cheaper, faster, more capable). +The intent's modelPreference.targets is the reference model. + +## Alternative Strategy +- #1: If the reference model is in candidates, first evaluate if it meets the user's needs — give its positioning +- #2~#3: Recommend alternatives. reason must explain the tradeoff vs the reference model in the user's dimensions of interest +- Focus on the user's stated alternative dimension (e.g. "cheaper" → focus on pricing comparison, "better" → focus on capability comparison) + +## Rules +- Only recommend models from the candidate list +- The reference model must be included in results if it's in the candidate list +- Alternative recommendations must explain concrete differences from the reference model +- Avoid recommending other versions from the same family unless there's a significant difference +- Output strict JSON + +## Output Format +{"type":"single","recommendations":[{"model":"model ID","reason":"alternative analysis","highlights":["differentiators"]}]}`; diff --git a/packages/core/src/advisor/embedding.ts b/packages/core/src/advisor/embedding.ts index fb9311e..d8fc620 100644 --- a/packages/core/src/advisor/embedding.ts +++ b/packages/core/src/advisor/embedding.ts @@ -76,20 +76,20 @@ async function embedBatch(config: Config, texts: string[]): Promise } const CAPABILITY_LABELS: Record = { - TG: "文本生成", - Reasoning: "推理", - VU: "视觉理解", - IG: "图像生成", - VG: "视频生成", - TTS: "语音合成", - ASR: "语音识别", + TG: "Text Generation", + Reasoning: "Reasoning", + VU: "Vision Understanding", + IG: "Image Generation", + VG: "Video Generation", + TTS: "Text-to-Speech", + ASR: "Speech-to-Text", }; const MODALITY_LABELS: Record = { - Text: "文本", - Image: "图片/图像", - 
Video: "视频", - Audio: "音频/语音", + Text: "Text", + Image: "Image", + Video: "Video", + Audio: "Audio", }; interface GroupData { @@ -135,12 +135,12 @@ function buildModelText(model: ModelProfile, descriptions: Map): model.name, model.model, description, - caps ? `能力: ${caps}` : "", - inputMods ? `输入: ${inputMods}` : "", - outputMods ? `输出: ${outputMods}` : "", - model.features?.length ? `特性: ${model.features.join(", ")}` : "", + caps ? `Capabilities: ${caps}` : "", + inputMods ? `Input: ${inputMods}` : "", + outputMods ? `Output: ${outputMods}` : "", + model.features?.length ? `Features: ${model.features.join(", ")}` : "", model.familyName || "", - model.category ? `定位: ${model.category}` : "", + model.category ? `Category: ${model.category}` : "", ].filter(Boolean); return parts.join(" | "); diff --git a/packages/core/src/advisor/recommend.ts b/packages/core/src/advisor/recommend.ts index 863334c..53244d8 100644 --- a/packages/core/src/advisor/recommend.ts +++ b/packages/core/src/advisor/recommend.ts @@ -46,26 +46,27 @@ function buildCandidatesContext(candidates: ScoredCandidate[]): string { .map(({ model: profile }) => { const parts = [ `ID: ${profile.model}`, - `名称: ${profile.name}`, - `描述: ${profile.shortDescription || profile.description}`, - `能力: ${profile.capabilities.join(", ")}`, - `特性: ${profile.features.join(", ")}`, + `Name: ${profile.name}`, + `Description: ${profile.shortDescription || profile.description}`, + `Capabilities: ${profile.capabilities.join(", ")}`, + `Features: ${profile.features.join(", ")}`, ]; - if (profile.contextWindow) parts.push(`上下文窗口: ${profile.contextWindow}`); - if (profile.maxOutputTokens) parts.push(`最大输出: ${profile.maxOutputTokens}`); - if (profile.category) parts.push(`类别: ${profile.category}`); + if (profile.contextWindow) parts.push(`Context Window: ${profile.contextWindow}`); + if (profile.maxOutputTokens) parts.push(`Max Output: ${profile.maxOutputTokens}`); + if (profile.category) parts.push(`Category: 
${profile.category}`); const modality = profile.inferenceMetadata; if (modality?.request_modality?.length) - parts.push(`输入模态: ${modality.request_modality.join(", ")}`); + parts.push(`Input Modality: ${modality.request_modality.join(", ")}`); if (modality?.response_modality?.length) - parts.push(`输出模态: ${modality.response_modality.join(", ")}`); + parts.push(`Output Modality: ${modality.response_modality.join(", ")}`); const prices = formatPrices(profile); - if (prices) parts.push(`定价: ${prices}`); + if (prices) parts.push(`Pricing: ${prices}`); const qpm = formatQpm(profile); if (qpm) parts.push(`QPM: ${qpm}`); - if (profile.versionTag) parts.push(`版本: ${profile.versionTag}`); - if (profile.openSource !== undefined) parts.push(`开源: ${profile.openSource ? "是" : "否"}`); - if (profile.family) parts.push(`家族: ${profile.family}`); + if (profile.versionTag) parts.push(`Version: ${profile.versionTag}`); + if (profile.openSource !== undefined) + parts.push(`Open Source: ${profile.openSource ? "Yes" : "No"}`); + if (profile.family) parts.push(`Family: ${profile.family}`); return parts.join(" | "); }) .join("\n"); @@ -86,29 +87,29 @@ function buildIntentContext(intent: IntentProfile): string { modelPreference, } = intent; const parts: string[] = []; - if (taskSummary) parts.push(`场景理解: ${taskSummary}`); - if (scenarioHints.length) parts.push(`场景特征: ${scenarioHints.join(", ")}`); - if (inputModality.length) parts.push(`输入模态: ${inputModality.join(", ")}`); - if (outputModality.length) parts.push(`输出模态: ${outputModality.join(", ")}`); - if (requiredCapabilities.length) parts.push(`所需能力: ${requiredCapabilities.join(", ")}`); - if (requiredFeatures.length) parts.push(`所需特性: ${requiredFeatures.join(", ")}`); - parts.push(`预算倾向: ${budget}`); - parts.push(`质量偏好: ${qualityPreference}`); - if (contextNeed !== ContextNeeds.Standard) parts.push(`上下文需求: ${contextNeed}`); + if (taskSummary) parts.push(`Task: ${taskSummary}`); + if (scenarioHints.length) parts.push(`Scenario: 
${scenarioHints.join(", ")}`); + if (inputModality.length) parts.push(`Input: ${inputModality.join(", ")}`); + if (outputModality.length) parts.push(`Output: ${outputModality.join(", ")}`); + if (requiredCapabilities.length) parts.push(`Capabilities: ${requiredCapabilities.join(", ")}`); + if (requiredFeatures.length) parts.push(`Features: ${requiredFeatures.join(", ")}`); + parts.push(`Budget: ${budget}`); + parts.push(`Quality: ${qualityPreference}`); + if (contextNeed !== ContextNeeds.Standard) parts.push(`Context: ${contextNeed}`); if (modelPreference && modelPreference.mode !== "unconstrained") { - parts.push(`模型偏好: ${modelPreference.mode}`); + parts.push(`Mode: ${modelPreference.mode}`); if (modelPreference.targets?.length) - parts.push(`目标模型: ${modelPreference.targets.join(", ")}`); + parts.push(`Targets: ${modelPreference.targets.join(", ")}`); if (modelPreference.excludes?.length) - parts.push(`排除模型: ${modelPreference.excludes.join(", ")}`); + parts.push(`Excludes: ${modelPreference.excludes.join(", ")}`); } if (segments?.length) { - parts.push(`拆解步骤:`); + parts.push(`Pipeline Steps:`); for (const seg of segments) { - const inMod = seg.inputModality.join(",") || "无"; - const outMod = seg.outputModality.join(",") || "无"; - const caps = seg.requiredCapabilities.join(",") || "无"; - parts.push(` - ${seg.step} (输入: ${inMod} → 输出: ${outMod}, 能力: ${caps})`); + const inMod = seg.inputModality.join(",") || "none"; + const outMod = seg.outputModality.join(",") || "none"; + const caps = seg.requiredCapabilities.join(",") || "none"; + parts.push(` - ${seg.step} (Input: ${inMod} → Output: ${outMod}, Capabilities: ${caps})`); } } return parts.join("\n"); @@ -175,7 +176,7 @@ function validatePipelineCompatibility( const compatible = accepts.some((mod) => prevOutputs.has(mod)); if (!compatible && accepts.length > 0) { warnings.push( - `${rec.name} 的输入模态 [${accepts.join(", ")}] 可能不兼容上一步的输出模态 [${[...prevOutputs].join(", ")}]`, + `${rec.name}'s input modalities 
[${accepts.join(", ")}] may not be compatible with the previous step's output modalities [${[...prevOutputs].join(", ")}]`, ); } } @@ -204,7 +205,7 @@ export async function rankModels( systemPrompt = ALTERNATIVE_SYSTEM_PROMPT; } else if (preferenceMode === "scoped") { const scopeNote = intent.modelPreference?.targets?.length - ? `\n\n## 范围限定\n用户明确要求在以下范围内推荐:${intent.modelPreference.targets.join("、")}。请优先从匹配该范围的模型中选择。` + ? `\n\n## Scope Restriction\nThe user explicitly requested recommendations from: ${intent.modelPreference.targets.join(", ")}. Prioritize models within this scope.` : ""; systemPrompt = (intent.complexity === Complexities.Pipeline @@ -219,8 +220,8 @@ export async function rankModels( const userMessage = intent.complexity === Complexities.Pipeline - ? `意图分析结果:\n${intentContext}\n\n候选模型列表:\n${candidatesContext}\n\n用户原始需求:${userInput}\n\n请为流水线各步骤各推荐最多 ${top} 个模型。` - : `意图分析结果:\n${intentContext}\n\n候选模型列表:\n${candidatesContext}\n\n用户原始需求:${userInput}\n\n请推荐最多 ${top} 个模型。`; + ? `Intent Analysis:\n${intentContext}\n\nCandidate Models:\n${candidatesContext}\n\nUser Request: ${userInput}\n\nRecommend up to ${top} models for each pipeline step. Respond in English only.` + : `Intent Analysis:\n${intentContext}\n\nCandidate Models:\n${candidatesContext}\n\nUser Request: ${userInput}\n\nRecommend up to ${top} models. Respond in English only.`; const body: Record = { model: useThinkingModel ? RANKING_MODEL : RANKING_MODEL_FAST,