Features: - Add editable variable selection in workflow plan (SingleVarSelect + MultiVarTags) - Implement 3-layer flexible interception (warning bar + icon + blocking dialog) - Add tool_param_constraints.json for 12 statistical tools parameter validation - Add PATCH /workflow/:id/params API with Zod structural validation - Implement synchronous parameter sync before execution (Promise chaining) - Fix LLM hallucination by strict system prompt constraints - Fix DynamicReport object-based rows compatibility (R baseline_table) - Fix Word export row.map error with same normalization logic - Restore inferGroupingVar for smart default variable selection - Add ReactMarkdown rendering in SSAChatPane - Update SSA module status document to v3.5 Modified files: - backend: workflow.routes, ChatHandlerService, SystemPromptService, FlowTemplateService - frontend: WorkflowTimeline, SSAWorkspacePane, DynamicReport, SSAChatPane, ssaStore, ssa.css - config: tool_param_constraints.json (new) - docs: SSA status doc, team review reports Tested: Cohort study end-to-end execution + report export verified Co-authored-by: Cursor <cursoragent@cursor.com>
171 lines
7.0 KiB
TypeScript
171 lines
7.0 KiB
TypeScript
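/**
 * Phase II — dynamic System Prompt assembly service
 *
 * Six-section assembly (H2 "Lost in the Middle" correction):
 *   [1] base_system         — fixed role definition
 *   [2] data_context        — DataOverview summary
 *   [3] pico_inference      — PICO classification
 *   [4] variable_dictionary — variable dictionary summary
 *   [5] tool_outputs        — tool call results (verbose data goes in the middle)
 *   [6] intent_instruction  — intent instruction (core instruction goes last, never truncated)
 *
 * Token budget <= 4000 (C2); when exceeded, sections are trimmed in the
 * priority order [5] > [4] > [3] > [2].
 * [6] intent_instruction is never truncated.
 *
 * NOTE(review): in this file only the [2]-[4] data context is given a token
 * budget before assembly; an over-budget prompt is logged as a warning rather
 * than trimmed. Confirm whether [5] is trimmed by the caller.
 */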
|
||
|
||
import { logger } from '../../../common/logging/index.js';
|
||
import { getPromptService } from '../../../common/prompt/index.js';
|
||
import { prisma } from '../../../config/database.js';
|
||
import { tokenTruncationService, type TruncationOptions } from './TokenTruncationService.js';
|
||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||
import type { SessionBlackboard } from '../types/session-blackboard.types.js';
|
||
|
||
/** User intents recognised by the SSA chat pipeline. */
export type IntentType = 'chat' | 'explore' | 'consult' | 'analyze' | 'discuss' | 'feedback';

/** Prompt-template code looked up for each intent's closing instruction. */
const INTENT_PROMPT_CODES: Record<IntentType, string> = {
  chat: 'SSA_INTENT_CHAT',
  explore: 'SSA_INTENT_EXPLORE',
  consult: 'SSA_INTENT_CONSULT',
  analyze: 'SSA_INTENT_ANALYZE',
  discuss: 'SSA_INTENT_DISCUSS',
  feedback: 'SSA_INTENT_FEEDBACK',
};

/** Upper bound (in estimated tokens) for the assembled system prompt. */
const MAX_SYSTEM_TOKENS = 4000;
|
||
|
||
/**
 * Builds the SSA system prompt by concatenating, in order: the base role
 * prompt, the session data context, optional tool outputs, and the intent
 * instruction (always last).
 */
export class SystemPromptService {
  /**
   * Assemble the complete system prompt (six-section layout, H2 ordering).
   *
   * @param sessionId - Session whose blackboard supplies the data context.
   * @param intent - Detected user intent; selects the closing instruction.
   * @param toolOutputs - Optional tool result text, placed before the intent instruction.
   * @returns The assembled prompt. An over-budget prompt is logged as a warning, not trimmed.
   */
  async assemble(
    sessionId: string,
    intent: IntentType,
    toolOutputs?: string,
  ): Promise<string> {
    const promptService = getPromptService(prisma);

    // [1] Base system role
    const baseSystem = await this.renderOrFallback(
      'SSA_BASE_SYSTEM',
      () => promptService.get('SSA_BASE_SYSTEM', {}),
      () => this.fallbackBaseSystem(),
    );

    // [2-4] DataContext from SessionBlackboard (truncated)
    let dataContextBlock = '';
    const blackboard = await sessionBlackboardService.get(sessionId);
    if (blackboard) {
      const truncated = tokenTruncationService.truncate(blackboard, {
        maxTokens: this.calculateDataBudget(baseSystem, toolOutputs),
        strategy: 'balanced',
      });
      dataContextBlock = tokenTruncationService.toPromptString(truncated);
    }

    // [5] Tool outputs (placed in the middle — H2 fix)
    const toolBlock = toolOutputs
      ? `\n\n## 工具执行结果\n${toolOutputs}`
      : '';

    // [6] Intent instruction (placed LAST — H2 fix, never truncated)
    const intentCode = INTENT_PROMPT_CODES[intent];
    const intentInstruction = await this.renderOrFallback(
      intentCode,
      () => promptService.get(intentCode, {}),
      () => this.fallbackIntentInstruction(intent),
    );

    // Assemble: [1] Base → [2-4] DataContext → [5] ToolOutputs → [6] IntentInstruction
    const parts: string[] = [baseSystem];
    if (dataContextBlock) {
      parts.push(dataContextBlock);
    }
    if (toolBlock) {
      parts.push(toolBlock);
    }
    // Intent instruction is ALWAYS last (H2 — Lost in the Middle fix)
    parts.push(`\n\n## 当前任务指令\n${intentInstruction}`);

    const assembled = parts.join('\n\n');

    const estimatedTokens = this.estimateTokens(assembled);
    logger.debug('[SSA:SystemPrompt] Assembled', {
      sessionId,
      intent,
      estimatedTokens,
      hasData: !!blackboard,
      hasToolOutput: !!toolOutputs,
    });

    if (estimatedTokens > MAX_SYSTEM_TOKENS) {
      logger.warn('[SSA:SystemPrompt] Exceeded token budget', {
        estimatedTokens,
        maxTokens: MAX_SYSTEM_TOKENS,
      });
    }

    return assembled;
  }

  /**
   * Load a prompt template's content, falling back to built-in text on failure.
   * The failure is logged instead of being swallowed silently, so a missing or
   * broken template is visible to operators while the chat flow keeps working.
   *
   * @param code - Prompt code, used only for the log entry.
   * @param render - Loads the rendered prompt.
   * @param fallback - Produces the built-in replacement text.
   */
  private async renderOrFallback(
    code: string,
    render: () => PromiseLike<{ content: string }> | { content: string },
    fallback: () => string,
  ): Promise<string> {
    try {
      const rendered = await render();
      return rendered.content;
    } catch (error: unknown) {
      logger.warn('[SSA:SystemPrompt] Prompt template unavailable, using fallback', {
        code,
        error: error instanceof Error ? error.message : String(error),
      });
      return fallback();
    }
  }

  /** Rough token estimate used throughout this service: two characters per token, rounded up. */
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 2);
  }

  /**
   * Token budget left for the data context after the base prompt, the tool
   * outputs and a fixed reserve for the intent instruction. Never below 500.
   */
  private calculateDataBudget(baseSystem: string, toolOutputs?: string): number {
    const baseTokens = this.estimateTokens(baseSystem);
    const toolTokens = toolOutputs ? this.estimateTokens(toolOutputs) : 0;
    const intentReserve = 500; // intent instruction reserve
    return Math.max(500, MAX_SYSTEM_TOKENS - baseTokens - toolTokens - intentReserve);
  }

  /** Built-in base role prompt, used when the 'SSA_BASE_SYSTEM' template cannot be loaded. */
  private fallbackBaseSystem(): string {
    return `你是 SSA-Pro 智能统计分析助手。你的职责是**规划、解释和沟通**,而非计算。

## 你的身份与职能边界

你是「分析规划者」和「结果解读者」,不是「计算引擎」。
系统后端有独立的 R 统计计算引擎,所有统计计算均由 R 引擎完成。

### 你可以做的:
- 理解用户的分析需求,识别意图
- 推荐合适的统计方法,解释选择理由
- 制定分析方案(选择工具、参数)
- 解读 R 引擎返回的真实结果
- 用通俗语言向研究者解释统计概念

### 绝对禁止:
- **禁止编造或生成任何数值结果**(P值、均值、标准差、置信区间、检验统计量等)
- **禁止模拟或假设分析结果**(即使用户催促,也不能捏造数据)
- **禁止生成结果表格**(除非表格数据来自 R 引擎的真实输出)
- 如果还没有 R 引擎的执行结果,只能说"正在等待执行"或"方案已确认,即将启动分析"

### 关键原则:
没有 R 引擎的真实输出 → 不回答任何具体数值。
这是铁律,违反将导致临床研究的严重错误。

沟通原则:
- 使用中文回复
- 语言专业但不晦涩
- 分点作答,条理清晰
- 对不确定的内容如实说明`;
  }

  /** Built-in intent instruction, used when the intent's prompt template cannot be loaded. */
  private fallbackIntentInstruction(intent: IntentType): string {
    const map: Record<IntentType, string> = {
      chat: '请基于统计知识和用户数据直接回答用户的问题。不要主动建议执行分析,除非用户明确要求。简洁作答,分点清晰。禁止编造任何数值。',
      explore: '用户想了解数据的特征。请基于上方的数据摘要信息,帮用户解读数据特征(缺失、分布、异常值等)。可以推断 PICO 结构。不要执行分析,不要编造统计数值。',
      consult: '用户在咨询统计方法。请根据数据特征和研究目的推荐合适的统计方法,给出选择理由和前提条件。不要直接执行分析。提供替代方案。禁止给出任何假设的分析结果数值。',
      analyze: '你正在协助用户进行分析规划。你的职责限于:解释分析方案的思路和方法选择理由。禁止生成任何P值、统计量、均值、分析结果表格。所有数值结果只能来自 R 引擎的真实执行输出。如果 R 引擎还没有返回结果,只能说明方案状态,不能自行填充结果。',
      discuss: '用户想讨论分析结果。请仅基于 R 引擎返回的真实数据帮助用户解读,解释统计量的含义,讨论临床意义和局限性。禁止补充或编造 R 引擎未返回的数值。',
      feedback: '用户对之前的分析结果不满意或有改进建议。请分析问题原因,提出改进方案(如更换统计方法、调整参数等)。禁止编造数值来论证改进效果。',
    };
    return map[intent];
  }
}
|
||
|
||
/** Module-level SystemPromptService instance exported for use by other modules. */
export const systemPromptService = new SystemPromptService();
|