Files
AIclinicalresearch/backend/src/modules/ssa/services/SystemPromptService.ts
HaHafeng 85fda830c2 feat(ssa): Complete Phase V-A editable analysis plan variables
Features:
- Add editable variable selection in workflow plan (SingleVarSelect + MultiVarTags)
- Implement 3-layer flexible interception (warning bar + icon + blocking dialog)
- Add tool_param_constraints.json for 12 statistical tools parameter validation
- Add PATCH /workflow/:id/params API with Zod structural validation
- Implement synchronous parameter sync before execution (Promise chaining)
- Fix LLM hallucination by strict system prompt constraints
- Fix DynamicReport object-based rows compatibility (R baseline_table)
- Fix Word export row.map error with same normalization logic
- Restore inferGroupingVar for smart default variable selection
- Add ReactMarkdown rendering in SSAChatPane
- Update SSA module status document to v3.5

Modified files:
- backend: workflow.routes, ChatHandlerService, SystemPromptService, FlowTemplateService
- frontend: WorkflowTimeline, SSAWorkspacePane, DynamicReport, SSAChatPane, ssaStore, ssa.css
- config: tool_param_constraints.json (new)
- docs: SSA status doc, team review reports

Tested: Cohort study end-to-end execution + report export verified
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-24 13:08:29 +08:00

171 lines
7.0 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Phase II — System Prompt 动态组装服务
*
* 六段式组装(H2 Lost-in-the-Middle 修正):
* [1] base_system — 固定角色定义
* [2] data_context — DataOverview 摘要
* [3] pico_inference — PICO 分类
* [4] variable_dictionary — 变量字典摘要
* [5] tool_outputs — 工具调用结果(冗长数据放中间)
* [6] intent_instruction — 意图指令(核心指令放最后,永不裁剪)
*
* Token 预算 <= 4000(C2),超出按 [5] > [4] > [3] > [2] 优先级裁剪。
* [6] intent_instruction 永不裁剪。
*/
import { logger } from '../../../common/logging/index.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { prisma } from '../../../config/database.js';
import { tokenTruncationService, type TruncationOptions } from './TokenTruncationService.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import type { SessionBlackboard } from '../types/session-blackboard.types.js';
/** Upper bound, in estimated tokens, used when budgeting the assembled system prompt. */
const MAX_SYSTEM_TOKENS = 4000;

/** Conversation intents for which an intent instruction can be assembled. */
export type IntentType =
  | 'chat'
  | 'explore'
  | 'consult'
  | 'analyze'
  | 'discuss'
  | 'feedback';

/** Prompt code looked up through the prompt service for each intent. */
const INTENT_PROMPT_CODES: Record<IntentType, string> = {
  analyze: 'SSA_INTENT_ANALYZE',
  chat: 'SSA_INTENT_CHAT',
  consult: 'SSA_INTENT_CONSULT',
  discuss: 'SSA_INTENT_DISCUSS',
  explore: 'SSA_INTENT_EXPLORE',
  feedback: 'SSA_INTENT_FEEDBACK',
};
/**
 * Assembles the system prompt for one SSA chat turn.
 *
 * Sections are emitted in this order: base role prompt, data context built
 * from the session blackboard, tool outputs, and finally the intent
 * instruction. Managed prompts are fetched through the prompt service; when a
 * fetch fails, a built-in fallback text is used and a warning is logged.
 */
export class SystemPromptService {
  /**
   * Build the complete system prompt.
   *
   * Token usage is estimated as one token per two characters. Exceeding
   * MAX_SYSTEM_TOKENS is only logged as a warning; the assembled prompt is
   * returned untrimmed.
   *
   * @param sessionId   Session whose blackboard supplies the data context.
   * @param intent      Intent that selects the closing instruction.
   * @param toolOutputs Optional tool result text placed before the instruction.
   * @returns The assembled prompt text.
   */
  async assemble(
    sessionId: string,
    intent: IntentType,
    toolOutputs?: string,
  ): Promise<string> {
    const promptService = getPromptService(prisma);

    // [1] Base system role
    const baseSystem = await this.loadPromptOrFallback(
      promptService,
      'SSA_BASE_SYSTEM',
      () => this.fallbackBaseSystem(),
    );

    // [2-4] Data context from the session blackboard, truncated to the
    // budget left after the base prompt, tool outputs and intent reserve.
    let dataContextBlock = '';
    const blackboard = await sessionBlackboardService.get(sessionId);
    if (blackboard) {
      const truncated = tokenTruncationService.truncate(blackboard, {
        maxTokens: this.calculateDataBudget(baseSystem, toolOutputs),
        strategy: 'balanced',
      });
      dataContextBlock = tokenTruncationService.toPromptString(truncated);
    }

    // [5] Tool outputs sit in the middle of the prompt
    const toolBlock = toolOutputs
      ? `\n\n## 工具执行结果\n${toolOutputs}`
      : '';

    // [6] Intent instruction always goes last
    const intentInstruction = await this.loadPromptOrFallback(
      promptService,
      INTENT_PROMPT_CODES[intent],
      () => this.fallbackIntentInstruction(intent),
    );

    // Assemble: [1] base -> [2-4] data context -> [5] tool outputs -> [6] intent
    const parts: string[] = [baseSystem];
    if (dataContextBlock) {
      parts.push(dataContextBlock);
    }
    if (toolBlock) {
      parts.push(toolBlock);
    }
    parts.push(`\n\n## 当前任务指令\n${intentInstruction}`);

    const assembled = parts.join('\n\n');
    const estimatedTokens = Math.ceil(assembled.length / 2);

    logger.debug('[SSA:SystemPrompt] Assembled', {
      sessionId,
      intent,
      estimatedTokens,
      hasData: !!blackboard,
      hasToolOutput: !!toolOutputs,
    });

    if (estimatedTokens > MAX_SYSTEM_TOKENS) {
      logger.warn('[SSA:SystemPrompt] Exceeded token budget', {
        estimatedTokens,
        maxTokens: MAX_SYSTEM_TOKENS,
      });
    }

    return assembled;
  }

  /**
   * Fetch a managed prompt by code, or return the built-in fallback text.
   *
   * A failed fetch is deliberately non-fatal, but it is logged so that a
   * broken or unseeded prompt store does not go unnoticed.
   */
  private async loadPromptOrFallback(
    promptService: ReturnType<typeof getPromptService>,
    promptCode: string,
    fallback: () => string,
  ): Promise<string> {
    try {
      const rendered = await promptService.get(promptCode, {});
      return rendered.content;
    } catch (error: unknown) {
      logger.warn('[SSA:SystemPrompt] Prompt unavailable, using built-in fallback', {
        promptCode,
        error: error instanceof Error ? error.message : String(error),
      });
      return fallback();
    }
  }

  /**
   * Token budget left for the data context: the overall limit minus the
   * estimated base prompt, tool outputs and a fixed intent reserve, but
   * never less than 500.
   */
  private calculateDataBudget(baseSystem: string, toolOutputs?: string): number {
    const baseTokens = Math.ceil(baseSystem.length / 2);
    const toolTokens = toolOutputs ? Math.ceil(toolOutputs.length / 2) : 0;
    const intentReserve = 500; // held back for the intent instruction
    return Math.max(500, MAX_SYSTEM_TOKENS - baseTokens - toolTokens - intentReserve);
  }

  /** Built-in base role prompt used when SSA_BASE_SYSTEM cannot be loaded. */
  private fallbackBaseSystem(): string {
    return `你是 SSA-Pro 智能统计分析助手。你的职责是**规划、解释和沟通**,而非计算。
## 你的身份与职能边界
你是「分析规划者」和「结果解读者」,不是「计算引擎」。
系统后端有独立的 R 统计计算引擎,所有统计计算均由 R 引擎完成。
### 你可以做的:
- 理解用户的分析需求,识别意图
- 推荐合适的统计方法,解释选择理由
- 制定分析方案(选择工具、参数)
- 解读 R 引擎返回的真实结果
- 用通俗语言向研究者解释统计概念
### 绝对禁止:
- **禁止编造或生成任何数值结果**P值、均值、标准差、置信区间、检验统计量等
- **禁止模拟或假设分析结果**(即使用户催促,也不能捏造数据)
- **禁止生成结果表格**(除非表格数据来自 R 引擎的真实输出)
- 如果还没有 R 引擎的执行结果,只能说"正在等待执行"或"方案已确认,即将启动分析"
### 关键原则:
没有 R 引擎的真实输出 → 不回答任何具体数值。
这是铁律,违反将导致临床研究的严重错误。
沟通原则:
- 使用中文回复
- 语言专业但不晦涩
- 分点作答,条理清晰
- 对不确定的内容如实说明`;
  }

  /**
   * Built-in intent instruction used when the managed prompt cannot be loaded.
   *
   * An intent outside the known set (possible at runtime despite the type)
   * falls back to the `chat` instruction, so the literal text "undefined"
   * is never interpolated into the prompt.
   */
  private fallbackIntentInstruction(intent: IntentType): string {
    const map: Record<IntentType, string> = {
      chat: '请基于统计知识和用户数据直接回答用户的问题。不要主动建议执行分析,除非用户明确要求。简洁作答,分点清晰。禁止编造任何数值。',
      explore: '用户想了解数据的特征。请基于上方的数据摘要信息,帮用户解读数据特征(缺失、分布、异常值等)。可以推断 PICO 结构。不要执行分析,不要编造统计数值。',
      consult: '用户在咨询统计方法。请根据数据特征和研究目的推荐合适的统计方法,给出选择理由和前提条件。不要直接执行分析。提供替代方案。禁止给出任何假设的分析结果数值。',
      analyze: '你正在协助用户进行分析规划。你的职责限于解释分析方案的思路和方法选择理由。禁止生成任何P值、统计量、均值、分析结果表格。所有数值结果只能来自 R 引擎的真实执行输出。如果 R 引擎还没有返回结果,只能说明方案状态,不能自行填充结果。',
      discuss: '用户想讨论分析结果。请仅基于 R 引擎返回的真实数据帮助用户解读,解释统计量的含义,讨论临床意义和局限性。禁止补充或编造 R 引擎未返回的数值。',
      feedback: '用户对之前的分析结果不满意或有改进建议。请分析问题原因,提出改进方案(如更换统计方法、调整参数等)。禁止编造数值来论证改进效果。',
    };
    return map[intent] ?? map.chat;
  }
}
/** Module-level SystemPromptService instance exported for use by other modules. */
export const systemPromptService = new SystemPromptService();