feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development
Phase I - Session Blackboard + READ Layer: - SessionBlackboardService with Postgres-Only cache - DataProfileService for data overview generation - PicoInferenceService for LLM-driven PICO extraction - Frontend DataContextCard and VariableDictionaryPanel - E2E tests: 31/31 passed Phase II - Conversation Layer LLM + Intent Router: - ConversationService with SSE streaming - IntentRouterService (rule-first + LLM fallback, 6 intents) - SystemPromptService with 6-segment dynamic assembly - TokenTruncationService for context management - ChatHandlerService as unified chat entry - Frontend SSAChatPane and useSSAChat hook - E2E tests: 38/38 passed Phase III - Method Consultation + AskUser Standardization: - ToolRegistryService with Repository Pattern - MethodConsultService with DecisionTable + LLM enhancement - AskUserService with global interrupt handling - Frontend AskUserCard component - E2E tests: 13/13 passed Phase IV - Dialogue-Driven Analysis + QPER Integration: - ToolOrchestratorService (plan/execute/report) - analysis_plan SSE event for WorkflowPlan transmission - Dual-channel confirmation (ask_user card + workspace button) - PICO as optional hint for LLM parsing - E2E tests: 25/25 passed R Statistics Service: - 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon - Enhanced guardrails and block helpers - Comprehensive test suite (run_all_tools_test.js) Documentation: - Updated system status document (v5.9) - Updated SSA module status and development plan (v1.8) Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25) Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -19,8 +19,8 @@
|
||||
"predictorType": "binary",
|
||||
"design": "paired",
|
||||
"primaryTool": "ST_T_TEST_PAIRED",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"fallbackTool": "ST_WILCOXON",
|
||||
"switchCondition": "normality_fail: 差值 Shapiro-Wilk P<0.05 时切换 Wilcoxon 符号秩检验",
|
||||
"templateId": "paired_analysis",
|
||||
"priority": 10,
|
||||
"description": "配对设计前后对比"
|
||||
@@ -31,12 +31,12 @@
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "categorical",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_T_TEST_IND",
|
||||
"fallbackTool": "ST_MANN_WHITNEY",
|
||||
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
|
||||
"primaryTool": "ST_ANOVA_ONE",
|
||||
"fallbackTool": "ST_ANOVA_ONE",
|
||||
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05 时内部自动切换 Kruskal-Wallis",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 5,
|
||||
"description": "多组连续变量比较(暂用 T 检验处理两组场景,ANOVA 待扩展)"
|
||||
"description": "多组连续变量比较(ANOVA / Kruskal-Wallis)"
|
||||
},
|
||||
{
|
||||
"id": "DIFF_CAT_CAT_IND",
|
||||
@@ -45,12 +45,25 @@
|
||||
"predictorType": "categorical",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_CHI_SQUARE",
|
||||
"fallbackTool": "ST_CHI_SQUARE",
|
||||
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
|
||||
"fallbackTool": "ST_FISHER",
|
||||
"switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 10,
|
||||
"description": "两个分类变量的独立性检验"
|
||||
},
|
||||
{
|
||||
"id": "DIFF_CAT_CAT_SMALL",
|
||||
"goal": "comparison",
|
||||
"outcomeType": "categorical",
|
||||
"predictorType": "binary",
|
||||
"design": "independent",
|
||||
"primaryTool": "ST_FISHER",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 8,
|
||||
"description": "小样本分类变量独立性检验(Fisher 精确检验)"
|
||||
},
|
||||
{
|
||||
"id": "ASSOC_CONT_CONT",
|
||||
"goal": "correlation",
|
||||
@@ -71,8 +84,8 @@
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_CHI_SQUARE",
|
||||
"fallbackTool": "ST_CHI_SQUARE",
|
||||
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
|
||||
"fallbackTool": "ST_FISHER",
|
||||
"switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
|
||||
"templateId": "standard_analysis",
|
||||
"priority": 5,
|
||||
"description": "分类变量关联分析"
|
||||
@@ -96,12 +109,12 @@
|
||||
"outcomeType": "continuous",
|
||||
"predictorType": "*",
|
||||
"design": "*",
|
||||
"primaryTool": "ST_CORRELATION",
|
||||
"primaryTool": "ST_LINEAR_REG",
|
||||
"fallbackTool": null,
|
||||
"switchCondition": null,
|
||||
"templateId": "regression_analysis",
|
||||
"priority": 5,
|
||||
"description": "连续结局的回归分析(线性回归待扩展,暂用相关分析)"
|
||||
"description": "连续结局的多因素线性回归分析"
|
||||
},
|
||||
{
|
||||
"id": "DESC_ANY",
|
||||
|
||||
@@ -45,16 +45,16 @@
|
||||
{
|
||||
"order": 1,
|
||||
"role": "baseline_table",
|
||||
"tool": "ST_DESCRIPTIVE",
|
||||
"tool": "ST_BASELINE_TABLE",
|
||||
"name": "表1: 组间基线特征比较",
|
||||
"paramsMapping": { "group_var": "{{grouping_var}}", "variables": "{{all_predictors}}" }
|
||||
"paramsMapping": { "group_var": "{{grouping_var}}", "analyze_vars": "{{all_predictors}}" }
|
||||
},
|
||||
{
|
||||
"order": 2,
|
||||
"role": "univariate_screen",
|
||||
"tool": "ST_DESCRIPTIVE",
|
||||
"tool": "ST_BASELINE_TABLE",
|
||||
"name": "表2: 结局指标单因素分析",
|
||||
"paramsMapping": { "group_var": "{{outcome_var}}", "variables": "{{all_predictors}}" }
|
||||
"paramsMapping": { "group_var": "{{outcome_var}}", "analyze_vars": "{{all_predictors}}" }
|
||||
},
|
||||
{
|
||||
"order": 3,
|
||||
|
||||
58
backend/src/modules/ssa/config/intent_rules.json
Normal file
58
backend/src/modules/ssa/config/intent_rules.json
Normal file
@@ -0,0 +1,58 @@
|
||||
{
|
||||
"rules": [
|
||||
{
|
||||
"intent": "analyze",
|
||||
"keywords": ["分析", "检验", "t检验", "卡方", "回归", "比较一下", "跑一下", "执行分析", "做个分析", "方差分析", "ANOVA", "相关分析", "logistic", "生存分析", "Cox", "基线表"],
|
||||
"excludeKeywords": ["什么方法", "用什么", "应该怎么", "推荐"],
|
||||
"requires": ["dataOverview"],
|
||||
"priority": 10
|
||||
},
|
||||
{
|
||||
"intent": "discuss",
|
||||
"keywords": ["什么意思", "说明什么", "怎么解释", "p值", "置信区间", "结果说明", "为什么显著", "为什么不显著", "临床意义", "效应量"],
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"priority": 9
|
||||
},
|
||||
{
|
||||
"intent": "feedback",
|
||||
"keywords": ["结果不对", "不太对", "换个方法", "重新分析", "有问题", "不满意", "重做"],
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"priority": 9
|
||||
},
|
||||
{
|
||||
"intent": "explore",
|
||||
"keywords": ["看看", "分布", "缺失", "概况", "有哪些变量", "数据特征", "异常值", "样本量", "描述一下数据", "多少例", "变量类型"],
|
||||
"requires": ["dataOverview"],
|
||||
"priority": 8
|
||||
},
|
||||
{
|
||||
"intent": "consult",
|
||||
"keywords": ["什么方法", "用什么", "应该怎么分析", "推荐方法", "分析方案", "哪种检验", "怎么选", "前提条件"],
|
||||
"requires": ["dataOverview"],
|
||||
"priority": 7
|
||||
}
|
||||
],
|
||||
"contextGuards": {
|
||||
"explore": {
|
||||
"requires": ["dataOverview"],
|
||||
"fallbackMessage": "您还没有上传数据。请先上传 CSV 或 Excel 文件,我就能帮您探索数据了。您也可以先问我统计方法相关的问题。"
|
||||
},
|
||||
"analyze": {
|
||||
"requires": ["dataOverview"],
|
||||
"fallbackMessage": "您还没有上传数据。请先上传数据文件,我才能帮您执行统计分析。"
|
||||
},
|
||||
"consult": {
|
||||
"requires": ["dataOverview"],
|
||||
"fallbackMessage": "如果您上传了数据,我可以根据数据特征给出更精准的方法推荐。不过您也可以直接描述研究设计,我来给出一般性建议。"
|
||||
},
|
||||
"discuss": {
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"fallbackMessage": "目前还没有分析结果可以讨论。请先执行一次统计分析,然后我们就可以深入讨论结果了。"
|
||||
},
|
||||
"feedback": {
|
||||
"requires": ["dataOverview", "hasAnalysisResults"],
|
||||
"fallbackMessage": "目前还没有分析结果可以改进。请先执行一次统计分析,如果对结果不满意,我来帮您调整。"
|
||||
}
|
||||
},
|
||||
"defaultIntent": "chat"
|
||||
}
|
||||
@@ -82,6 +82,65 @@
|
||||
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
|
||||
],
|
||||
"outputType": "regression"
|
||||
},
|
||||
{
|
||||
"code": "ST_FISHER",
|
||||
"name": "Fisher精确检验",
|
||||
"category": "categorical",
|
||||
"description": "小样本或稀疏列联表的精确独立性检验(卡方检验的替代方法)",
|
||||
"inputParams": [
|
||||
{ "name": "var1", "type": "string", "required": true, "description": "分类变量1" },
|
||||
{ "name": "var2", "type": "string", "required": true, "description": "分类变量2" }
|
||||
],
|
||||
"outputType": "association"
|
||||
},
|
||||
{
|
||||
"code": "ST_ANOVA_ONE",
|
||||
"name": "单因素方差分析",
|
||||
"category": "parametric",
|
||||
"description": "三组及以上独立样本的均值差异比较(含事后多重比较)",
|
||||
"inputParams": [
|
||||
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量(3+水平)" },
|
||||
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
|
||||
],
|
||||
"outputType": "comparison",
|
||||
"prerequisite": "正态分布 + 方差齐性",
|
||||
"fallback": "Kruskal-Wallis"
|
||||
},
|
||||
{
|
||||
"code": "ST_WILCOXON",
|
||||
"name": "Wilcoxon符号秩检验",
|
||||
"category": "nonparametric",
|
||||
"description": "配对样本的非参数检验(配对T检验的替代方法)",
|
||||
"inputParams": [
|
||||
{ "name": "before_var", "type": "string", "required": true, "description": "前测变量" },
|
||||
{ "name": "after_var", "type": "string", "required": true, "description": "后测变量" }
|
||||
],
|
||||
"outputType": "comparison"
|
||||
},
|
||||
{
|
||||
"code": "ST_LINEAR_REG",
|
||||
"name": "线性回归",
|
||||
"category": "regression",
|
||||
"description": "连续型结局变量的多因素线性回归分析",
|
||||
"inputParams": [
|
||||
{ "name": "outcome_var", "type": "string", "required": true, "description": "连续型结局变量" },
|
||||
{ "name": "predictors", "type": "string[]", "required": true, "description": "预测变量列表" },
|
||||
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
|
||||
],
|
||||
"outputType": "regression"
|
||||
},
|
||||
{
|
||||
"code": "ST_BASELINE_TABLE",
|
||||
"name": "基线特征表",
|
||||
"category": "composite",
|
||||
"description": "基于 gtsummary 的一键式基线特征表生成,自动判断变量类型、选方法、合并出表(复合工具)",
|
||||
"inputParams": [
|
||||
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量" },
|
||||
{ "name": "analyze_vars", "type": "string[]", "required": false, "description": "分析变量列表(不传则自动选取全部)" }
|
||||
],
|
||||
"outputType": "baseline_table",
|
||||
"composite": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -14,6 +14,8 @@ import analysisRoutes from './routes/analysis.routes.js';
|
||||
import consultRoutes from './routes/consult.routes.js';
|
||||
import configRoutes from './routes/config.routes.js';
|
||||
import workflowRoutes from './routes/workflow.routes.js';
|
||||
import blackboardRoutes from './routes/blackboard.routes.js';
|
||||
import chatRoutes from './routes/chat.routes.js';
|
||||
|
||||
export async function ssaRoutes(app: FastifyInstance) {
|
||||
// 注册认证中间件(遵循模块认证规范)
|
||||
@@ -26,6 +28,10 @@ export async function ssaRoutes(app: FastifyInstance) {
|
||||
app.register(configRoutes, { prefix: '/config' });
|
||||
// Phase 2A: 多步骤工作流
|
||||
app.register(workflowRoutes, { prefix: '/workflow' });
|
||||
// Phase I: Session 黑板 + READ 层
|
||||
app.register(blackboardRoutes, { prefix: '/sessions/:sessionId/blackboard' });
|
||||
// Phase II: 统一对话入口
|
||||
app.register(chatRoutes, { prefix: '/sessions' });
|
||||
}
|
||||
|
||||
export default ssaRoutes;
|
||||
|
||||
102
backend/src/modules/ssa/routes/blackboard.routes.ts
Normal file
102
backend/src/modules/ssa/routes/blackboard.routes.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
/**
|
||||
* Phase I — Session Blackboard + READ Layer 路由
|
||||
*
|
||||
* 路由前缀: /sessions/:sessionId/blackboard
|
||||
*
|
||||
* GET / — 获取完整 SessionBlackboard
|
||||
* POST /data-overview — 触发 get_data_overview 工具
|
||||
* POST /variable-detail — 触发 get_variable_detail 工具
|
||||
* PATCH /variables/:name — 用户确认/修改变量类型
|
||||
*/
|
||||
|
||||
import { FastifyInstance, FastifyRequest } from 'fastify';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { sessionBlackboardService } from '../services/SessionBlackboardService.js';
|
||||
import { executeGetDataOverview } from '../services/tools/GetDataOverviewTool.js';
|
||||
import { executeGetVariableDetail } from '../services/tools/GetVariableDetailTool.js';
|
||||
import type { ColumnType, PicoRole, VariableDictPatch } from '../types/session-blackboard.types.js';
|
||||
|
||||
export default async function blackboardRoutes(app: FastifyInstance) {
|
||||
|
||||
// GET /sessions/:sessionId/blackboard — 获取完整黑板
|
||||
app.get('/', async (req, reply) => {
|
||||
const { sessionId } = req.params as { sessionId: string };
|
||||
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
if (!blackboard) {
|
||||
return reply.status(404).send({ error: 'Blackboard not found for this session' });
|
||||
}
|
||||
|
||||
const report = blackboard.dataOverview
|
||||
? sessionBlackboardService.generateFiveSectionReport(
|
||||
blackboard.dataOverview,
|
||||
blackboard.variableDictionary,
|
||||
)
|
||||
: null;
|
||||
|
||||
return reply.send({ blackboard, report });
|
||||
});
|
||||
|
||||
// POST /sessions/:sessionId/blackboard/data-overview — 执行 get_data_overview
|
||||
app.post('/data-overview', async (req, reply) => {
|
||||
const { sessionId } = req.params as { sessionId: string };
|
||||
|
||||
logger.info('[SSA:Route] Triggering data overview', { sessionId });
|
||||
|
||||
const result = await executeGetDataOverview(sessionId);
|
||||
|
||||
if (!result.success) {
|
||||
return reply.status(400).send({ error: result.error });
|
||||
}
|
||||
|
||||
return reply.send({
|
||||
success: true,
|
||||
report: result.report,
|
||||
});
|
||||
});
|
||||
|
||||
// POST /sessions/:sessionId/blackboard/variable-detail — 执行 get_variable_detail
|
||||
app.post('/variable-detail', async (req, reply) => {
|
||||
const { sessionId } = req.params as { sessionId: string };
|
||||
const { variableName, confirmedType, label } = req.body as {
|
||||
variableName: string;
|
||||
confirmedType?: ColumnType;
|
||||
label?: string;
|
||||
};
|
||||
|
||||
if (!variableName) {
|
||||
return reply.status(400).send({ error: 'variableName is required' });
|
||||
}
|
||||
|
||||
logger.info('[SSA:Route] Triggering variable detail', { sessionId, variableName });
|
||||
|
||||
const result = await executeGetVariableDetail(sessionId, variableName, confirmedType, label);
|
||||
|
||||
if (!result.success) {
|
||||
return reply.status(400).send({ error: result.error });
|
||||
}
|
||||
|
||||
return reply.send(result);
|
||||
});
|
||||
|
||||
// PATCH /sessions/:sessionId/blackboard/variables/:name — 更新变量字典条目
|
||||
app.patch('/variables/:name', async (req, reply) => {
|
||||
const { sessionId, name } = req.params as { sessionId: string; name: string };
|
||||
const body = req.body as {
|
||||
confirmedType?: ColumnType;
|
||||
label?: string;
|
||||
picoRole?: PicoRole | null;
|
||||
};
|
||||
|
||||
logger.info('[SSA:Route] Updating variable dictionary entry', { sessionId, name, body });
|
||||
|
||||
const dictPatch: VariableDictPatch = {};
|
||||
if (body.confirmedType !== undefined) dictPatch.confirmedType = body.confirmedType;
|
||||
if (body.label !== undefined) dictPatch.label = body.label;
|
||||
if (body.picoRole !== undefined) dictPatch.picoRole = body.picoRole;
|
||||
|
||||
await sessionBlackboardService.updateVariable(sessionId, name, dictPatch);
|
||||
|
||||
return reply.send({ success: true });
|
||||
});
|
||||
}
|
||||
217
backend/src/modules/ssa/routes/chat.routes.ts
Normal file
217
backend/src/modules/ssa/routes/chat.routes.ts
Normal file
@@ -0,0 +1,217 @@
|
||||
/**
|
||||
* Phase II — 统一对话 API 路由
|
||||
*
|
||||
* POST /sessions/:id/chat — 统一对话入口(SSE 流式)
|
||||
* GET /sessions/:id/chat/history — 获取对话历史
|
||||
* GET /sessions/:id/chat/conversation — 获取 conversation 元信息
|
||||
*
|
||||
* SSE 格式:OpenAI Compatible(与前端 useAIStream 兼容)
|
||||
* 心跳:5 秒(H1)
|
||||
* 竞态保护:placeholder 占位(H3)
|
||||
*/
|
||||
|
||||
import { FastifyInstance, FastifyRequest } from 'fastify';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { conversationService } from '../services/ConversationService.js';
|
||||
import { intentRouterService } from '../services/IntentRouterService.js';
|
||||
import { chatHandlerService } from '../services/ChatHandlerService.js';
|
||||
import { askUserService } from '../services/AskUserService.js';
|
||||
|
||||
function getUserId(request: FastifyRequest): string {
|
||||
const userId = (request as any).user?.userId;
|
||||
if (!userId) throw new Error('User not authenticated');
|
||||
return userId;
|
||||
}
|
||||
|
||||
export default async function chatRoutes(app: FastifyInstance) {
|
||||
|
||||
/**
|
||||
* POST /sessions/:id/chat
|
||||
* 统一对话入口 — SSE 流式响应
|
||||
*/
|
||||
app.post('/:id/chat', async (req, reply) => {
|
||||
const { id: sessionId } = req.params as { id: string };
|
||||
const userId = getUserId(req);
|
||||
const { content, enableDeepThinking, metadata } = req.body as {
|
||||
content: string;
|
||||
enableDeepThinking?: boolean;
|
||||
metadata?: Record<string, any>;
|
||||
};
|
||||
|
||||
if (!content?.trim()) {
|
||||
return reply.status(400).send({ error: '消息内容不能为空' });
|
||||
}
|
||||
|
||||
// SSE 响应头
|
||||
reply.raw.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'Access-Control-Allow-Origin': '*',
|
||||
'X-Accel-Buffering': 'no',
|
||||
});
|
||||
|
||||
const writer = {
|
||||
write: (data: string) => {
|
||||
try {
|
||||
return reply.raw.write(data);
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
},
|
||||
end: () => {
|
||||
try { reply.raw.end(); } catch { /* ignore */ }
|
||||
},
|
||||
on: (event: string, handler: () => void) => {
|
||||
req.raw.on(event, handler);
|
||||
},
|
||||
};
|
||||
|
||||
try {
|
||||
// 1. 获取或创建 Conversation(延迟创建)
|
||||
const conversationId = await conversationService.getOrCreateConversation(sessionId, userId);
|
||||
|
||||
// 2. 保存用户消息
|
||||
await conversationService.saveUserMessage(conversationId, content.trim());
|
||||
|
||||
// ── H1 全局打断判定 ──
|
||||
const pending = await askUserService.getPending(sessionId);
|
||||
if (pending) {
|
||||
const askUserResponse = metadata?.askUserResponse
|
||||
? askUserService.parseResponse(metadata)
|
||||
: null;
|
||||
|
||||
if (askUserResponse) {
|
||||
// 正常回答问题(含 skip)
|
||||
const placeholderMsgId = await conversationService.createAssistantPlaceholder(
|
||||
conversationId, 'chat',
|
||||
);
|
||||
|
||||
const metaEvent = JSON.stringify({
|
||||
type: 'intent_classified',
|
||||
intent: 'chat',
|
||||
confidence: 1,
|
||||
source: 'ask_user_response',
|
||||
guardTriggered: false,
|
||||
});
|
||||
writer.write(`data: ${metaEvent}\n\n`);
|
||||
|
||||
const result = await chatHandlerService.handleAskUserResponse(
|
||||
sessionId, conversationId, askUserResponse, writer, placeholderMsgId,
|
||||
);
|
||||
|
||||
logger.info('[SSA:Chat] AskUser response handled', {
|
||||
sessionId, action: askUserResponse.action, success: result.success,
|
||||
});
|
||||
|
||||
writer.end();
|
||||
return;
|
||||
} else {
|
||||
// 用户无视卡片,强行打字转移话题
|
||||
await askUserService.clearPending(sessionId);
|
||||
logger.info('[SSA:Chat] 用户转移话题,已取消挂起的 ask_user 状态', { sessionId });
|
||||
}
|
||||
}
|
||||
// ── H1 结束 ──
|
||||
|
||||
// 3. 意图分类
|
||||
const intentResult = await intentRouterService.classify(content.trim(), sessionId);
|
||||
|
||||
// 发送意图元数据事件(前端可用于 UI 切换)
|
||||
const metaEvent = JSON.stringify({
|
||||
type: 'intent_classified',
|
||||
intent: intentResult.intent,
|
||||
confidence: intentResult.confidence,
|
||||
source: intentResult.source,
|
||||
guardTriggered: intentResult.guardTriggered || false,
|
||||
guardMessage: intentResult.guardMessage,
|
||||
});
|
||||
writer.write(`data: ${metaEvent}\n\n`);
|
||||
|
||||
// 4. 创建 assistant placeholder(H3 竞态保护)
|
||||
const placeholderMsgId = await conversationService.createAssistantPlaceholder(
|
||||
conversationId, intentResult.intent,
|
||||
);
|
||||
|
||||
// 5. 分发到意图处理器
|
||||
const result = await chatHandlerService.handle(
|
||||
sessionId, conversationId, content.trim(),
|
||||
intentResult, writer, placeholderMsgId,
|
||||
);
|
||||
|
||||
logger.info('[SSA:Chat] Request completed', {
|
||||
sessionId,
|
||||
intent: result.intent,
|
||||
success: result.success,
|
||||
});
|
||||
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:Chat] Unhandled error', {
|
||||
sessionId,
|
||||
error: error.message,
|
||||
});
|
||||
|
||||
const errorEvent = JSON.stringify({
|
||||
type: 'error',
|
||||
code: 'CHAT_ERROR',
|
||||
message: error.message || '处理消息时发生错误',
|
||||
});
|
||||
try {
|
||||
writer.write(`data: ${errorEvent}\n\n`);
|
||||
} catch { /* ignore */ }
|
||||
} finally {
|
||||
writer.end();
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /sessions/:id/chat/history
|
||||
* 获取对话历史消息
|
||||
*/
|
||||
app.get('/:id/chat/history', async (req, reply) => {
|
||||
const { id: sessionId } = req.params as { id: string };
|
||||
|
||||
const conversation = await conversationService.getConversationBySession(sessionId);
|
||||
if (!conversation) {
|
||||
return reply.send({ messages: [], conversationId: null });
|
||||
}
|
||||
|
||||
const messages = await conversationService.getMessages(conversation.id);
|
||||
|
||||
return reply.send({
|
||||
conversationId: conversation.id,
|
||||
messages: messages.map(m => ({
|
||||
id: m.id,
|
||||
role: m.role,
|
||||
content: m.content,
|
||||
thinkingContent: m.thinkingContent,
|
||||
intent: (m.metadata as any)?.intent,
|
||||
status: (m.metadata as any)?.status,
|
||||
createdAt: m.createdAt,
|
||||
})),
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /sessions/:id/chat/conversation
|
||||
* 获取 conversation 元信息
|
||||
*/
|
||||
app.get('/:id/chat/conversation', async (req, reply) => {
|
||||
const { id: sessionId } = req.params as { id: string };
|
||||
|
||||
const conversation = await conversationService.getConversationBySession(sessionId);
|
||||
if (!conversation) {
|
||||
return reply.send({ conversation: null });
|
||||
}
|
||||
|
||||
return reply.send({
|
||||
conversation: {
|
||||
id: conversation.id,
|
||||
title: conversation.title,
|
||||
messageCount: conversation.messageCount,
|
||||
createdAt: conversation.createdAt,
|
||||
updatedAt: conversation.updatedAt,
|
||||
},
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -13,6 +13,9 @@ import { prisma } from '../../../config/database.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { storage } from '../../../common/storage/index.js';
|
||||
import { DataParserService } from '../services/DataParserService.js';
|
||||
import { executeGetDataOverview } from '../services/tools/GetDataOverviewTool.js';
|
||||
import { picoInferenceService } from '../services/PicoInferenceService.js';
|
||||
import { sessionBlackboardService } from '../services/SessionBlackboardService.js';
|
||||
|
||||
function getUserId(request: FastifyRequest): string {
|
||||
const userId = (request as any).user?.userId;
|
||||
@@ -87,6 +90,15 @@ export default async function sessionRoutes(app: FastifyInstance) {
|
||||
sessionId: session.id,
|
||||
hasFile: !!dataOssKey
|
||||
});
|
||||
|
||||
// Phase I: 文件上传后异步触发 data overview + PICO 推断(不阻塞响应)
|
||||
if (dataOssKey) {
|
||||
triggerDataOverviewAsync(session.id).catch((err) => {
|
||||
logger.error('[SSA:Session] Async data overview trigger failed', {
|
||||
sessionId: session.id, error: err.message,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// 返回前端期望的格式
|
||||
return reply.send({
|
||||
@@ -135,4 +147,106 @@ export default async function sessionRoutes(app: FastifyInstance) {
|
||||
|
||||
return reply.send(messages);
|
||||
});
|
||||
|
||||
/**
|
||||
* GET /sessions/:id/data-context/stream
|
||||
* Phase I: SSE 端点 — 实时推送 data overview + PICO 推断进度
|
||||
*/
|
||||
app.get('/:id/data-context/stream', async (req, reply) => {
|
||||
const { id } = req.params as { id: string };
|
||||
|
||||
reply.raw.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'Access-Control-Allow-Origin': '*',
|
||||
});
|
||||
|
||||
const send = (type: string, data: any) => {
|
||||
reply.raw.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
|
||||
};
|
||||
|
||||
send('connected', { sessionId: id });
|
||||
|
||||
const heartbeat = setInterval(() => {
|
||||
reply.raw.write(':heartbeat\n\n');
|
||||
}, 15000);
|
||||
|
||||
const cleanup = () => {
|
||||
clearInterval(heartbeat);
|
||||
reply.raw.end();
|
||||
};
|
||||
|
||||
req.raw.on('close', cleanup);
|
||||
|
||||
try {
|
||||
// Step 1: Data Overview
|
||||
send('data_overview_start', { message: '正在生成数据概览...' });
|
||||
const overviewResult = await executeGetDataOverview(id);
|
||||
|
||||
if (!overviewResult.success) {
|
||||
send('data_overview_error', { error: overviewResult.error });
|
||||
cleanup();
|
||||
return;
|
||||
}
|
||||
|
||||
send('data_overview_complete', {
|
||||
report: overviewResult.report,
|
||||
});
|
||||
|
||||
// Step 2: PICO Inference
|
||||
if (overviewResult.blackboard?.dataOverview && overviewResult.blackboard?.variableDictionary) {
|
||||
send('pico_start', { message: '正在推断 PICO 结构...' });
|
||||
|
||||
const pico = await picoInferenceService.inferFromOverview(
|
||||
id,
|
||||
overviewResult.blackboard.dataOverview,
|
||||
overviewResult.blackboard.variableDictionary,
|
||||
);
|
||||
|
||||
if (pico) {
|
||||
send('pico_complete', { picoInference: pico });
|
||||
} else {
|
||||
send('pico_skip', { message: 'PICO 推断跳过(LLM 不可用或推断失败)' });
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: 完成
|
||||
const finalBlackboard = await sessionBlackboardService.get(id);
|
||||
send('data_context_complete', {
|
||||
blackboard: finalBlackboard,
|
||||
});
|
||||
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:SSE] Data context stream failed', { sessionId: id, error: error.message });
|
||||
send('data_context_error', { error: error.message });
|
||||
} finally {
|
||||
cleanup();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 异步触发 data overview(fire-and-forget,不阻塞 session 创建响应)
|
||||
*/
|
||||
async function triggerDataOverviewAsync(sessionId: string): Promise<void> {
|
||||
logger.info('[SSA:AutoTrigger] Starting async data overview', { sessionId });
|
||||
|
||||
const result = await executeGetDataOverview(sessionId);
|
||||
if (!result.success) {
|
||||
logger.warn('[SSA:AutoTrigger] Data overview failed', { sessionId, error: result.error });
|
||||
return;
|
||||
}
|
||||
|
||||
logger.info('[SSA:AutoTrigger] Data overview complete, starting PICO inference', { sessionId });
|
||||
|
||||
if (result.blackboard?.dataOverview && result.blackboard?.variableDictionary) {
|
||||
await picoInferenceService.inferFromOverview(
|
||||
sessionId,
|
||||
result.blackboard.dataOverview,
|
||||
result.blackboard.variableDictionary,
|
||||
);
|
||||
}
|
||||
|
||||
logger.info('[SSA:AutoTrigger] Full data context pipeline complete', { sessionId });
|
||||
}
|
||||
|
||||
196
backend/src/modules/ssa/services/AskUserService.ts
Normal file
196
backend/src/modules/ssa/services/AskUserService.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
/**
|
||||
* AskUserService — 统一交互卡片服务
|
||||
*
|
||||
* H3: 统一 AskUser 领域模型,取代旧 Clarification 概念。
|
||||
* H1: 支持 skip/打断 — clearPending 逃生门。
|
||||
*
|
||||
* 职责:
|
||||
* 1. 构建 AskUserEvent(SSE 推送给前端)
|
||||
* 2. 解析 AskUserResponse(前端用户回复)
|
||||
* 3. 持久化 pendingAskUser 到 SessionBlackboard
|
||||
* 4. 全局打断清理
|
||||
*/
|
||||
|
||||
import { randomUUID } from 'crypto';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Types (H3: 统一模型)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface AskUserOption {
|
||||
label: string;
|
||||
value: string;
|
||||
description?: string;
|
||||
}
|
||||
|
||||
export interface AskUserEvent {
|
||||
type: 'ask_user';
|
||||
questionId: string;
|
||||
question: string;
|
||||
context?: string;
|
||||
inputType: 'single_select' | 'multi_select' | 'free_text' | 'confirm';
|
||||
options?: AskUserOption[];
|
||||
defaultValue?: string;
|
||||
metadata?: Record<string, any>;
|
||||
}
|
||||
|
||||
export interface AskUserResponse {
|
||||
questionId: string;
|
||||
action: 'select' | 'skip' | 'free_text';
|
||||
selectedValues?: string[];
|
||||
freeText?: string;
|
||||
metadata?: Record<string, any>;
|
||||
}
|
||||
|
||||
export interface PendingAskUser {
|
||||
questionId: string;
|
||||
question: string;
|
||||
inputType: AskUserEvent['inputType'];
|
||||
metadata?: Record<string, any>;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Service
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export class AskUserService {
|
||||
|
||||
/**
|
||||
* 构建 ask_user 事件并写入黑板 pending 状态
|
||||
*/
|
||||
async createQuestion(
|
||||
sessionId: string,
|
||||
opts: {
|
||||
question: string;
|
||||
context?: string;
|
||||
inputType: AskUserEvent['inputType'];
|
||||
options?: AskUserOption[];
|
||||
defaultValue?: string;
|
||||
metadata?: Record<string, any>;
|
||||
},
|
||||
): Promise<AskUserEvent> {
|
||||
const questionId = randomUUID();
|
||||
|
||||
const event: AskUserEvent = {
|
||||
type: 'ask_user',
|
||||
questionId,
|
||||
question: opts.question,
|
||||
context: opts.context,
|
||||
inputType: opts.inputType,
|
||||
options: opts.options,
|
||||
defaultValue: opts.defaultValue,
|
||||
metadata: opts.metadata,
|
||||
};
|
||||
|
||||
// 持久化到黑板
|
||||
const pending: PendingAskUser = {
|
||||
questionId,
|
||||
question: opts.question,
|
||||
inputType: opts.inputType,
|
||||
metadata: opts.metadata,
|
||||
createdAt: new Date().toISOString(),
|
||||
};
|
||||
|
||||
await sessionBlackboardService.patch(sessionId, {
|
||||
pendingAskUser: pending,
|
||||
} as any);
|
||||
|
||||
logger.info('[SSA:AskUser] Question created', { sessionId, questionId, inputType: opts.inputType });
|
||||
return event;
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析用户回复
|
||||
*/
|
||||
parseResponse(metadata: Record<string, any>): AskUserResponse | null {
|
||||
const raw = metadata?.askUserResponse;
|
||||
if (!raw) return null;
|
||||
|
||||
return {
|
||||
questionId: raw.questionId || '',
|
||||
action: raw.action || 'select',
|
||||
selectedValues: raw.selectedValues,
|
||||
freeText: raw.freeText,
|
||||
metadata: raw.metadata,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查 session 是否有挂起的 ask_user
|
||||
*/
|
||||
async getPending(sessionId: string): Promise<PendingAskUser | null> {
|
||||
const bb = await sessionBlackboardService.get(sessionId);
|
||||
return (bb as any)?.pendingAskUser ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* H1: 清除挂起状态(用户跳过或打断时调用)
|
||||
*/
|
||||
async clearPending(sessionId: string): Promise<void> {
|
||||
await sessionBlackboardService.patch(sessionId, {
|
||||
pendingAskUser: null,
|
||||
} as any);
|
||||
|
||||
logger.info('[SSA:AskUser] Pending cleared', { sessionId });
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建方法确认卡片(consult 意图专用)
|
||||
*/
|
||||
buildMethodConfirmQuestion(
|
||||
primaryMethodName: string,
|
||||
primaryMethodCode: string,
|
||||
fallbackMethodName?: string,
|
||||
): {
|
||||
question: string;
|
||||
context: string;
|
||||
inputType: 'confirm';
|
||||
options: AskUserOption[];
|
||||
metadata: Record<string, any>;
|
||||
} {
|
||||
const options: AskUserOption[] = [
|
||||
{
|
||||
label: `确认,使用${primaryMethodName}`,
|
||||
value: 'confirm',
|
||||
description: '系统将开始执行分析',
|
||||
},
|
||||
];
|
||||
|
||||
if (fallbackMethodName) {
|
||||
options.push({
|
||||
label: `改用${fallbackMethodName}`,
|
||||
value: 'use_fallback',
|
||||
description: '使用备选方案',
|
||||
});
|
||||
}
|
||||
|
||||
options.push({
|
||||
label: '我想换个方法',
|
||||
value: 'change_method',
|
||||
description: '重新描述需求',
|
||||
});
|
||||
|
||||
return {
|
||||
question: `是否确认使用 ${primaryMethodName} 进行分析?`,
|
||||
context: '您可以确认执行,选择备选方案,或重新描述需求。',
|
||||
inputType: 'confirm',
|
||||
options,
|
||||
metadata: {
|
||||
recommendedMethod: primaryMethodCode,
|
||||
action: 'method_confirm',
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 AskUserEvent 序列化为 SSE data 行
|
||||
*/
|
||||
formatSSE(event: AskUserEvent): string {
|
||||
return `data: ${JSON.stringify(event)}\n\n`;
|
||||
}
|
||||
}
|
||||
|
||||
export const askUserService = new AskUserService();
|
||||
510
backend/src/modules/ssa/services/ChatHandlerService.ts
Normal file
510
backend/src/modules/ssa/services/ChatHandlerService.ts
Normal file
@@ -0,0 +1,510 @@
|
||||
/**
|
||||
* Phase II — 意图处理器(Intent Handlers)
|
||||
*
|
||||
* 按意图类型分发处理逻辑:
|
||||
* - chat: 直接 LLM 对话
|
||||
* - explore: READ 工具 + LLM 解读
|
||||
* - analyze: 转入 QPER 流水线 + LLM 摘要
|
||||
* - consult: LLM 方法推荐(Phase III 增强)
|
||||
* - discuss: LLM 结果解读(Phase V 增强)
|
||||
* - feedback: LLM 改进建议(Phase V 增强)
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { conversationService, type StreamWriter } from './ConversationService.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import { tokenTruncationService } from './TokenTruncationService.js';
|
||||
import { methodConsultService } from './MethodConsultService.js';
|
||||
import { askUserService, type AskUserResponse } from './AskUserService.js';
|
||||
import { toolOrchestratorService } from './ToolOrchestratorService.js';
|
||||
import type { IntentType } from './SystemPromptService.js';
|
||||
import type { IntentResult } from './IntentRouterService.js';
|
||||
|
||||
export interface HandleResult {
|
||||
messageId: string;
|
||||
intent: IntentType;
|
||||
success: boolean;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
export class ChatHandlerService {
|
||||
|
||||
/**
|
||||
* 统一处理入口:按意图分发
|
||||
*/
|
||||
async handle(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
userContent: string,
|
||||
intentResult: IntentResult,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
): Promise<HandleResult> {
|
||||
const intent = intentResult.intent;
|
||||
|
||||
try {
|
||||
// 如果上下文守卫被触发且有提示消息,直接作为 LLM 上下文的一部分
|
||||
let toolOutputs: string | undefined;
|
||||
|
||||
if (intentResult.guardTriggered && intentResult.guardMessage) {
|
||||
toolOutputs = `[系统提示] ${intentResult.guardMessage}`;
|
||||
}
|
||||
|
||||
switch (intent) {
|
||||
case 'chat':
|
||||
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, intent, toolOutputs);
|
||||
|
||||
case 'explore':
|
||||
return await this.handleExplore(sessionId, conversationId, writer, placeholderMessageId, toolOutputs);
|
||||
|
||||
case 'consult':
|
||||
return await this.handleConsult(sessionId, conversationId, writer, placeholderMessageId, toolOutputs);
|
||||
|
||||
case 'analyze':
|
||||
return await this.handleAnalyze(sessionId, conversationId, userContent, writer, placeholderMessageId, toolOutputs);
|
||||
|
||||
case 'discuss':
|
||||
return await this.handleDiscuss(sessionId, conversationId, writer, placeholderMessageId, toolOutputs);
|
||||
|
||||
case 'feedback':
|
||||
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, 'feedback', toolOutputs);
|
||||
|
||||
default:
|
||||
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, 'chat', toolOutputs);
|
||||
}
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:ChatHandler] Handler error', {
|
||||
sessionId, intent, error: error.message,
|
||||
});
|
||||
|
||||
await conversationService.markAssistantError(placeholderMessageId, error.message);
|
||||
|
||||
return {
|
||||
messageId: placeholderMessageId,
|
||||
intent,
|
||||
success: false,
|
||||
error: error.message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// chat / consult / feedback — 直接 LLM 对话
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private async handleChat(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
intent: IntentType,
|
||||
toolOutputs?: string,
|
||||
): Promise<HandleResult> {
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, intent, toolOutputs,
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.7,
|
||||
maxTokens: 2000,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
return {
|
||||
messageId: placeholderMessageId,
|
||||
intent,
|
||||
success: true,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// explore — 数据探索(READ 工具 + LLM 解读)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private async handleExplore(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
guardToolOutput?: string,
|
||||
): Promise<HandleResult> {
|
||||
// 从 SessionBlackboard 提取数据摘要作为 tool output
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
let toolOutputs = guardToolOutput || '';
|
||||
|
||||
if (blackboard) {
|
||||
const truncated = tokenTruncationService.truncate(blackboard, {
|
||||
maxTokens: 1500,
|
||||
strategy: 'balanced',
|
||||
});
|
||||
|
||||
const exploreData: string[] = [];
|
||||
|
||||
if (truncated.overview) {
|
||||
exploreData.push(`数据概览:\n${truncated.overview}`);
|
||||
}
|
||||
if (truncated.variables) {
|
||||
exploreData.push(`变量列表:\n${truncated.variables}`);
|
||||
}
|
||||
if (truncated.pico) {
|
||||
exploreData.push(`PICO 推断:\n${truncated.pico}`);
|
||||
}
|
||||
if (truncated.report) {
|
||||
exploreData.push(`数据诊断:\n${truncated.report}`);
|
||||
}
|
||||
|
||||
if (exploreData.length > 0) {
|
||||
toolOutputs = (toolOutputs ? toolOutputs + '\n\n' : '') + exploreData.join('\n\n');
|
||||
}
|
||||
}
|
||||
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'explore', toolOutputs || undefined,
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.7,
|
||||
maxTokens: 2000,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
return {
|
||||
messageId: placeholderMessageId,
|
||||
intent: 'explore',
|
||||
success: true,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// analyze — 转入 QPER(混合模式:LLM 摘要 + WorkspacePane 详情)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Phase IV: analyze 意图 — 对话驱动分析
|
||||
*
|
||||
* 流程: plan 生成 → SSE 推 analysis_plan → LLM 方案说明 → ask_user 确认
|
||||
* 执行由前端通过 /workflow/{id}/stream 触发(D1: 保留独立 workflow SSE)
|
||||
*/
|
||||
private async handleAnalyze(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
userMessage: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
guardToolOutput?: string,
|
||||
): Promise<HandleResult> {
|
||||
// 1. 调用 ToolOrchestratorService 生成计划(D5: PICO hint 自动注入)
|
||||
const planResult = await toolOrchestratorService.plan(sessionId, userMessage);
|
||||
|
||||
if (!planResult.success || !planResult.plan) {
|
||||
const fallbackHint = [
|
||||
guardToolOutput,
|
||||
`[系统提示] 分析计划生成失败: ${planResult.error || '未知错误'}。`,
|
||||
'请友好地告知用户需要更明确的分析需求描述,例如需要指明要分析哪些变量、比较什么。',
|
||||
].filter(Boolean).join('\n');
|
||||
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'analyze', fallbackHint,
|
||||
);
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.5, maxTokens: 800,
|
||||
});
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
}
|
||||
|
||||
const plan = planResult.plan;
|
||||
|
||||
// 2. SSE 推送 analysis_plan 事件(D2: 前端自动创建 AnalysisRecord)
|
||||
const planEvent = `data: ${JSON.stringify({
|
||||
type: 'analysis_plan',
|
||||
plan,
|
||||
})}\n\n`;
|
||||
writer.write(planEvent);
|
||||
|
||||
logger.info('[SSA:ChatHandler] analysis_plan pushed via SSE', {
|
||||
sessionId, workflowId: plan.workflow_id, totalSteps: plan.total_steps,
|
||||
});
|
||||
|
||||
// 3. LLM 流式生成方案说明
|
||||
const planSummary = toolOrchestratorService.formatPlanForLLM(plan);
|
||||
const toolOutputs = [
|
||||
guardToolOutput,
|
||||
planSummary,
|
||||
'[系统提示] 你刚刚为用户制定了上述分析方案。请用自然语言向用户解释这个方案:包括为什么选这些方法、分析步骤的逻辑。不要重复列步骤编号和工具代码,要用用户能理解的语言说明。最后提示用户确认方案后即可执行。',
|
||||
].filter(Boolean).join('\n\n');
|
||||
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'analyze', toolOutputs,
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.5, maxTokens: 1200,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
// 4. 推送 ask_user 确认卡片(复用 Phase III AskUserService)
|
||||
const confirmQ = {
|
||||
inputType: 'confirm' as const,
|
||||
question: '请确认上述分析方案',
|
||||
context: `方案: ${plan.title},共 ${plan.total_steps} 个步骤`,
|
||||
options: [
|
||||
{ id: 'confirm_plan', label: '确认执行', value: 'confirm_plan' },
|
||||
{ id: 'change_method', label: '修改方案', value: 'change_method' },
|
||||
],
|
||||
metadata: {
|
||||
workflowId: plan.workflow_id,
|
||||
planTitle: plan.title,
|
||||
},
|
||||
};
|
||||
|
||||
const event = await askUserService.createQuestion(sessionId, confirmQ);
|
||||
writer.write(askUserService.formatSSE(event));
|
||||
|
||||
return {
|
||||
messageId: placeholderMessageId,
|
||||
intent: 'analyze',
|
||||
success: true,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// discuss — 结果讨论(注入分析结果上下文)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private async handleDiscuss(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
guardToolOutput?: string,
|
||||
): Promise<HandleResult> {
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
let toolOutputs = guardToolOutput || '';
|
||||
|
||||
// 注入 QPER trace 摘要
|
||||
if (blackboard?.qperTrace && blackboard.qperTrace.length > 0) {
|
||||
const traceItems = blackboard.qperTrace
|
||||
.filter(t => t.status === 'success')
|
||||
.slice(-5)
|
||||
.map(t => `- 步骤${t.stepIndex}: ${t.toolCode} → ${t.summary}`)
|
||||
.join('\n');
|
||||
|
||||
if (traceItems) {
|
||||
toolOutputs = (toolOutputs ? toolOutputs + '\n\n' : '') +
|
||||
`最近分析结果:\n${traceItems}`;
|
||||
}
|
||||
}
|
||||
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'discuss', toolOutputs || undefined,
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.7,
|
||||
maxTokens: 2000,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
return {
|
||||
messageId: placeholderMessageId,
|
||||
intent: 'discuss',
|
||||
success: true,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// consult — 方法推荐(Phase III: method_consult + ask_user)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private async handleConsult(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
guardToolOutput?: string,
|
||||
): Promise<HandleResult> {
|
||||
let toolOutputs = guardToolOutput || '';
|
||||
|
||||
// 1. 调用 MethodConsultService 获取推荐
|
||||
const recommendation = await methodConsultService.recommend(sessionId);
|
||||
const recText = methodConsultService.formatForLLM(recommendation);
|
||||
toolOutputs = (toolOutputs ? toolOutputs + '\n\n' : '') + recText;
|
||||
|
||||
logger.info('[SSA:ChatHandler] Method consult result', {
|
||||
sessionId,
|
||||
matched: recommendation.matched,
|
||||
primaryMethod: recommendation.primaryMethod?.code,
|
||||
matchScore: recommendation.matchScore,
|
||||
needsClarification: recommendation.needsClarification,
|
||||
});
|
||||
|
||||
// 2. LLM 生成自然语言推荐(P1: 结论先行 + 结构化列表)
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'consult', toolOutputs,
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.5,
|
||||
maxTokens: 1500,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
// 3. 如果有明确推荐,推送 ask_user 确认卡片
|
||||
if (recommendation.matched && recommendation.primaryMethod) {
|
||||
const confirmQ = askUserService.buildMethodConfirmQuestion(
|
||||
recommendation.primaryMethod.name,
|
||||
recommendation.primaryMethod.code,
|
||||
recommendation.fallbackMethod?.name,
|
||||
);
|
||||
|
||||
const event = await askUserService.createQuestion(sessionId, confirmQ);
|
||||
writer.write(askUserService.formatSSE(event));
|
||||
}
|
||||
|
||||
return {
|
||||
messageId: placeholderMessageId,
|
||||
intent: 'consult',
|
||||
success: true,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// ask_user 响应处理(Phase III)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
async handleAskUserResponse(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
response: AskUserResponse,
|
||||
writer: StreamWriter,
|
||||
placeholderMessageId: string,
|
||||
): Promise<HandleResult> {
|
||||
// 清除 pending 状态
|
||||
await askUserService.clearPending(sessionId);
|
||||
|
||||
if (response.action === 'skip') {
|
||||
// 用户跳过 — 生成友好回复
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'chat',
|
||||
'[系统提示] 用户跳过了上一个确认问题。请友好地回应,表示随时可以继续。',
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.7,
|
||||
maxTokens: 500,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'chat', success: true };
|
||||
}
|
||||
|
||||
// 用户选择了具体选项
|
||||
const selectedValue = response.selectedValues?.[0];
|
||||
|
||||
if (selectedValue === 'confirm_plan') {
|
||||
// Phase IV: 确认分析方案 → 前端将触发 executeWorkflow
|
||||
const workflowId = response.metadata?.workflowId || '';
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'analyze',
|
||||
`[系统提示] 用户已确认分析方案(workflow: ${workflowId})。请简要确认:"好的,方案已确认,正在准备执行分析..."。`,
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.3, maxTokens: 300,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
// 推送 plan_confirmed 事件,前端据此触发 executeWorkflow
|
||||
const confirmEvent = `data: ${JSON.stringify({
|
||||
type: 'plan_confirmed',
|
||||
workflowId,
|
||||
})}\n\n`;
|
||||
writer.write(confirmEvent);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
|
||||
} else if (selectedValue === 'confirm') {
|
||||
// Phase III: 确认使用推荐方法 → 提示可以开始分析
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'analyze',
|
||||
'[系统提示] 用户已确认使用推荐的统计方法。请简要确认方案,告知用户可以在对话中说"开始分析"或在右侧面板触发执行。',
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.5,
|
||||
maxTokens: 800,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
|
||||
|
||||
} else if (selectedValue === 'use_fallback') {
|
||||
// 使用备选方案
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'consult',
|
||||
'[系统提示] 用户选择使用备选方案。请确认切换,并简要说明备选方案的适用场景。',
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.5,
|
||||
maxTokens: 800,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'consult', success: true };
|
||||
|
||||
} else if (selectedValue === 'change_method') {
|
||||
// 用户想换方法 → 引导重新描述
|
||||
const messages = await conversationService.buildContext(
|
||||
sessionId, conversationId, 'consult',
|
||||
'[系统提示] 用户不满意当前推荐,想换方法。请询问用户希望使用什么方法,或引导其更详细地描述分析需求。',
|
||||
);
|
||||
|
||||
const result = await conversationService.streamToSSE(messages, writer, {
|
||||
temperature: 0.7,
|
||||
maxTokens: 800,
|
||||
});
|
||||
|
||||
await conversationService.finalizeAssistantMessage(
|
||||
placeholderMessageId, result.content, result.thinking, result.tokens,
|
||||
);
|
||||
|
||||
return { messageId: placeholderMessageId, intent: 'consult', success: true };
|
||||
}
|
||||
|
||||
// 其他情况:fallback
|
||||
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, 'chat');
|
||||
}
|
||||
}
|
||||
|
||||
export const chatHandlerService = new ChatHandlerService();
|
||||
382
backend/src/modules/ssa/services/ConversationService.ts
Normal file
382
backend/src/modules/ssa/services/ConversationService.ts
Normal file
@@ -0,0 +1,382 @@
|
||||
/**
|
||||
* Phase II — 对话核心服务
|
||||
*
|
||||
* 职责:
|
||||
* - 对话历史持久化(复用 aia_schema.conversations + messages)
|
||||
* - Placeholder 占位机制(H3 竞态保护)
|
||||
* - LLM 流式调用 + OpenAI Compatible SSE 输出
|
||||
* - 5 秒心跳保活(H1)
|
||||
* - 消息上下文组装(滑动窗口)
|
||||
*
|
||||
* 架构约束:
|
||||
* - C1: 对话层 LLM 禁止 Function Calling
|
||||
* - C3: 默认使用 chatStream 流式输出
|
||||
*/
|
||||
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||||
import type { Message as LLMMessage, StreamChunk } from '../../../common/llm/adapters/types.js';
|
||||
import { systemPromptService, type IntentType } from './SystemPromptService.js';
|
||||
|
||||
const MAX_CONTEXT_MESSAGES = 20;
|
||||
const DEFAULT_MODEL = 'deepseek-v3';
|
||||
const HEARTBEAT_INTERVAL_MS = 5000;
|
||||
const SSA_AGENT_ID = 'SSA_ANALYST';
|
||||
|
||||
export interface ChatRequest {
|
||||
content: string;
|
||||
enableDeepThinking?: boolean;
|
||||
}
|
||||
|
||||
export interface StreamWriter {
|
||||
write(data: string): boolean;
|
||||
end(): void;
|
||||
on(event: string, handler: () => void): void;
|
||||
}
|
||||
|
||||
export class ConversationService {
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Conversation CRUD
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 获取或创建 session 关联的 conversation(延迟创建模式)
|
||||
*/
|
||||
async getOrCreateConversation(sessionId: string, userId: string): Promise<string> {
|
||||
const existing = await prisma.conversation.findFirst({
|
||||
where: {
|
||||
agentId: SSA_AGENT_ID,
|
||||
metadata: { path: ['sessionId'], equals: sessionId },
|
||||
deletedAt: null,
|
||||
},
|
||||
});
|
||||
|
||||
if (existing) return existing.id;
|
||||
|
||||
const session = await prisma.ssaSession.findUnique({ where: { id: sessionId } });
|
||||
const title = session?.title || '统计分析对话';
|
||||
|
||||
const conversation = await prisma.conversation.create({
|
||||
data: {
|
||||
userId,
|
||||
agentId: SSA_AGENT_ID,
|
||||
title,
|
||||
modelName: DEFAULT_MODEL,
|
||||
metadata: { sessionId },
|
||||
},
|
||||
});
|
||||
|
||||
logger.info('[SSA:Conv] Conversation created', {
|
||||
conversationId: conversation.id,
|
||||
sessionId,
|
||||
});
|
||||
|
||||
return conversation.id;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Message persistence
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 保存用户消息到 DB
|
||||
*/
|
||||
async saveUserMessage(
|
||||
conversationId: string,
|
||||
content: string,
|
||||
metadata?: Record<string, any>,
|
||||
): Promise<string> {
|
||||
const msg = await prisma.message.create({
|
||||
data: {
|
||||
conversationId,
|
||||
role: 'user',
|
||||
content,
|
||||
metadata: metadata ?? undefined,
|
||||
},
|
||||
});
|
||||
|
||||
await this.incrementMessageCount(conversationId);
|
||||
return msg.id;
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建 assistant placeholder(H3 竞态保护)
|
||||
* 在 LLM 流式输出之前立即创建,确保并发请求能感知到"正在生成"
|
||||
*/
|
||||
async createAssistantPlaceholder(
|
||||
conversationId: string,
|
||||
intent: IntentType,
|
||||
): Promise<string> {
|
||||
const msg = await prisma.message.create({
|
||||
data: {
|
||||
conversationId,
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
model: DEFAULT_MODEL,
|
||||
metadata: { intent, status: 'generating' },
|
||||
},
|
||||
});
|
||||
|
||||
logger.debug('[SSA:Conv] Assistant placeholder created', {
|
||||
messageId: msg.id,
|
||||
intent,
|
||||
});
|
||||
|
||||
return msg.id;
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新 placeholder 为最终内容
|
||||
*/
|
||||
async finalizeAssistantMessage(
|
||||
messageId: string,
|
||||
content: string,
|
||||
thinkingContent?: string,
|
||||
tokens?: number,
|
||||
): Promise<void> {
|
||||
const existing = await prisma.message.findUnique({ where: { id: messageId } });
|
||||
const prevMeta = (existing?.metadata as Record<string, any>) || {};
|
||||
|
||||
await prisma.message.update({
|
||||
where: { id: messageId },
|
||||
data: {
|
||||
content,
|
||||
thinkingContent: thinkingContent || undefined,
|
||||
tokens,
|
||||
metadata: { ...prevMeta, status: 'complete' },
|
||||
},
|
||||
});
|
||||
|
||||
const msg = await prisma.message.findUnique({
|
||||
where: { id: messageId },
|
||||
select: { conversationId: true },
|
||||
});
|
||||
if (msg) {
|
||||
await this.incrementMessageCount(msg.conversationId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 标记 placeholder 为错误状态(崩溃恢复)
|
||||
*/
|
||||
async markAssistantError(messageId: string, error: string): Promise<void> {
|
||||
const existing = await prisma.message.findUnique({ where: { id: messageId } });
|
||||
const prevMeta = (existing?.metadata as Record<string, any>) || {};
|
||||
|
||||
await prisma.message.update({
|
||||
where: { id: messageId },
|
||||
data: {
|
||||
content: `[生成中断] ${error}`,
|
||||
metadata: { ...prevMeta, status: 'error', error },
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Context building
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 构建 LLM 消息上下文(System Prompt + 对话历史)
|
||||
*/
|
||||
async buildContext(
|
||||
sessionId: string,
|
||||
conversationId: string,
|
||||
intent: IntentType,
|
||||
toolOutputs?: string,
|
||||
): Promise<LLMMessage[]> {
|
||||
const systemPrompt = await systemPromptService.assemble(
|
||||
sessionId,
|
||||
intent,
|
||||
toolOutputs,
|
||||
);
|
||||
|
||||
const history = await prisma.message.findMany({
|
||||
where: { conversationId },
|
||||
orderBy: { createdAt: 'asc' },
|
||||
});
|
||||
|
||||
// 滑动窗口:取最近 N 条有效消息(排除 generating 状态的 placeholder)
|
||||
const validMessages = history.filter(m => {
|
||||
if (m.role === 'assistant') {
|
||||
const meta = m.metadata as any;
|
||||
return meta?.status !== 'generating';
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
const recentMessages = validMessages.slice(-MAX_CONTEXT_MESSAGES);
|
||||
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
];
|
||||
|
||||
for (const msg of recentMessages) {
|
||||
messages.push({
|
||||
role: msg.role as 'user' | 'assistant',
|
||||
content: msg.content,
|
||||
});
|
||||
}
|
||||
|
||||
logger.debug('[SSA:Conv] Context built', {
|
||||
sessionId,
|
||||
intent,
|
||||
historyCount: recentMessages.length,
|
||||
systemPromptLength: systemPrompt.length,
|
||||
});
|
||||
|
||||
return messages;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// LLM Streaming (C1: no function calling, C3: chatStream)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* 执行 LLM 流式输出并写入 SSE 流
|
||||
*
|
||||
* 包含 5 秒心跳保活(H1)和标准化错误事件(H1)
|
||||
*/
|
||||
async streamToSSE(
|
||||
messages: LLMMessage[],
|
||||
writer: StreamWriter,
|
||||
options?: {
|
||||
model?: string;
|
||||
temperature?: number;
|
||||
maxTokens?: number;
|
||||
onComplete?: (content: string, thinking: string) => void;
|
||||
},
|
||||
): Promise<{ content: string; thinking: string; tokens?: number }> {
|
||||
const model = options?.model || DEFAULT_MODEL;
|
||||
const adapter = LLMFactory.getAdapter(model as any);
|
||||
|
||||
let accContent = '';
|
||||
let accThinking = '';
|
||||
let clientDisconnected = false;
|
||||
|
||||
writer.on('close', () => {
|
||||
clientDisconnected = true;
|
||||
});
|
||||
|
||||
// H1: 5 秒心跳保活
|
||||
const heartbeat = setInterval(() => {
|
||||
if (!clientDisconnected) {
|
||||
try {
|
||||
writer.write(': keep-alive\n\n');
|
||||
} catch { /* ignore */ }
|
||||
}
|
||||
}, HEARTBEAT_INTERVAL_MS);
|
||||
|
||||
try {
|
||||
const stream = adapter.chatStream(messages, {
|
||||
temperature: options?.temperature ?? 0.7,
|
||||
maxTokens: options?.maxTokens ?? 2000,
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
if (clientDisconnected) break;
|
||||
|
||||
if (chunk.content) {
|
||||
accContent += chunk.content;
|
||||
|
||||
const sseData = JSON.stringify({
|
||||
id: `chatcmpl-ssa-${Date.now()}`,
|
||||
object: 'chat.completion.chunk',
|
||||
choices: [{
|
||||
delta: { content: chunk.content },
|
||||
finish_reason: null,
|
||||
}],
|
||||
});
|
||||
writer.write(`data: ${sseData}\n\n`);
|
||||
}
|
||||
|
||||
if (chunk.done) {
|
||||
const doneData = JSON.stringify({
|
||||
id: `chatcmpl-ssa-${Date.now()}`,
|
||||
object: 'chat.completion.chunk',
|
||||
choices: [{
|
||||
delta: {},
|
||||
finish_reason: 'stop',
|
||||
}],
|
||||
usage: chunk.usage,
|
||||
});
|
||||
writer.write(`data: ${doneData}\n\n`);
|
||||
writer.write('data: [DONE]\n\n');
|
||||
}
|
||||
}
|
||||
|
||||
options?.onComplete?.(accContent, accThinking);
|
||||
|
||||
return {
|
||||
content: accContent,
|
||||
thinking: accThinking,
|
||||
tokens: undefined,
|
||||
};
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:Conv] Stream error', { error: error.message });
|
||||
|
||||
if (!clientDisconnected) {
|
||||
const errorData = JSON.stringify({
|
||||
type: 'error',
|
||||
code: 'STREAM_ERROR',
|
||||
message: error.message || '生成回复时发生错误',
|
||||
});
|
||||
try {
|
||||
writer.write(`data: ${errorData}\n\n`);
|
||||
} catch { /* ignore */ }
|
||||
}
|
||||
|
||||
throw error;
|
||||
} finally {
|
||||
clearInterval(heartbeat);
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Conversation history API
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
async getMessages(conversationId: string): Promise<any[]> {
|
||||
return prisma.message.findMany({
|
||||
where: { conversationId },
|
||||
orderBy: { createdAt: 'asc' },
|
||||
select: {
|
||||
id: true,
|
||||
role: true,
|
||||
content: true,
|
||||
thinkingContent: true,
|
||||
metadata: true,
|
||||
createdAt: true,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async getConversationBySession(sessionId: string): Promise<any | null> {
|
||||
return prisma.conversation.findFirst({
|
||||
where: {
|
||||
agentId: SSA_AGENT_ID,
|
||||
metadata: { path: ['sessionId'], equals: sessionId },
|
||||
deletedAt: null,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Helpers
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private async incrementMessageCount(conversationId: string): Promise<void> {
|
||||
try {
|
||||
await prisma.conversation.update({
|
||||
where: { id: conversationId },
|
||||
data: { messageCount: { increment: 1 } },
|
||||
});
|
||||
} catch (err) {
|
||||
logger.warn('[SSA:Conv] Failed to increment message count', { conversationId });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export const conversationService = new ConversationService();
|
||||
@@ -349,6 +349,66 @@ export class DataProfileService {
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* Phase I: 获取单变量详细分析(调用 Python variable-detail 端点)
|
||||
*
|
||||
* @param sessionId SSA 会话 ID
|
||||
* @param variableName 目标变量名
|
||||
*/
|
||||
async getVariableDetail(sessionId: string, variableName: string): Promise<any> {
|
||||
try {
|
||||
const csvContent = await this.loadCSVFromSession(sessionId);
|
||||
if (!csvContent) {
|
||||
return { success: false, error: 'No CSV data available for session' };
|
||||
}
|
||||
|
||||
const response = await this.client.post('/api/ssa/variable-detail', {
|
||||
csv_content: csvContent,
|
||||
variable_name: variableName,
|
||||
max_bins: 30,
|
||||
max_qq_points: 200,
|
||||
});
|
||||
|
||||
return response.data;
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:DataProfile] Variable detail failed', {
|
||||
sessionId, variableName, error: error.message,
|
||||
});
|
||||
return { success: false, error: error.message };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从 Session 加载原始 CSV 字符串(供 variable-detail 复用)
|
||||
*/
|
||||
private async loadCSVFromSession(sessionId: string): Promise<string | null> {
|
||||
const session = await prisma.ssaSession.findUnique({ where: { id: sessionId } });
|
||||
if (!session) return null;
|
||||
|
||||
if (session.dataOssKey) {
|
||||
const buffer = await storage.download(session.dataOssKey);
|
||||
return buffer.toString('utf-8');
|
||||
}
|
||||
|
||||
if (session.dataPayload) {
|
||||
const rows = session.dataPayload as unknown as Record<string, any>[];
|
||||
if (rows.length === 0) return null;
|
||||
const cols = Object.keys(rows[0]);
|
||||
const lines = [cols.join(',')];
|
||||
for (const row of rows) {
|
||||
lines.push(cols.map(c => {
|
||||
const v = row[c];
|
||||
if (v === null || v === undefined) return '';
|
||||
const s = String(v);
|
||||
return s.includes(',') || s.includes('"') ? `"${s.replace(/"/g, '""')}"` : s;
|
||||
}).join(','));
|
||||
}
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
// 单例导出
|
||||
|
||||
325
backend/src/modules/ssa/services/IntentRouterService.ts
Normal file
325
backend/src/modules/ssa/services/IntentRouterService.ts
Normal file
@@ -0,0 +1,325 @@
|
||||
/**
|
||||
* Phase II — 意图路由服务
|
||||
*
|
||||
* 混合路由策略:
|
||||
* 1. 规则引擎优先(零延迟,零 Token)
|
||||
* 2. LLM 兜底(仅规则无法判断时)
|
||||
* 3. 上下文守卫(C5:数据依赖意图必须有上下文)
|
||||
*
|
||||
* 路由结果:chat | explore | consult | analyze | discuss | feedback
|
||||
* 默认安全兜底:chat
|
||||
*/
|
||||
|
||||
import { readFileSync } from 'fs';
|
||||
import { fileURLToPath } from 'url';
|
||||
import { dirname, join } from 'path';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||||
import { getPromptService } from '../../../common/prompt/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import type { IntentType } from './SystemPromptService.js';
|
||||
import type { Message } from '../../../common/llm/adapters/types.js';
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Types
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
interface IntentRule {
|
||||
intent: IntentType;
|
||||
keywords: string[];
|
||||
excludeKeywords?: string[];
|
||||
requires?: string[];
|
||||
priority: number;
|
||||
}
|
||||
|
||||
interface ContextGuard {
|
||||
requires: string[];
|
||||
fallbackMessage: string;
|
||||
}
|
||||
|
||||
interface IntentRulesConfig {
|
||||
rules: IntentRule[];
|
||||
contextGuards: Record<string, ContextGuard>;
|
||||
defaultIntent: IntentType;
|
||||
}
|
||||
|
||||
export interface IntentResult {
|
||||
intent: IntentType;
|
||||
confidence: number;
|
||||
source: 'rules' | 'llm' | 'default' | 'guard';
|
||||
guardTriggered?: boolean;
|
||||
guardMessage?: string;
|
||||
originalIntent?: IntentType;
|
||||
}
|
||||
|
||||
interface SessionContext {
|
||||
hasDataOverview: boolean;
|
||||
hasAnalysisResults: boolean;
|
||||
hasPico: boolean;
|
||||
lastIntent?: IntentType;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Service
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
class IntentRouterService {
|
||||
private rules: IntentRulesConfig;
|
||||
|
||||
constructor() {
|
||||
this.rules = this.loadRules();
|
||||
}
|
||||
|
||||
/**
|
||||
* 核心分类方法
|
||||
*/
|
||||
async classify(
|
||||
message: string,
|
||||
sessionId: string,
|
||||
): Promise<IntentResult> {
|
||||
const context = await this.buildSessionContext(sessionId);
|
||||
|
||||
// Step 1: 规则引擎(零延迟)
|
||||
const ruleResult = this.matchRules(message, context);
|
||||
if (ruleResult && ruleResult.confidence >= 0.8) {
|
||||
const guarded = this.applyContextGuard(ruleResult, context);
|
||||
logger.info('[SSA:IntentRouter] Rule match', {
|
||||
sessionId,
|
||||
intent: guarded.intent,
|
||||
confidence: guarded.confidence,
|
||||
guardTriggered: guarded.guardTriggered,
|
||||
});
|
||||
return guarded;
|
||||
}
|
||||
|
||||
// Step 2: LLM 兜底(仅规则无法判断时)
|
||||
try {
|
||||
const llmResult = await this.llmClassify(message, context);
|
||||
if (llmResult.confidence >= 0.7) {
|
||||
const guarded = this.applyContextGuard(llmResult, context);
|
||||
logger.info('[SSA:IntentRouter] LLM classify', {
|
||||
sessionId,
|
||||
intent: guarded.intent,
|
||||
confidence: guarded.confidence,
|
||||
guardTriggered: guarded.guardTriggered,
|
||||
});
|
||||
return guarded;
|
||||
}
|
||||
} catch (err: any) {
|
||||
logger.warn('[SSA:IntentRouter] LLM classify failed, using default', {
|
||||
error: err.message,
|
||||
});
|
||||
}
|
||||
|
||||
// Step 3: 默认兜底 → chat(最安全)
|
||||
logger.info('[SSA:IntentRouter] Default fallback to chat', { sessionId });
|
||||
return {
|
||||
intent: 'chat',
|
||||
confidence: 0.5,
|
||||
source: 'default',
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 规则引擎
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private matchRules(message: string, context: SessionContext): IntentResult | null {
|
||||
const normalizedMsg = message.toLowerCase().trim();
|
||||
|
||||
const matches: Array<{ rule: IntentRule; score: number }> = [];
|
||||
|
||||
for (const rule of this.rules.rules) {
|
||||
if (rule.excludeKeywords?.some(kw => normalizedMsg.includes(kw.toLowerCase()))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const matchedKeywords = rule.keywords.filter(kw =>
|
||||
normalizedMsg.includes(kw.toLowerCase())
|
||||
);
|
||||
|
||||
if (matchedKeywords.length > 0) {
|
||||
const score = matchedKeywords.length * rule.priority;
|
||||
matches.push({ rule, score });
|
||||
}
|
||||
}
|
||||
|
||||
if (matches.length === 0) return null;
|
||||
|
||||
matches.sort((a, b) => b.score - a.score);
|
||||
const best = matches[0];
|
||||
|
||||
return {
|
||||
intent: best.rule.intent,
|
||||
confidence: Math.min(0.95, 0.7 + best.score * 0.05),
|
||||
source: 'rules',
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// LLM 分类(轻量级,<500 tokens)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private async llmClassify(message: string, context: SessionContext): Promise<IntentResult> {
|
||||
const promptService = getPromptService(prisma);
|
||||
|
||||
let systemPrompt: string;
|
||||
try {
|
||||
const rendered = await promptService.get('SSA_INTENT_ROUTER', {});
|
||||
systemPrompt = rendered.content;
|
||||
} catch {
|
||||
systemPrompt = this.fallbackRouterPrompt();
|
||||
}
|
||||
|
||||
const contextDesc = [
|
||||
`有数据: ${context.hasDataOverview ? '是' : '否'}`,
|
||||
`有分析结果: ${context.hasAnalysisResults ? '是' : '否'}`,
|
||||
`有PICO: ${context.hasPico ? '是' : '否'}`,
|
||||
].join(', ');
|
||||
|
||||
const adapter = LLMFactory.getAdapter('deepseek-v3');
|
||||
const messages: Message[] = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: `会话状态: ${contextDesc}\n用户消息: ${message}` },
|
||||
];
|
||||
|
||||
const response = await adapter.chat(messages, {
|
||||
temperature: 0.1,
|
||||
maxTokens: 100,
|
||||
});
|
||||
|
||||
return this.parseLLMResponse(response.content);
|
||||
}
|
||||
|
||||
private parseLLMResponse(text: string): IntentResult {
|
||||
const cleaned = text.trim().toLowerCase();
|
||||
|
||||
try {
|
||||
const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
|
||||
if (jsonMatch) {
|
||||
const parsed = JSON.parse(jsonMatch[0]);
|
||||
const validIntents: IntentType[] = ['chat', 'explore', 'consult', 'analyze', 'discuss', 'feedback'];
|
||||
const intent = validIntents.includes(parsed.intent) ? parsed.intent : 'chat';
|
||||
const confidence = typeof parsed.confidence === 'number'
|
||||
? Math.min(1, Math.max(0, parsed.confidence))
|
||||
: 0.6;
|
||||
|
||||
return { intent, confidence, source: 'llm' };
|
||||
}
|
||||
} catch { /* fall through */ }
|
||||
|
||||
const intentMap: Record<string, IntentType> = {
|
||||
chat: 'chat', explore: 'explore', consult: 'consult',
|
||||
analyze: 'analyze', discuss: 'discuss', feedback: 'feedback',
|
||||
};
|
||||
|
||||
for (const [key, intent] of Object.entries(intentMap)) {
|
||||
if (cleaned.includes(key)) {
|
||||
return { intent, confidence: 0.7, source: 'llm' };
|
||||
}
|
||||
}
|
||||
|
||||
return { intent: 'chat', confidence: 0.5, source: 'llm' };
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 上下文守卫(C5)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private applyContextGuard(result: IntentResult, context: SessionContext): IntentResult {
|
||||
const guard = this.rules.contextGuards[result.intent];
|
||||
if (!guard) return result;
|
||||
|
||||
const unmet = guard.requires.filter(req => {
|
||||
if (req === 'dataOverview') return !context.hasDataOverview;
|
||||
if (req === 'hasAnalysisResults') return !context.hasAnalysisResults;
|
||||
return false;
|
||||
});
|
||||
|
||||
if (unmet.length === 0) return result;
|
||||
|
||||
// consult 意图特殊处理:即使没有数据,也可以给一般性建议
|
||||
if (result.intent === 'consult') {
|
||||
return {
|
||||
...result,
|
||||
guardTriggered: true,
|
||||
guardMessage: guard.fallbackMessage,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
intent: 'chat',
|
||||
confidence: result.confidence,
|
||||
source: 'guard',
|
||||
guardTriggered: true,
|
||||
guardMessage: guard.fallbackMessage,
|
||||
originalIntent: result.intent,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 会话上下文构建
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private async buildSessionContext(sessionId: string): Promise<SessionContext> {
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
|
||||
let hasAnalysisResults = false;
|
||||
if (blackboard?.qperTrace && blackboard.qperTrace.length > 0) {
|
||||
hasAnalysisResults = blackboard.qperTrace.some(t => t.status === 'success');
|
||||
}
|
||||
|
||||
return {
|
||||
hasDataOverview: !!blackboard?.dataOverview,
|
||||
hasAnalysisResults,
|
||||
hasPico: !!blackboard?.picoInference,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 配置加载
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
private loadRules(): IntentRulesConfig {
|
||||
try {
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
const configPath = join(__dirname, '..', 'config', 'intent_rules.json');
|
||||
const raw = readFileSync(configPath, 'utf-8');
|
||||
return JSON.parse(raw);
|
||||
} catch (err) {
|
||||
logger.warn('[SSA:IntentRouter] Failed to load intent_rules.json, using defaults');
|
||||
return this.defaultRules();
|
||||
}
|
||||
}
|
||||
|
||||
private defaultRules(): IntentRulesConfig {
|
||||
return {
|
||||
rules: [
|
||||
{ intent: 'analyze', keywords: ['分析', '检验', '比较', '回归'], priority: 10, requires: ['dataOverview'] },
|
||||
{ intent: 'explore', keywords: ['看看', '分布', '缺失', '概况'], priority: 8, requires: ['dataOverview'] },
|
||||
{ intent: 'discuss', keywords: ['什么意思', '怎么解释', 'p值'], priority: 9, requires: ['dataOverview', 'hasAnalysisResults'] },
|
||||
],
|
||||
contextGuards: {},
|
||||
defaultIntent: 'chat',
|
||||
};
|
||||
}
|
||||
|
||||
private fallbackRouterPrompt(): string {
|
||||
return `你是一个意图分类器。根据用户消息和会话状态,判断用户意图。
|
||||
|
||||
可选意图:
|
||||
- chat: 普通对话、统计知识问答
|
||||
- explore: 探索数据特征、了解数据概况
|
||||
- consult: 咨询分析方法、请求推荐
|
||||
- analyze: 要求执行统计分析
|
||||
- discuss: 讨论已有分析结果
|
||||
- feedback: 对结果不满意、要求改进
|
||||
|
||||
以 JSON 格式输出: {"intent": "xxx", "confidence": 0.9}
|
||||
只输出 JSON,不要输出其他内容。`;
|
||||
}
|
||||
}
|
||||
|
||||
export const intentRouterService = new IntentRouterService();
|
||||
274
backend/src/modules/ssa/services/MethodConsultService.ts
Normal file
274
backend/src/modules/ssa/services/MethodConsultService.ts
Normal file
@@ -0,0 +1,274 @@
|
||||
/**
|
||||
* MethodConsultService — 方法推荐工具
|
||||
*
|
||||
* 核心流程:
|
||||
* 1. 从 SessionBlackboard 读取 dataOverview + picoInference
|
||||
* 2. 将 PICO 推断映射为 ParsedQuery(goal/outcome/predictor/design)
|
||||
* 3. 调用 DecisionTableService.match() 获取 MatchResult
|
||||
* 4. 调用 ToolRegistryService.getByCode() 获取工具详情
|
||||
* 5. 返回 MethodRecommendation(供 LLM 生成自然语言推荐)
|
||||
*
|
||||
* 设计原则: 只推荐,不执行(D1)
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import { decisionTableService, type MatchResult } from './DecisionTableService.js';
|
||||
import { toolRegistryService } from './ToolRegistryService.js';
|
||||
import type { PicoInference } from '../types/session-blackboard.types.js';
|
||||
import type { ParsedQuery, AnalysisGoal, VariableType, StudyDesign } from '../types/query.types.js';
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Types
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface MethodDetail {
|
||||
code: string;
|
||||
name: string;
|
||||
description: string;
|
||||
category: string;
|
||||
prerequisite?: string;
|
||||
}
|
||||
|
||||
export interface MethodRecommendation {
|
||||
matched: boolean;
|
||||
primaryMethod: MethodDetail | null;
|
||||
fallbackMethod: (MethodDetail & { switchCondition: string }) | null;
|
||||
matchScore: number;
|
||||
maxPossibleScore: number;
|
||||
matchDimensions: {
|
||||
goal: string;
|
||||
outcomeType: string | null;
|
||||
predictorType: string | null;
|
||||
design: string;
|
||||
};
|
||||
needsClarification: boolean;
|
||||
clarificationReason?: string;
|
||||
rawMatchResult?: MatchResult;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Service
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export class MethodConsultService {
|
||||
|
||||
/**
|
||||
* 根据 session 上下文推荐统计方法
|
||||
*/
|
||||
async recommend(sessionId: string, _userMessage?: string): Promise<MethodRecommendation> {
|
||||
const bb = await sessionBlackboardService.get(sessionId);
|
||||
|
||||
if (!bb) {
|
||||
return this.noDataRecommendation('Session 黑板为空,无法推荐方法');
|
||||
}
|
||||
|
||||
const pico = bb.picoInference;
|
||||
const dataOverview = bb.dataOverview;
|
||||
|
||||
if (!dataOverview) {
|
||||
return this.noDataRecommendation('尚未上传数据,无法进行方法推荐');
|
||||
}
|
||||
|
||||
// PICO 不完整时标记需要澄清
|
||||
if (!pico || !pico.outcome) {
|
||||
return this.noDataRecommendation(
|
||||
'尚未完成 PICO 推断(缺少结局变量信息),请先描述您的研究目的和关注的变量',
|
||||
);
|
||||
}
|
||||
|
||||
// 映射 PICO → ParsedQuery
|
||||
const parsedQuery = this.picoToQuery(pico, dataOverview);
|
||||
|
||||
if (!parsedQuery) {
|
||||
return this.noDataRecommendation('无法从 PICO 推断中映射出有效的分析参数');
|
||||
}
|
||||
|
||||
// 调用决策表匹配
|
||||
const matchResult = decisionTableService.match(parsedQuery);
|
||||
|
||||
// 构建推荐结果
|
||||
return this.buildRecommendation(matchResult, parsedQuery);
|
||||
}
|
||||
|
||||
/**
|
||||
* PICO → ParsedQuery 映射
|
||||
* 将高层语义(Population/Intervention/Comparison/Outcome)映射为决策表可理解的四维
|
||||
*/
|
||||
private picoToQuery(pico: PicoInference, dataOverview: any): ParsedQuery | null {
|
||||
const columns = dataOverview.profile?.columns || [];
|
||||
|
||||
// 推断 goal
|
||||
const goal = this.inferGoal(pico);
|
||||
|
||||
// 查找 outcome 变量类型
|
||||
const outcomeCol = columns.find(
|
||||
(c: any) => c.name === pico.outcome || c.name?.toLowerCase() === pico.outcome?.toLowerCase(),
|
||||
);
|
||||
const outcomeType: VariableType | null = outcomeCol
|
||||
? this.columnToVarType(outcomeCol)
|
||||
: null;
|
||||
|
||||
// 查找 predictor 变量(intervention/comparison 映射为分组变量)
|
||||
const groupingVar = pico.intervention || pico.comparison || null;
|
||||
const groupCol = groupingVar
|
||||
? columns.find(
|
||||
(c: any) => c.name === groupingVar || c.name?.toLowerCase() === groupingVar?.toLowerCase(),
|
||||
)
|
||||
: null;
|
||||
const predictorType: VariableType | null = groupCol
|
||||
? this.columnToVarType(groupCol)
|
||||
: null;
|
||||
|
||||
// 推断 design
|
||||
const design: StudyDesign = this.inferDesign(pico);
|
||||
|
||||
return {
|
||||
goal,
|
||||
outcome_var: pico.outcome,
|
||||
outcome_type: outcomeType,
|
||||
predictor_vars: groupingVar ? [groupingVar] : [],
|
||||
predictor_types: predictorType ? [predictorType] : [],
|
||||
grouping_var: groupingVar,
|
||||
design,
|
||||
confidence: pico.confidence === 'high' ? 0.9 : pico.confidence === 'medium' ? 0.7 : 0.5,
|
||||
reasoning: `基于 PICO 推断: P=${pico.population}, I=${pico.intervention}, C=${pico.comparison}, O=${pico.outcome}`,
|
||||
needsClarification: false,
|
||||
};
|
||||
}
|
||||
|
||||
private inferGoal(pico: PicoInference): AnalysisGoal {
|
||||
// 有 intervention + comparison → comparison
|
||||
if (pico.intervention && pico.comparison) return 'comparison';
|
||||
// 有 intervention 无 comparison → comparison (vs baseline)
|
||||
if (pico.intervention) return 'comparison';
|
||||
// 无 intervention → descriptive or correlation
|
||||
return 'descriptive';
|
||||
}
|
||||
|
||||
private inferDesign(pico: PicoInference): StudyDesign {
|
||||
// 简单启发式:如果 intervention 暗示配对设计
|
||||
const pairedKeywords = ['前后', '治疗前', '治疗后', 'before', 'after', '干预前', '干预后'];
|
||||
const text = `${pico.intervention || ''} ${pico.comparison || ''}`.toLowerCase();
|
||||
if (pairedKeywords.some(k => text.includes(k))) return 'paired';
|
||||
return 'independent';
|
||||
}
|
||||
|
||||
private columnToVarType(col: any): VariableType {
|
||||
const dtype = col.dtype || col.type || '';
|
||||
if (col.is_id_like) return 'categorical';
|
||||
if (['float64', 'int64', 'numeric', 'continuous'].includes(dtype)) return 'continuous';
|
||||
if (col.uniqueValues === 2 || col.unique_count === 2) return 'binary';
|
||||
return 'categorical';
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 MatchResult 转换为 MethodRecommendation
|
||||
*/
|
||||
private buildRecommendation(matchResult: MatchResult, query: ParsedQuery): MethodRecommendation {
|
||||
const primaryTool = toolRegistryService.getByCode(matchResult.primaryTool);
|
||||
const fallbackTool = matchResult.fallbackTool
|
||||
? toolRegistryService.getByCode(matchResult.fallbackTool)
|
||||
: null;
|
||||
|
||||
const primaryMethod: MethodDetail | null = primaryTool
|
||||
? {
|
||||
code: primaryTool.code,
|
||||
name: primaryTool.name,
|
||||
description: primaryTool.description,
|
||||
category: primaryTool.category,
|
||||
prerequisite: primaryTool.prerequisite,
|
||||
}
|
||||
: null;
|
||||
|
||||
const fallbackMethod = fallbackTool && matchResult.switchCondition
|
||||
? {
|
||||
code: fallbackTool.code,
|
||||
name: fallbackTool.name,
|
||||
description: fallbackTool.description,
|
||||
category: fallbackTool.category,
|
||||
prerequisite: fallbackTool.prerequisite,
|
||||
switchCondition: matchResult.switchCondition,
|
||||
}
|
||||
: null;
|
||||
|
||||
const maxScore = 11; // 4+3+2+2
|
||||
|
||||
return {
|
||||
matched: matchResult.matchScore > 0,
|
||||
primaryMethod,
|
||||
fallbackMethod,
|
||||
matchScore: matchResult.matchScore,
|
||||
maxPossibleScore: maxScore,
|
||||
matchDimensions: {
|
||||
goal: query.goal,
|
||||
outcomeType: query.outcome_type,
|
||||
predictorType: query.predictor_types[0] || null,
|
||||
design: query.design,
|
||||
},
|
||||
needsClarification: matchResult.matchScore < 4,
|
||||
clarificationReason: matchResult.matchScore < 4
|
||||
? '匹配分数较低,建议补充变量类型或研究设计信息以获得更精准的推荐'
|
||||
: undefined,
|
||||
rawMatchResult: matchResult,
|
||||
};
|
||||
}
|
||||
|
||||
private noDataRecommendation(reason: string): MethodRecommendation {
|
||||
return {
|
||||
matched: false,
|
||||
primaryMethod: null,
|
||||
fallbackMethod: null,
|
||||
matchScore: 0,
|
||||
maxPossibleScore: 11,
|
||||
matchDimensions: { goal: 'descriptive', outcomeType: null, predictorType: null, design: 'independent' },
|
||||
needsClarification: true,
|
||||
clarificationReason: reason,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化推荐结果为 LLM 可读文本(注入到 SystemPrompt toolOutputs)
|
||||
*/
|
||||
formatForLLM(rec: MethodRecommendation): string {
|
||||
if (!rec.matched || !rec.primaryMethod) {
|
||||
return [
|
||||
'## 决策表匹配结果',
|
||||
`状态: 未匹配到合适方法`,
|
||||
`原因: ${rec.clarificationReason || '信息不足'}`,
|
||||
'',
|
||||
'请根据用户描述的研究目的,给出一般性的方法建议。',
|
||||
].join('\n');
|
||||
}
|
||||
|
||||
const lines = [
|
||||
'## 决策表匹配结果',
|
||||
`匹配分数: ${rec.matchScore}/${rec.maxPossibleScore}`,
|
||||
`匹配维度: goal=${rec.matchDimensions.goal}, outcome=${rec.matchDimensions.outcomeType || '未知'}, predictor=${rec.matchDimensions.predictorType || '未知'}, design=${rec.matchDimensions.design}`,
|
||||
'',
|
||||
`### 推荐方法: ${rec.primaryMethod.name} (${rec.primaryMethod.code})`,
|
||||
`- 描述: ${rec.primaryMethod.description}`,
|
||||
`- 类别: ${rec.primaryMethod.category}`,
|
||||
];
|
||||
|
||||
if (rec.primaryMethod.prerequisite) {
|
||||
lines.push(`- 前提条件: ${rec.primaryMethod.prerequisite}`);
|
||||
}
|
||||
|
||||
if (rec.fallbackMethod) {
|
||||
lines.push('');
|
||||
lines.push(`### 降级方案: ${rec.fallbackMethod.name} (${rec.fallbackMethod.code})`);
|
||||
lines.push(`- 切换条件: ${rec.fallbackMethod.switchCondition}`);
|
||||
lines.push(`- 描述: ${rec.fallbackMethod.description}`);
|
||||
}
|
||||
|
||||
if (rec.needsClarification) {
|
||||
lines.push('');
|
||||
lines.push(`⚠️ 注意: ${rec.clarificationReason}`);
|
||||
}
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
}
|
||||
|
||||
export const methodConsultService = new MethodConsultService();
|
||||
156
backend/src/modules/ssa/services/PicoInferenceService.ts
Normal file
156
backend/src/modules/ssa/services/PicoInferenceService.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
/**
|
||||
* Phase I — PICO 推断服务
|
||||
*
|
||||
* 调用 LLM (SSA_PICO_INFERENCE prompt) 从数据概览推断 PICO 结构。
|
||||
* 写入 SessionBlackboard.picoInference,标记为 ai_inferred。
|
||||
*
|
||||
* 安全措施:
|
||||
* - Zod 校验 LLM 输出
|
||||
* - jsonrepair 容错
|
||||
* - H3: 观察性研究允许 intervention/comparison 为 null
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
|
||||
import { getPromptService } from '../../../common/prompt/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { jsonrepair } from 'jsonrepair';
|
||||
import type { Message } from '../../../common/llm/adapters/types.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import {
|
||||
PicoInferenceSchema,
|
||||
type PicoInference,
|
||||
type DataOverview,
|
||||
type VariableDictEntry,
|
||||
} from '../types/session-blackboard.types.js';
|
||||
|
||||
const MAX_RETRIES = 1;
|
||||
|
||||
export class PicoInferenceService {
|
||||
|
||||
/**
|
||||
* 从 DataOverview 推断 PICO 结构并写入黑板。
|
||||
*/
|
||||
async inferFromOverview(
|
||||
sessionId: string,
|
||||
overview: DataOverview,
|
||||
dictionary: VariableDictEntry[],
|
||||
): Promise<PicoInference | null> {
|
||||
try {
|
||||
logger.info('[SSA:PICO] Starting inference', { sessionId });
|
||||
|
||||
const promptService = getPromptService(prisma);
|
||||
|
||||
const dataOverviewSummary = this.buildOverviewSummary(overview);
|
||||
const variableList = this.buildVariableList(dictionary);
|
||||
|
||||
const rendered = await promptService.get('SSA_PICO_INFERENCE', {
|
||||
dataOverviewSummary,
|
||||
variableList,
|
||||
});
|
||||
|
||||
const adapter = LLMFactory.getAdapter(
|
||||
(rendered.modelConfig?.model as any) || 'deepseek-v3'
|
||||
);
|
||||
|
||||
const messages: Message[] = [
|
||||
{ role: 'system', content: rendered.content },
|
||||
{ role: 'user', content: '请根据以上数据概览推断 PICO 结构。' },
|
||||
];
|
||||
|
||||
let pico: PicoInference | null = null;
|
||||
|
||||
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
||||
try {
|
||||
const response = await adapter.chat(messages, {
|
||||
temperature: rendered.modelConfig?.temperature ?? 0.3,
|
||||
maxTokens: rendered.modelConfig?.maxTokens ?? 1024,
|
||||
});
|
||||
|
||||
const raw = this.robustJsonParse(response.content);
|
||||
const validated = PicoInferenceSchema.parse({
|
||||
...raw,
|
||||
status: 'ai_inferred',
|
||||
});
|
||||
|
||||
pico = validated;
|
||||
break;
|
||||
} catch (err: any) {
|
||||
logger.warn('[SSA:PICO] LLM attempt failed', {
|
||||
attempt, error: err.message,
|
||||
});
|
||||
if (attempt === MAX_RETRIES) throw err;
|
||||
}
|
||||
}
|
||||
|
||||
if (pico) {
|
||||
await sessionBlackboardService.confirmPico(sessionId, {
|
||||
population: pico.population,
|
||||
intervention: pico.intervention,
|
||||
comparison: pico.comparison,
|
||||
outcome: pico.outcome,
|
||||
});
|
||||
|
||||
logger.info('[SSA:PICO] Inference complete', {
|
||||
sessionId,
|
||||
confidence: pico.confidence,
|
||||
hasIntervention: pico.intervention !== null,
|
||||
});
|
||||
}
|
||||
|
||||
return pico;
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:PICO] Inference failed', {
|
||||
sessionId, error: error.message,
|
||||
});
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private buildOverviewSummary(overview: DataOverview): string {
|
||||
const s = overview.profile.summary;
|
||||
const lines = [
|
||||
`数据集: ${s.totalRows} 行, ${s.totalColumns} 列`,
|
||||
`类型分布: 数值型 ${s.numericColumns}, 分类型 ${s.categoricalColumns}, 日期型 ${s.datetimeColumns}, 文本型 ${s.textColumns}`,
|
||||
`整体缺失率: ${s.overallMissingRate}%`,
|
||||
`完整病例数: ${overview.completeCaseCount}`,
|
||||
];
|
||||
|
||||
const nonNormal = overview.normalityTests
|
||||
?.filter(t => !t.isNormal)
|
||||
.map(t => t.variable);
|
||||
if (nonNormal && nonNormal.length > 0) {
|
||||
lines.push(`非正态分布变量: ${nonNormal.join(', ')}`);
|
||||
}
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
private buildVariableList(dict: VariableDictEntry[]): string {
|
||||
return dict
|
||||
.filter(v => !v.isIdLike)
|
||||
.map(v => {
|
||||
const type = v.confirmedType ?? v.inferredType;
|
||||
const label = v.label ? ` (${v.label})` : '';
|
||||
return `- ${v.name}: ${type}${label}`;
|
||||
})
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
private robustJsonParse(text: string): any {
|
||||
let cleaned = text.trim();
|
||||
|
||||
const fenceMatch = cleaned.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
if (fenceMatch) {
|
||||
cleaned = fenceMatch[1].trim();
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.parse(cleaned);
|
||||
} catch {
|
||||
return JSON.parse(jsonrepair(cleaned));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export const picoInferenceService = new PicoInferenceService();
|
||||
@@ -30,7 +30,7 @@ import {
|
||||
createDynamicIntentSchema,
|
||||
validateConfidence,
|
||||
} from '../types/query.types.js';
|
||||
import { AVAILABLE_TOOLS } from './WorkflowPlannerService.js';
|
||||
import { toolRegistryService } from './ToolRegistryService.js';
|
||||
|
||||
const CONFIDENCE_THRESHOLD = 0.7;
|
||||
const MAX_LLM_RETRIES = 1;
|
||||
@@ -92,9 +92,7 @@ export class QueryService {
|
||||
? this.buildProfileSummaryForPrompt(profile)
|
||||
: '(未上传数据文件)';
|
||||
|
||||
const toolList = Object.values(AVAILABLE_TOOLS)
|
||||
.map(t => `- ${t.code}: ${t.name} — ${t.description}`)
|
||||
.join('\n');
|
||||
const toolList = toolRegistryService.formatForLLM();
|
||||
|
||||
// 2. 获取渲染后的 Prompt
|
||||
const rendered = await promptService.get('SSA_QUERY_INTENT', {
|
||||
|
||||
319
backend/src/modules/ssa/services/SessionBlackboardService.ts
Normal file
319
backend/src/modules/ssa/services/SessionBlackboardService.ts
Normal file
@@ -0,0 +1,319 @@
|
||||
/**
|
||||
* Phase I — Session 黑板服务
|
||||
*
|
||||
* 会话级数据缓存,存储 DataOverview / VariableDictionary / PicoInference / QperTrace。
|
||||
* 使用平台 CacheFactory(Postgres-Only 架构,遵循 C4/C7 约束)。
|
||||
* 扁平单 JSON Blob:一个 sessionId = 一条 cache 记录。
|
||||
*
|
||||
* 并发安全:patch() 内置基于 sessionId 的互斥锁(H1),
|
||||
* 同一 session 的并发 patch 排队执行,杜绝 get-merge-set 竞态覆盖。
|
||||
*/
|
||||
|
||||
import { cache } from '../../../common/cache/index.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import {
|
||||
SessionBlackboardSchema,
|
||||
createEmptyBlackboard,
|
||||
type SessionBlackboard,
|
||||
type DataOverview,
|
||||
type VariableDictEntry,
|
||||
type VariableDictPatch,
|
||||
type PicoInference,
|
||||
type PicoPatch,
|
||||
type QperTraceEntry,
|
||||
type FiveSectionReport,
|
||||
} from '../types/session-blackboard.types.js';
|
||||
|
||||
class SessionBlackboardService {
|
||||
private readonly KEY_PREFIX = 'ssa:session:';
|
||||
private readonly DEFAULT_TTL = 7200; // 2 小时
|
||||
|
||||
/**
|
||||
* 基于 sessionId 的互斥锁(H1 修正)
|
||||
* 防止并发 patch 导致 get-merge-set 竞态覆盖
|
||||
*/
|
||||
private locks = new Map<string, Promise<any>>();
|
||||
|
||||
private cacheKey(sessionId: string): string {
|
||||
return `${this.KEY_PREFIX}${sessionId}`;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 核心 CRUD
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
async get(sessionId: string): Promise<SessionBlackboard | null> {
|
||||
try {
|
||||
const data = await cache.get<SessionBlackboard>(this.cacheKey(sessionId));
|
||||
if (!data) return null;
|
||||
|
||||
const parsed = SessionBlackboardSchema.safeParse(data);
|
||||
if (!parsed.success) {
|
||||
logger.warn(`SessionBlackboard schema validation failed for ${sessionId}:`, parsed.error.issues);
|
||||
return data as SessionBlackboard;
|
||||
}
|
||||
return parsed.data as SessionBlackboard;
|
||||
} catch (error) {
|
||||
logger.error(`Failed to get SessionBlackboard for ${sessionId}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async set(sessionId: string, data: SessionBlackboard): Promise<void> {
|
||||
try {
|
||||
data.updatedAt = new Date().toISOString();
|
||||
await cache.set(this.cacheKey(sessionId), data, this.DEFAULT_TTL);
|
||||
} catch (error) {
|
||||
logger.error(`Failed to set SessionBlackboard for ${sessionId}:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 部分更新 — 带互斥锁(H1)
|
||||
* 同一 session 的并发 patch 排队执行
|
||||
*/
|
||||
async patch(sessionId: string, partial: Partial<SessionBlackboard>): Promise<SessionBlackboard> {
|
||||
while (this.locks.has(sessionId)) {
|
||||
await this.locks.get(sessionId);
|
||||
}
|
||||
|
||||
const operation = (async (): Promise<SessionBlackboard> => {
|
||||
const current = await this.get(sessionId) ?? createEmptyBlackboard(sessionId);
|
||||
const merged: SessionBlackboard = {
|
||||
...current,
|
||||
...partial,
|
||||
sessionId: current.sessionId,
|
||||
createdAt: current.createdAt,
|
||||
updatedAt: new Date().toISOString(),
|
||||
};
|
||||
await this.set(sessionId, merged);
|
||||
return merged;
|
||||
})();
|
||||
|
||||
this.locks.set(sessionId, operation);
|
||||
try {
|
||||
return await operation;
|
||||
} finally {
|
||||
this.locks.delete(sessionId);
|
||||
}
|
||||
}
|
||||
|
||||
async delete(sessionId: string): Promise<void> {
|
||||
try {
|
||||
await cache.delete(this.cacheKey(sessionId));
|
||||
} catch (error) {
|
||||
logger.error(`Failed to delete SessionBlackboard for ${sessionId}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
async exists(sessionId: string): Promise<boolean> {
|
||||
return cache.has(this.cacheKey(sessionId));
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// DataOverview 便捷方法
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
async getDataOverview(sessionId: string): Promise<DataOverview | null> {
|
||||
const bb = await this.get(sessionId);
|
||||
return bb?.dataOverview ?? null;
|
||||
}
|
||||
|
||||
async setDataOverview(sessionId: string, overview: DataOverview): Promise<void> {
|
||||
await this.patch(sessionId, { dataOverview: overview });
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// VariableDictionary 便捷方法
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
async getVariableDictionary(sessionId: string): Promise<VariableDictEntry[]> {
|
||||
const bb = await this.get(sessionId);
|
||||
return bb?.variableDictionary ?? [];
|
||||
}
|
||||
|
||||
async setVariableDictionary(sessionId: string, dict: VariableDictEntry[]): Promise<void> {
|
||||
await this.patch(sessionId, { variableDictionary: dict });
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新单个变量的字典条目
|
||||
* 用于用户编辑变量类型/标签/PICO 角色
|
||||
*/
|
||||
async updateVariable(sessionId: string, varName: string, patchData: VariableDictPatch): Promise<VariableDictEntry | null> {
|
||||
while (this.locks.has(sessionId)) {
|
||||
await this.locks.get(sessionId);
|
||||
}
|
||||
|
||||
const operation = (async (): Promise<VariableDictEntry | null> => {
|
||||
const bb = await this.get(sessionId);
|
||||
if (!bb) return null;
|
||||
|
||||
const idx = bb.variableDictionary.findIndex(v => v.name === varName);
|
||||
if (idx === -1) return null;
|
||||
|
||||
const entry = bb.variableDictionary[idx];
|
||||
if (patchData.confirmedType !== undefined) {
|
||||
entry.confirmedType = patchData.confirmedType;
|
||||
entry.confirmStatus = 'user_confirmed';
|
||||
}
|
||||
if (patchData.label !== undefined) {
|
||||
entry.label = patchData.label;
|
||||
}
|
||||
if (patchData.picoRole !== undefined) {
|
||||
entry.picoRole = patchData.picoRole;
|
||||
}
|
||||
|
||||
bb.variableDictionary[idx] = entry;
|
||||
await this.set(sessionId, bb);
|
||||
return entry;
|
||||
})();
|
||||
|
||||
this.locks.set(sessionId, operation);
|
||||
try {
|
||||
return await operation;
|
||||
} finally {
|
||||
this.locks.delete(sessionId);
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// PICO 推断便捷方法
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
async getPicoInference(sessionId: string): Promise<PicoInference | null> {
|
||||
const bb = await this.get(sessionId);
|
||||
return bb?.picoInference ?? null;
|
||||
}
|
||||
|
||||
async setPicoInference(sessionId: string, pico: PicoInference): Promise<void> {
|
||||
await this.patch(sessionId, { picoInference: pico });
|
||||
}
|
||||
|
||||
async confirmPico(sessionId: string, picoPatch: PicoPatch): Promise<PicoInference | null> {
|
||||
while (this.locks.has(sessionId)) {
|
||||
await this.locks.get(sessionId);
|
||||
}
|
||||
|
||||
const operation = (async (): Promise<PicoInference | null> => {
|
||||
const bb = await this.get(sessionId);
|
||||
if (!bb?.picoInference) return null;
|
||||
|
||||
const updated: PicoInference = {
|
||||
...bb.picoInference,
|
||||
...picoPatch,
|
||||
status: 'user_confirmed',
|
||||
};
|
||||
bb.picoInference = updated;
|
||||
await this.set(sessionId, bb);
|
||||
return updated;
|
||||
})();
|
||||
|
||||
this.locks.set(sessionId, operation);
|
||||
try {
|
||||
return await operation;
|
||||
} finally {
|
||||
this.locks.delete(sessionId);
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// QPER Trace 便捷方法(Phase II+ 使用)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
async appendTrace(sessionId: string, entry: QperTraceEntry): Promise<void> {
|
||||
while (this.locks.has(sessionId)) {
|
||||
await this.locks.get(sessionId);
|
||||
}
|
||||
|
||||
const operation = (async (): Promise<void> => {
|
||||
const bb = await this.get(sessionId) ?? createEmptyBlackboard(sessionId);
|
||||
bb.qperTrace.push(entry);
|
||||
await this.set(sessionId, bb);
|
||||
})();
|
||||
|
||||
this.locks.set(sessionId, operation);
|
||||
try {
|
||||
await operation;
|
||||
} finally {
|
||||
this.locks.delete(sessionId);
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 五段式报告生成(纯模板渲染,不依赖 LLM)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
generateFiveSectionReport(overview: DataOverview, dict: VariableDictEntry[]): FiveSectionReport {
|
||||
const { profile, completeCaseCount, normalityTests } = overview;
|
||||
const { summary, columns } = profile;
|
||||
|
||||
const varsWithMissing = columns
|
||||
.filter(c => c.missingCount > 0)
|
||||
.map(c => c.name);
|
||||
|
||||
const categoricalVars = dict
|
||||
.filter(v => v.inferredType === 'categorical')
|
||||
.map(v => v.name);
|
||||
|
||||
const numericVars = dict
|
||||
.filter(v => v.inferredType === 'numeric')
|
||||
.map(v => v.name);
|
||||
|
||||
const varsWithOutliers = columns
|
||||
.filter(c => (c.outlierCount ?? 0) > 0)
|
||||
.map(c => c.name);
|
||||
|
||||
const nonNormalVars = normalityTests
|
||||
.filter(t => !t.isNormal)
|
||||
.map(t => t.variable);
|
||||
|
||||
const normalVars = normalityTests
|
||||
.filter(t => t.isNormal)
|
||||
.map(t => t.variable);
|
||||
|
||||
return {
|
||||
basicInfo: {
|
||||
title: '数据基本特征检测',
|
||||
content: `数据集共有${summary.totalRows}个病例,${summary.totalColumns}个变量。`,
|
||||
},
|
||||
missingData: {
|
||||
title: '数据缺失状况检测',
|
||||
content: varsWithMissing.length > 0
|
||||
? `所有变量中,有${varsWithMissing.length}个变量存在缺失数据,变量名如下:${varsWithMissing.slice(0, 5).join(' ')}${varsWithMissing.length > 5 ? ' 等' : ''}。数据完整的病例共有${completeCaseCount}个。`
|
||||
: `所有变量均无缺失数据,数据完整的病例共有${completeCaseCount}个。`,
|
||||
varsWithMissing,
|
||||
completeCaseCount,
|
||||
},
|
||||
dataTypes: {
|
||||
title: '数据类型检测',
|
||||
content: categoricalVars.length > 0
|
||||
? `所有变量均为数值型,其中有${categoricalVars.length}个变量为分类变量,请确认变量类型是否正确,这对统计分析方法的选择非常重要。分类变量的变量名如下:${categoricalVars.slice(0, 5).join(' ')}${categoricalVars.length > 5 ? ' 等' : ''}。`
|
||||
: `所有变量均为连续型数值变量。`,
|
||||
categoricalVars,
|
||||
numericVars,
|
||||
needsConfirmation: true,
|
||||
},
|
||||
outliers: {
|
||||
title: '数据异常值检测',
|
||||
content: varsWithOutliers.length > 0
|
||||
? `所有变量中,有${varsWithOutliers.length}个变量存在异常值,异常值判定是应用四分位数±1.5倍四分位间距,在此之外判定为异常值。变量名如下:${varsWithOutliers.slice(0, 5).join(' ')}${varsWithOutliers.length > 5 ? ' 等' : ''}。异常值不一定是错误值,请查看原始数据并结合实际情况进行处理,不宜直接修改或删除。`
|
||||
: `所有变量均未检测到异常值(基于 IQR 方法)。`,
|
||||
method: 'IQR (Q1-1.5*IQR, Q3+1.5*IQR)',
|
||||
varsWithOutliers,
|
||||
},
|
||||
normality: {
|
||||
title: '正态分布检测',
|
||||
content: nonNormalVars.length > 0
|
||||
? `所有连续变量中,有${nonNormalVars.length}个变量为非正态分布,使用R函数shapiro.test进行判定,当p<0.05认为非正态分布。变量名如下:${nonNormalVars.slice(0, 5).join(' ')}${nonNormalVars.length > 5 ? ' 等' : ''}。统计学家更推荐直方图,P-P图、Q-Q图进行判定,本判定的结果供您参考。`
|
||||
: `所有连续变量均通过正态性检验(Shapiro-Wilk, p >= 0.05)。`,
|
||||
method: 'Shapiro-Wilk (p < 0.05 判定为非正态)',
|
||||
nonNormalVars,
|
||||
normalVars,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
export const sessionBlackboardService = new SessionBlackboardService();
|
||||
153
backend/src/modules/ssa/services/SystemPromptService.ts
Normal file
153
backend/src/modules/ssa/services/SystemPromptService.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
/**
|
||||
* Phase II — System Prompt 动态组装服务
|
||||
*
|
||||
* 六段式组装(H2 Lost-in-the-Middle 修正):
|
||||
* [1] base_system — 固定角色定义
|
||||
* [2] data_context — DataOverview 摘要
|
||||
* [3] pico_inference — PICO 分类
|
||||
* [4] variable_dictionary — 变量字典摘要
|
||||
* [5] tool_outputs — 工具调用结果(冗长数据放中间)
|
||||
* [6] intent_instruction — 意图指令(核心指令放最后,永不裁剪)
|
||||
*
|
||||
* Token 预算 <= 4000(C2),超出按 [5] > [4] > [3] > [2] 优先级裁剪。
|
||||
* [6] intent_instruction 永不裁剪。
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { getPromptService } from '../../../common/prompt/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { tokenTruncationService, type TruncationOptions } from './TokenTruncationService.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import type { SessionBlackboard } from '../types/session-blackboard.types.js';
|
||||
|
||||
export type IntentType = 'chat' | 'explore' | 'consult' | 'analyze' | 'discuss' | 'feedback';
|
||||
|
||||
const INTENT_PROMPT_CODES: Record<IntentType, string> = {
|
||||
chat: 'SSA_INTENT_CHAT',
|
||||
explore: 'SSA_INTENT_EXPLORE',
|
||||
consult: 'SSA_INTENT_CONSULT',
|
||||
analyze: 'SSA_INTENT_ANALYZE',
|
||||
discuss: 'SSA_INTENT_DISCUSS',
|
||||
feedback: 'SSA_INTENT_FEEDBACK',
|
||||
};
|
||||
|
||||
const MAX_SYSTEM_TOKENS = 4000;
|
||||
|
||||
export class SystemPromptService {
|
||||
|
||||
/**
|
||||
* 组装完整 System Prompt(六段式,H2 修正顺序)
|
||||
*/
|
||||
async assemble(
|
||||
sessionId: string,
|
||||
intent: IntentType,
|
||||
toolOutputs?: string,
|
||||
): Promise<string> {
|
||||
const promptService = getPromptService(prisma);
|
||||
|
||||
// [1] Base system role
|
||||
let baseSystem = '';
|
||||
try {
|
||||
const rendered = await promptService.get('SSA_BASE_SYSTEM', {});
|
||||
baseSystem = rendered.content;
|
||||
} catch {
|
||||
baseSystem = this.fallbackBaseSystem();
|
||||
}
|
||||
|
||||
// [2-4] DataContext from SessionBlackboard (truncated)
|
||||
let dataContextBlock = '';
|
||||
const blackboard = await sessionBlackboardService.get(sessionId);
|
||||
if (blackboard) {
|
||||
const truncated = tokenTruncationService.truncate(blackboard, {
|
||||
maxTokens: this.calculateDataBudget(baseSystem, toolOutputs),
|
||||
strategy: 'balanced',
|
||||
});
|
||||
dataContextBlock = tokenTruncationService.toPromptString(truncated);
|
||||
}
|
||||
|
||||
// [5] Tool outputs (placed in middle — H2 fix)
|
||||
const toolBlock = toolOutputs
|
||||
? `\n\n## 工具执行结果\n${toolOutputs}`
|
||||
: '';
|
||||
|
||||
// [6] Intent instruction (placed LAST — H2 fix, never truncated)
|
||||
let intentInstruction = '';
|
||||
const intentCode = INTENT_PROMPT_CODES[intent];
|
||||
try {
|
||||
const rendered = await promptService.get(intentCode, {});
|
||||
intentInstruction = rendered.content;
|
||||
} catch {
|
||||
intentInstruction = this.fallbackIntentInstruction(intent);
|
||||
}
|
||||
|
||||
// Assemble: [1] Base → [2-4] DataContext → [5] ToolOutputs → [6] IntentInstruction
|
||||
const parts: string[] = [baseSystem];
|
||||
|
||||
if (dataContextBlock) {
|
||||
parts.push(dataContextBlock);
|
||||
}
|
||||
|
||||
if (toolBlock) {
|
||||
parts.push(toolBlock);
|
||||
}
|
||||
|
||||
// Intent instruction is ALWAYS last (H2 — Lost in the Middle fix)
|
||||
parts.push(`\n\n## 当前任务指令\n${intentInstruction}`);
|
||||
|
||||
const assembled = parts.join('\n\n');
|
||||
|
||||
const estimatedTokens = Math.ceil(assembled.length / 2);
|
||||
logger.debug('[SSA:SystemPrompt] Assembled', {
|
||||
sessionId,
|
||||
intent,
|
||||
estimatedTokens,
|
||||
hasData: !!blackboard,
|
||||
hasToolOutput: !!toolOutputs,
|
||||
});
|
||||
|
||||
if (estimatedTokens > MAX_SYSTEM_TOKENS) {
|
||||
logger.warn('[SSA:SystemPrompt] Exceeded token budget', {
|
||||
estimatedTokens,
|
||||
maxTokens: MAX_SYSTEM_TOKENS,
|
||||
});
|
||||
}
|
||||
|
||||
return assembled;
|
||||
}
|
||||
|
||||
private calculateDataBudget(baseSystem: string, toolOutputs?: string): number {
|
||||
const baseTokens = Math.ceil(baseSystem.length / 2);
|
||||
const toolTokens = toolOutputs ? Math.ceil(toolOutputs.length / 2) : 0;
|
||||
const intentReserve = 500; // intent instruction reserve
|
||||
return Math.max(500, MAX_SYSTEM_TOKENS - baseTokens - toolTokens - intentReserve);
|
||||
}
|
||||
|
||||
private fallbackBaseSystem(): string {
|
||||
return `你是 SSA-Pro 智能统计分析助手,专注于临床研究统计分析。
|
||||
你具备以下能力:
|
||||
- 理解临床研究数据的结构和特征
|
||||
- 推荐合适的统计分析方法
|
||||
- 解读统计分析结果
|
||||
- 用通俗易懂的语言向医学研究者解释统计概念
|
||||
|
||||
沟通原则:
|
||||
- 使用中文回复
|
||||
- 语言专业但不晦涩
|
||||
- 分点作答,条理清晰
|
||||
- 对不确定的内容如实说明`;
|
||||
}
|
||||
|
||||
private fallbackIntentInstruction(intent: IntentType): string {
|
||||
const map: Record<IntentType, string> = {
|
||||
chat: '请基于统计知识和用户数据直接回答用户的问题。不要主动建议执行分析,除非用户明确要求。简洁作答,分点清晰。',
|
||||
explore: '用户想了解数据的特征。请基于上方的数据摘要信息,帮用户解读数据特征(缺失、分布、异常值等)。可以推断 PICO 结构。不要执行分析。',
|
||||
consult: '用户在咨询统计方法。请根据数据特征和研究目的推荐合适的统计方法,给出选择理由和前提条件。不要直接执行分析。提供替代方案。',
|
||||
analyze: '以下是工具执行结果。请向用户简要说明分析进展和关键发现。使用通俗语言,避免过度技术化。',
|
||||
discuss: '用户想讨论分析结果。请帮助用户深入解读结果,解释统计量的含义,讨论临床意义和局限性。',
|
||||
feedback: '用户对之前的分析结果不满意或有改进建议。请分析问题原因,提出改进方案(如更换统计方法、调整参数等)。',
|
||||
};
|
||||
return map[intent];
|
||||
}
|
||||
}
|
||||
|
||||
export const systemPromptService = new SystemPromptService();
|
||||
204
backend/src/modules/ssa/services/TokenTruncationService.ts
Normal file
204
backend/src/modules/ssa/services/TokenTruncationService.ts
Normal file
@@ -0,0 +1,204 @@
|
||||
/**
|
||||
* Phase I — Token 截断服务
|
||||
*
|
||||
* 在将 SessionBlackboard 数据注入 LLM Prompt 之前,
|
||||
* 按优先级策略裁剪 payload 以适配模型上下文窗口。
|
||||
*
|
||||
* 裁剪策略(按优先级从低到高保留):
|
||||
* 1. 完整变量字典 → 仅保留非 isIdLike 的变量
|
||||
* 2. topValues 列表 → 截断到 top 5
|
||||
* 3. 数值列详细统计 → 保留 mean/std/median + 去掉 skewness/kurtosis
|
||||
* 4. normalityTests → 仅保留非正态的变量
|
||||
* 5. picoInference → 始终保留(最高优先级)
|
||||
* 6. fiveSectionReport.content → 若超限则截断到前 500 字符
|
||||
*
|
||||
* 预估 token 使用简易方式: 1 中文字 ≈ 2 tokens, 1 英文词 ≈ 1.3 tokens
|
||||
* 通过 JSON.stringify 长度 / 2 作为粗略上界。
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import type {
|
||||
SessionBlackboard,
|
||||
DataOverview,
|
||||
VariableDictEntry,
|
||||
FiveSectionReport,
|
||||
} from '../types/session-blackboard.types.js';
|
||||
|
||||
export interface TruncationOptions {
|
||||
maxTokens?: number;
|
||||
strategy?: 'aggressive' | 'balanced' | 'minimal';
|
||||
}
|
||||
|
||||
interface TruncatedContext {
|
||||
overview: string;
|
||||
variables: string;
|
||||
pico: string;
|
||||
report: string;
|
||||
estimatedTokens: number;
|
||||
}
|
||||
|
||||
const DEFAULT_MAX_TOKENS = 3000;
|
||||
|
||||
export class TokenTruncationService {
|
||||
|
||||
/**
|
||||
* 将 SessionBlackboard 截断为可注入 Prompt 的紧凑文本。
|
||||
*/
|
||||
truncate(
|
||||
blackboard: SessionBlackboard,
|
||||
options: TruncationOptions = {},
|
||||
): TruncatedContext {
|
||||
const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
|
||||
const strategy = options.strategy ?? 'balanced';
|
||||
|
||||
logger.debug('[SSA:TokenTrunc] Truncating context', {
|
||||
sessionId: blackboard.sessionId,
|
||||
maxTokens,
|
||||
strategy,
|
||||
});
|
||||
|
||||
const pico = this.formatPico(blackboard);
|
||||
const overview = this.formatOverview(blackboard.dataOverview, strategy);
|
||||
const variables = this.formatVariables(blackboard.variableDictionary, strategy);
|
||||
const report = this.formatReport(blackboard, strategy);
|
||||
|
||||
let ctx: TruncatedContext = {
|
||||
pico,
|
||||
overview,
|
||||
variables,
|
||||
report,
|
||||
estimatedTokens: 0,
|
||||
};
|
||||
|
||||
ctx.estimatedTokens = this.estimateTokens(ctx);
|
||||
|
||||
if (ctx.estimatedTokens > maxTokens) {
|
||||
ctx = this.applyAggressiveTruncation(ctx, blackboard, maxTokens);
|
||||
}
|
||||
|
||||
logger.debug('[SSA:TokenTrunc] Truncation complete', {
|
||||
estimatedTokens: ctx.estimatedTokens,
|
||||
maxTokens,
|
||||
});
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
/**
|
||||
* 一次性生成可直接拼入 system prompt 的字符串。
|
||||
*/
|
||||
toPromptString(ctx: TruncatedContext): string {
|
||||
const parts: string[] = [];
|
||||
|
||||
if (ctx.pico) parts.push(`## PICO 结构\n${ctx.pico}`);
|
||||
if (ctx.overview) parts.push(`## 数据概览\n${ctx.overview}`);
|
||||
if (ctx.variables) parts.push(`## 变量列表\n${ctx.variables}`);
|
||||
if (ctx.report) parts.push(`## 数据诊断摘要\n${ctx.report}`);
|
||||
|
||||
return parts.join('\n\n');
|
||||
}
|
||||
|
||||
private formatPico(bb: SessionBlackboard): string {
|
||||
const p = bb.picoInference;
|
||||
if (!p) return '';
|
||||
const lines = [];
|
||||
if (p.population) lines.push(`P (人群): ${p.population}`);
|
||||
if (p.intervention) lines.push(`I (干预): ${p.intervention}`);
|
||||
if (p.comparison) lines.push(`C (对照): ${p.comparison}`);
|
||||
if (p.outcome) lines.push(`O (结局): ${p.outcome}`);
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
private formatOverview(ov: DataOverview | null, strategy: string): string {
|
||||
if (!ov) return '';
|
||||
const s = ov.profile.summary;
|
||||
let text = `${s.totalRows} 行 × ${s.totalColumns} 列, 缺失率 ${s.overallMissingRate}%, 完整病例 ${ov.completeCaseCount}`;
|
||||
|
||||
if (strategy !== 'aggressive' && ov.normalityTests?.length) {
|
||||
const nonNormal = ov.normalityTests.filter(t => !t.isNormal).map(t => t.variable);
|
||||
if (nonNormal.length > 0) {
|
||||
text += `\n非正态: ${nonNormal.join(', ')}`;
|
||||
}
|
||||
}
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
private formatVariables(dict: VariableDictEntry[], strategy: string): string {
|
||||
let vars = dict.filter(v => !v.isIdLike);
|
||||
|
||||
if (strategy === 'aggressive') {
|
||||
vars = vars.slice(0, 15);
|
||||
}
|
||||
|
||||
return vars.map(v => {
|
||||
const type = v.confirmedType ?? v.inferredType;
|
||||
const label = v.label ? ` "${v.label}"` : '';
|
||||
const role = v.picoRole ? ` [${v.picoRole}]` : '';
|
||||
return `- ${v.name}: ${type}${label}${role}`;
|
||||
}).join('\n');
|
||||
}
|
||||
|
||||
private formatReport(bb: SessionBlackboard, strategy: string): string {
|
||||
const report = bb.dataOverview
|
||||
? this.buildReportSummary(bb.dataOverview)
|
||||
: '';
|
||||
|
||||
if (strategy === 'aggressive' && report.length > 500) {
|
||||
return report.slice(0, 500) + '...';
|
||||
}
|
||||
return report;
|
||||
}
|
||||
|
||||
private buildReportSummary(ov: DataOverview): string {
|
||||
const s = ov.profile.summary;
|
||||
const lines: string[] = [];
|
||||
|
||||
const missingCols = ov.profile.columns.filter(c => c.missingCount > 0);
|
||||
if (missingCols.length > 0) {
|
||||
lines.push(`缺失变量(${missingCols.length}): ${missingCols.map(c => c.name).join(', ')}`);
|
||||
}
|
||||
|
||||
const outlierCols = ov.profile.columns.filter(c => (c as any).outlierCount > 0);
|
||||
if (outlierCols.length > 0) {
|
||||
lines.push(`异常值变量(${outlierCols.length}): ${outlierCols.map(c => c.name).join(', ')}`);
|
||||
}
|
||||
|
||||
const catCount = s.categoricalColumns;
|
||||
const numCount = s.numericColumns;
|
||||
lines.push(`类型: 数值${numCount} + 分类${catCount}`);
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
private estimateTokens(ctx: TruncatedContext): number {
|
||||
const total = ctx.pico.length + ctx.overview.length + ctx.variables.length + ctx.report.length;
|
||||
return Math.ceil(total / 2);
|
||||
}
|
||||
|
||||
private applyAggressiveTruncation(
|
||||
ctx: TruncatedContext,
|
||||
bb: SessionBlackboard,
|
||||
maxTokens: number,
|
||||
): TruncatedContext {
|
||||
const result = { ...ctx };
|
||||
|
||||
result.report = result.report.length > 300 ? result.report.slice(0, 300) + '...' : result.report;
|
||||
|
||||
let vars = bb.variableDictionary.filter(v => !v.isIdLike);
|
||||
if (vars.length > 10) {
|
||||
const picoVars = vars.filter(v => v.picoRole);
|
||||
const others = vars.filter(v => !v.picoRole).slice(0, 10 - picoVars.length);
|
||||
vars = [...picoVars, ...others];
|
||||
}
|
||||
result.variables = vars.map(v => {
|
||||
const type = v.confirmedType ?? v.inferredType;
|
||||
return `- ${v.name}: ${type}`;
|
||||
}).join('\n');
|
||||
|
||||
result.estimatedTokens = this.estimateTokens(result);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
export const tokenTruncationService = new TokenTruncationService();
|
||||
134
backend/src/modules/ssa/services/ToolOrchestratorService.ts
Normal file
134
backend/src/modules/ssa/services/ToolOrchestratorService.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
/**
|
||||
* ToolOrchestratorService — QPER 服务薄层封装
|
||||
*
|
||||
* Phase IV: 对话层调用 QPER 的统一入口
|
||||
* - plan(): 读黑板(PICO hint) → planWorkflow → WorkflowPlan
|
||||
* - formatPlanForLLM(): WorkflowPlan → LLM 可读摘要
|
||||
*
|
||||
* D4: 不创建独立 Tool 类,本 Service 统一封装
|
||||
* D5: PICO 可选 hint — 用户直接表述优先于系统推断
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { sessionBlackboardService } from './SessionBlackboardService.js';
|
||||
import { workflowPlannerService, type WorkflowPlan } from './WorkflowPlannerService.js';
|
||||
import type { PicoInference } from '../types/session-blackboard.types.js';
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Types
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface PlanResult {
|
||||
success: boolean;
|
||||
plan: WorkflowPlan | null;
|
||||
error?: string;
|
||||
picoUsed: boolean;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Service
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export class ToolOrchestratorService {
|
||||
|
||||
/**
|
||||
* 生成分析计划 — 对话层 → QPER 的桥梁
|
||||
*
|
||||
* 三层降级策略 (D5):
|
||||
* 1. 用户消息明确 → planWorkflow (LLM 解析)
|
||||
* 2. 用户消息模糊 + PICO 存在 → PICO hint 注入 LLM prompt
|
||||
* 3. 用户消息模糊 + 无 PICO → 纯 LLM + DataProfile 推断
|
||||
*/
|
||||
async plan(sessionId: string, userMessage: string): Promise<PlanResult> {
|
||||
let picoUsed = false;
|
||||
|
||||
try {
|
||||
const bb = await sessionBlackboardService.get(sessionId);
|
||||
|
||||
let enrichedQuery = userMessage;
|
||||
|
||||
if (bb?.picoInference) {
|
||||
const hint = this.buildPicoHint(bb.picoInference);
|
||||
if (hint) {
|
||||
enrichedQuery = `${userMessage}\n\n[参考上下文 - PICO 推断] ${hint}`;
|
||||
picoUsed = true;
|
||||
logger.info('[SSA:Orchestrator] PICO hint injected', {
|
||||
sessionId,
|
||||
confidence: bb.picoInference.confidence,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const plan = await workflowPlannerService.planWorkflow(sessionId, enrichedQuery);
|
||||
|
||||
logger.info('[SSA:Orchestrator] Plan generated', {
|
||||
sessionId,
|
||||
workflowId: plan.workflow_id,
|
||||
totalSteps: plan.total_steps,
|
||||
picoUsed,
|
||||
});
|
||||
|
||||
return { success: true, plan, picoUsed };
|
||||
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:Orchestrator] Plan failed', {
|
||||
sessionId,
|
||||
error: error.message,
|
||||
});
|
||||
|
||||
return {
|
||||
success: false,
|
||||
plan: null,
|
||||
error: error.message,
|
||||
picoUsed,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化 WorkflowPlan 为 LLM 可读摘要(注入 toolOutputs)
|
||||
*/
|
||||
formatPlanForLLM(plan: WorkflowPlan): string {
|
||||
const lines = [
|
||||
`## 分析方案: ${plan.title}`,
|
||||
plan.description,
|
||||
'',
|
||||
`共 ${plan.total_steps} 个分析步骤:`,
|
||||
];
|
||||
|
||||
for (const step of plan.steps) {
|
||||
const desc = step.switch_condition
|
||||
? `${step.description} [切换条件: ${step.switch_condition}]`
|
||||
: step.description;
|
||||
lines.push(`${step.step_number}. **${step.tool_name}** — ${desc}`);
|
||||
}
|
||||
|
||||
if (plan.epv_warning) {
|
||||
lines.push('', `⚠️ ${plan.epv_warning}`);
|
||||
}
|
||||
|
||||
if (plan.planned_trace) {
|
||||
lines.push('', `决策依据: ${plan.planned_trace.reasoning}`);
|
||||
}
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建 PICO hint 文本(仅在 PICO 有实质内容时生成)
|
||||
*/
|
||||
private buildPicoHint(pico: PicoInference): string | null {
|
||||
const parts: string[] = [];
|
||||
|
||||
if (pico.population) parts.push(`研究人群=${pico.population}`);
|
||||
if (pico.intervention) parts.push(`干预/暴露=${pico.intervention}`);
|
||||
if (pico.comparison) parts.push(`对照=${pico.comparison}`);
|
||||
if (pico.outcome) parts.push(`结局=${pico.outcome}`);
|
||||
|
||||
if (parts.length === 0) return null;
|
||||
|
||||
return `${parts.join(', ')} (置信度: ${pico.confidence})`;
|
||||
}
|
||||
}
|
||||
|
||||
export const toolOrchestratorService = new ToolOrchestratorService();
|
||||
163
backend/src/modules/ssa/services/ToolRegistryService.ts
Normal file
163
backend/src/modules/ssa/services/ToolRegistryService.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
/**
|
||||
* ToolRegistryService — 工具注册表查询服务
|
||||
*
|
||||
* H2 仓储模式: 通过 IToolRepository 接口隔离数据源,
|
||||
* 当前使用 JsonToolRepository(读 tools_registry.json),
|
||||
* 未来可无缝切换为 PgToolRepository(读 Prisma/PostgreSQL)。
|
||||
*/
|
||||
|
||||
import { toolsRegistryLoader } from '../config/index.js';
|
||||
import type { ToolDefinition, ToolsRegistry } from '../config/schemas.js';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// Repository Interface (H2)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface IToolRepository {
|
||||
getAll(): ToolDefinition[];
|
||||
getByCode(code: string): ToolDefinition | null;
|
||||
reload?(): void;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// JSON-based implementation (Phase III)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export class JsonToolRepository implements IToolRepository {
|
||||
getAll(): ToolDefinition[] {
|
||||
return toolsRegistryLoader.get().tools;
|
||||
}
|
||||
|
||||
getByCode(code: string): ToolDefinition | null {
|
||||
return this.getAll().find(t => t.code === code) || null;
|
||||
}
|
||||
|
||||
reload(): void {
|
||||
toolsRegistryLoader.reload();
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// ToolRegistryService
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export class ToolRegistryService {
|
||||
constructor(private repo: IToolRepository) {}
|
||||
|
||||
getAll(): ToolDefinition[] {
|
||||
return this.repo.getAll();
|
||||
}
|
||||
|
||||
getByCode(code: string): ToolDefinition | null {
|
||||
return this.repo.getByCode(code);
|
||||
}
|
||||
|
||||
getToolName(code: string): string {
|
||||
return this.repo.getByCode(code)?.name ?? code;
|
||||
}
|
||||
|
||||
getByCategory(category: string): ToolDefinition[] {
|
||||
return this.repo.getAll().filter(t => t.category === category);
|
||||
}
|
||||
|
||||
/**
|
||||
* 按分析目标 + 变量类型推荐相关工具
|
||||
* goal: comparison/correlation/regression/descriptive
|
||||
*/
|
||||
getRelevantTools(goal?: string, outputType?: string): ToolDefinition[] {
|
||||
const all = this.repo.getAll();
|
||||
if (!goal && !outputType) return all;
|
||||
|
||||
return all.filter(t => {
|
||||
if (goal && outputType) {
|
||||
return t.outputType === outputType || t.category === goal;
|
||||
}
|
||||
if (outputType) return t.outputType === outputType;
|
||||
// goal → category mapping heuristic
|
||||
const goalCategoryMap: Record<string, string[]> = {
|
||||
comparison: ['parametric', 'nonparametric', 'categorical'],
|
||||
correlation: ['correlation'],
|
||||
regression: ['regression'],
|
||||
descriptive: ['basic', 'composite'],
|
||||
};
|
||||
const categories = goalCategoryMap[goal!] || [];
|
||||
return categories.includes(t.category);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 按意图/阶段过滤工具列表(Phase IV: 阶段性可见性)
|
||||
* explore/chat: 只看到数据类工具
|
||||
* consult: 看到全部工具(供推荐参考)
|
||||
* analyze: 看到执行类工具
|
||||
*/
|
||||
getVisibleTools(intent?: string): ToolDefinition[] {
|
||||
const all = this.repo.getAll();
|
||||
if (!intent) return all;
|
||||
|
||||
const dataCategories = ['basic', 'composite'];
|
||||
const execCategories = ['parametric', 'nonparametric', 'categorical', 'correlation', 'regression'];
|
||||
|
||||
switch (intent) {
|
||||
case 'explore':
|
||||
case 'chat':
|
||||
return all.filter(t => dataCategories.includes(t.category));
|
||||
case 'consult':
|
||||
return all;
|
||||
case 'analyze':
|
||||
case 'feedback':
|
||||
return all;
|
||||
default:
|
||||
return all;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化工具列表为 LLM 可读文本
|
||||
*/
|
||||
formatForLLM(tools?: ToolDefinition[]): string {
|
||||
const list = tools || this.repo.getAll();
|
||||
return list
|
||||
.map(t => {
|
||||
let line = `- ${t.code}: ${t.name} — ${t.description}`;
|
||||
if (t.prerequisite) line += ` [前提: ${t.prerequisite}]`;
|
||||
if (t.fallback) line += ` [降级: ${t.fallback}]`;
|
||||
return line;
|
||||
})
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* 格式化单个工具的详细信息(用于 method_consult 注入)
|
||||
*/
|
||||
formatToolDetail(code: string): string | null {
|
||||
const tool = this.repo.getByCode(code);
|
||||
if (!tool) return null;
|
||||
|
||||
const lines = [
|
||||
`**${tool.name}** (${tool.code})`,
|
||||
`- 类别: ${tool.category}`,
|
||||
`- 描述: ${tool.description}`,
|
||||
];
|
||||
if (tool.prerequisite) lines.push(`- 前提条件: ${tool.prerequisite}`);
|
||||
if (tool.fallback) lines.push(`- 降级方案: ${tool.fallback}`);
|
||||
if (tool.inputParams.length) {
|
||||
const params = tool.inputParams
|
||||
.map(p => `${p.name}${p.required ? '' : '?'}: ${p.type}`)
|
||||
.join(', ');
|
||||
lines.push(`- 参数: ${params}`);
|
||||
}
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
reload(): void {
|
||||
if (this.repo.reload) {
|
||||
this.repo.reload();
|
||||
logger.info('[ToolRegistry] 热更新完成');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Singleton — 默认使用 JSON 数据源
|
||||
export const toolRegistryService = new ToolRegistryService(new JsonToolRepository());
|
||||
@@ -16,7 +16,7 @@ import axios, { AxiosInstance } from 'axios';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { storage } from '../../../common/storage/index.js';
|
||||
import { WorkflowStep, ToolCode, AVAILABLE_TOOLS } from './WorkflowPlannerService.js';
|
||||
import { WorkflowStep, ToolCode } from './WorkflowPlannerService.js';
|
||||
import { conclusionGeneratorService } from './ConclusionGeneratorService.js';
|
||||
import { reflectionService } from './ReflectionService.js';
|
||||
import type { ConclusionReport } from '../types/reflection.types.js';
|
||||
|
||||
@@ -15,72 +15,14 @@ import { DataProfile, dataProfileService } from './DataProfileService.js';
|
||||
import { queryService } from './QueryService.js';
|
||||
import { decisionTableService, type MatchResult } from './DecisionTableService.js';
|
||||
import { flowTemplateService, type FilledStep, type FillResult } from './FlowTemplateService.js';
|
||||
import { toolsRegistryLoader } from '../config/index.js';
|
||||
import { toolRegistryService } from './ToolRegistryService.js';
|
||||
import type { ParsedQuery } from '../types/query.types.js';
|
||||
|
||||
// 可用工具定义
|
||||
export const AVAILABLE_TOOLS = {
|
||||
ST_DESCRIPTIVE: {
|
||||
code: 'ST_DESCRIPTIVE',
|
||||
name: '描述性统计',
|
||||
category: 'basic',
|
||||
description: '数据概况、基线特征表',
|
||||
inputParams: ['variables', 'group_var?'],
|
||||
outputType: 'summary'
|
||||
},
|
||||
ST_T_TEST_IND: {
|
||||
code: 'ST_T_TEST_IND',
|
||||
name: '独立样本T检验',
|
||||
category: 'parametric',
|
||||
description: '两组连续变量比较(参数方法)',
|
||||
inputParams: ['group_var', 'value_var'],
|
||||
outputType: 'comparison',
|
||||
prerequisite: '正态分布',
|
||||
fallback: 'ST_MANN_WHITNEY'
|
||||
},
|
||||
ST_MANN_WHITNEY: {
|
||||
code: 'ST_MANN_WHITNEY',
|
||||
name: 'Mann-Whitney U检验',
|
||||
category: 'nonparametric',
|
||||
description: '两组连续/等级变量比较(非参数方法)',
|
||||
inputParams: ['group_var', 'value_var'],
|
||||
outputType: 'comparison'
|
||||
},
|
||||
ST_T_TEST_PAIRED: {
|
||||
code: 'ST_T_TEST_PAIRED',
|
||||
name: '配对T检验',
|
||||
category: 'parametric',
|
||||
description: '配对设计的前后对比',
|
||||
inputParams: ['before_var', 'after_var'],
|
||||
outputType: 'comparison'
|
||||
},
|
||||
ST_CHI_SQUARE: {
|
||||
code: 'ST_CHI_SQUARE',
|
||||
name: '卡方检验',
|
||||
category: 'categorical',
|
||||
description: '两个分类变量的独立性检验',
|
||||
inputParams: ['var1', 'var2'],
|
||||
outputType: 'association'
|
||||
},
|
||||
ST_CORRELATION: {
|
||||
code: 'ST_CORRELATION',
|
||||
name: '相关分析',
|
||||
category: 'correlation',
|
||||
description: 'Pearson/Spearman相关系数',
|
||||
inputParams: ['var_x', 'var_y', 'method?'],
|
||||
outputType: 'correlation'
|
||||
},
|
||||
ST_LOGISTIC_BINARY: {
|
||||
code: 'ST_LOGISTIC_BINARY',
|
||||
name: '二元Logistic回归',
|
||||
category: 'regression',
|
||||
description: '二分类结局的多因素分析',
|
||||
inputParams: ['outcome_var', 'predictors', 'confounders?'],
|
||||
outputType: 'regression'
|
||||
}
|
||||
} as const;
|
||||
|
||||
export type ToolCode = keyof typeof AVAILABLE_TOOLS;
|
||||
/**
|
||||
* 工具代码类型 — 所有支持的统计工具 code
|
||||
* Phase IV: 不再依赖硬编码常量,工具定义统一由 ToolRegistryService 管理
|
||||
*/
|
||||
export type ToolCode = string;
|
||||
|
||||
/** P 层策略日志 — 记录规划决策,供 R 层合并 E 层事实后生成方法学说明 */
|
||||
export interface PlannedTrace {
|
||||
@@ -460,7 +402,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_DESCRIPTIVE',
|
||||
toolName: AVAILABLE_TOOLS.ST_DESCRIPTIVE.name,
|
||||
toolName: toolRegistryService.getToolName('ST_DESCRIPTIVE'),
|
||||
inputParams: {
|
||||
variables: descVars,
|
||||
group_var: intent.variables?.grouping
|
||||
@@ -477,7 +419,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_T_TEST_PAIRED',
|
||||
toolName: AVAILABLE_TOOLS.ST_T_TEST_PAIRED.name,
|
||||
toolName: toolRegistryService.getToolName('ST_T_TEST_PAIRED'),
|
||||
inputParams: {
|
||||
before_var: intent.variables.continuous[0],
|
||||
after_var: intent.variables.continuous[1]
|
||||
@@ -492,7 +434,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_T_TEST_IND',
|
||||
toolName: AVAILABLE_TOOLS.ST_T_TEST_IND.name,
|
||||
toolName: toolRegistryService.getToolName('ST_T_TEST_IND'),
|
||||
inputParams: {
|
||||
group_var: intent.variables.grouping,
|
||||
value_var: intent.variables.continuous[0]
|
||||
@@ -517,7 +459,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CHI_SQUARE',
|
||||
toolName: AVAILABLE_TOOLS.ST_CHI_SQUARE.name,
|
||||
toolName: toolRegistryService.getToolName('ST_CHI_SQUARE'),
|
||||
inputParams: {
|
||||
var1: var1,
|
||||
var2: var2
|
||||
@@ -530,7 +472,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CORRELATION',
|
||||
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
|
||||
toolName: toolRegistryService.getToolName('ST_CORRELATION'),
|
||||
inputParams: {
|
||||
var_x: var1,
|
||||
var_y: var2,
|
||||
@@ -548,7 +490,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_T_TEST_IND',
|
||||
toolName: AVAILABLE_TOOLS.ST_T_TEST_IND.name,
|
||||
toolName: toolRegistryService.getToolName('ST_T_TEST_IND'),
|
||||
inputParams: {
|
||||
group_var: catVar,
|
||||
value_var: contVar
|
||||
@@ -563,7 +505,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CORRELATION',
|
||||
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
|
||||
toolName: toolRegistryService.getToolName('ST_CORRELATION'),
|
||||
inputParams: {
|
||||
var_x: intent.variables.continuous[0],
|
||||
var_y: intent.variables.continuous[1],
|
||||
@@ -599,7 +541,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_LOGISTIC_BINARY',
|
||||
toolName: AVAILABLE_TOOLS.ST_LOGISTIC_BINARY.name,
|
||||
toolName: toolRegistryService.getToolName('ST_LOGISTIC_BINARY'),
|
||||
inputParams: {
|
||||
outcome_var: regressionOutcome,
|
||||
predictors: regressionPredictors
|
||||
@@ -608,20 +550,16 @@ export class WorkflowPlannerService {
|
||||
dependsOn: [1]
|
||||
});
|
||||
} else {
|
||||
// 连续结局 → 暂时也使用 Logistic 回归(TODO: 添加线性回归工具)
|
||||
// 实际应该使用线性回归,但当前工具库暂未支持
|
||||
logger.warn('[WorkflowPlanner] Linear regression not yet implemented, falling back to descriptive stats');
|
||||
// 添加一个额外的描述性统计步骤作为替代
|
||||
// 连续结局 → 线性回归
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CORRELATION',
|
||||
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
|
||||
toolCode: 'ST_LINEAR_REG',
|
||||
toolName: toolRegistryService.getToolName('ST_LINEAR_REG'),
|
||||
inputParams: {
|
||||
var_x: regressionPredictors[0],
|
||||
var_y: regressionOutcome,
|
||||
method: 'auto'
|
||||
outcome_var: regressionOutcome,
|
||||
predictors: regressionPredictors
|
||||
},
|
||||
purpose: `分析 ${regressionPredictors[0]} 与 ${regressionOutcome} 的相关性(线性回归待开发)`,
|
||||
purpose: `分析 ${regressionPredictors.join('、')} 对 ${regressionOutcome} 的影响(多因素线性回归)`,
|
||||
dependsOn: [1]
|
||||
});
|
||||
}
|
||||
@@ -630,7 +568,7 @@ export class WorkflowPlannerService {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_LOGISTIC_BINARY',
|
||||
toolName: AVAILABLE_TOOLS.ST_LOGISTIC_BINARY.name,
|
||||
toolName: toolRegistryService.getToolName('ST_LOGISTIC_BINARY'),
|
||||
inputParams: {
|
||||
outcome_var: intent.variables.grouping,
|
||||
predictors: intent.variables.continuous?.slice(0, 5) || []
|
||||
|
||||
139
backend/src/modules/ssa/services/tools/GetDataOverviewTool.ts
Normal file
139
backend/src/modules/ssa/services/tools/GetDataOverviewTool.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
/**
|
||||
* Phase I — get_data_overview 工具
|
||||
*
|
||||
* READ 层工具:调用 DataProfileService 获取数据画像,
|
||||
* 扩展正态性检验 + 完整病例数,
|
||||
* 初始化变量字典,写入 Session 黑板,
|
||||
* 返回五段式结构化报告。
|
||||
*/
|
||||
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
import { dataProfileService, type DataProfile, type ColumnProfile } from '../DataProfileService.js';
|
||||
import { sessionBlackboardService } from '../SessionBlackboardService.js';
|
||||
import {
|
||||
createEmptyBlackboard,
|
||||
type DataOverview,
|
||||
type VariableDictEntry,
|
||||
type NormalityResult,
|
||||
type FiveSectionReport,
|
||||
type SessionBlackboard,
|
||||
} from '../../types/session-blackboard.types.js';
|
||||
|
||||
export interface DataOverviewResult {
|
||||
success: boolean;
|
||||
report?: FiveSectionReport;
|
||||
blackboard?: SessionBlackboard;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行 get_data_overview:
|
||||
* 1. 调用 DataProfileService 获取 DataProfile(含正态性检验 + 完整病例数)
|
||||
* 2. 初始化 VariableDictionary
|
||||
* 3. 写入 SessionBlackboard
|
||||
* 4. 生成五段式报告
|
||||
*/
|
||||
export async function executeGetDataOverview(sessionId: string): Promise<DataOverviewResult> {
|
||||
try {
|
||||
logger.info('[SSA:GetDataOverview] Starting data overview generation', { sessionId });
|
||||
|
||||
// Step 1: 获取 DataProfile
|
||||
const profileResult = await dataProfileService.generateProfileFromSession(sessionId);
|
||||
|
||||
if (!profileResult.success || !profileResult.profile) {
|
||||
return {
|
||||
success: false,
|
||||
error: profileResult.error || '数据画像生成失败',
|
||||
};
|
||||
}
|
||||
|
||||
const profile = profileResult.profile;
|
||||
const rawResponse = profileResult as any;
|
||||
|
||||
// Step 2: 提取 Phase I 新增字段(Python 扩展返回)
|
||||
const normalityTests: NormalityResult[] = rawResponse.normalityTests
|
||||
?? extractNormalityFromProfile(profile);
|
||||
const completeCaseCount: number = rawResponse.completeCaseCount
|
||||
?? estimateCompleteCases(profile);
|
||||
|
||||
// Step 3: 构建 DataOverview
|
||||
const dataOverview: DataOverview = {
|
||||
profile,
|
||||
normalityTests,
|
||||
completeCaseCount,
|
||||
generatedAt: new Date().toISOString(),
|
||||
};
|
||||
|
||||
// Step 4: 初始化 VariableDictionary
|
||||
const variableDictionary = initVariableDictionary(profile.columns);
|
||||
|
||||
// Step 5: 写入 SessionBlackboard
|
||||
const blackboard = createEmptyBlackboard(sessionId);
|
||||
blackboard.dataOverview = dataOverview;
|
||||
blackboard.variableDictionary = variableDictionary;
|
||||
await sessionBlackboardService.set(sessionId, blackboard);
|
||||
|
||||
// Step 6: 生成五段式报告
|
||||
const report = sessionBlackboardService.generateFiveSectionReport(dataOverview, variableDictionary);
|
||||
|
||||
logger.info('[SSA:GetDataOverview] Data overview generation complete', {
|
||||
sessionId,
|
||||
totalVars: profile.summary.totalColumns,
|
||||
completeCases: completeCaseCount,
|
||||
nonNormalVars: normalityTests.filter(t => !t.isNormal).length,
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
report,
|
||||
blackboard,
|
||||
};
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:GetDataOverview] Failed', { sessionId, error: error.message });
|
||||
return {
|
||||
success: false,
|
||||
error: error.message,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从 DataProfile.columns 初始化变量字典。
|
||||
* 全部标记为 ai_inferred,等待用户确认。
|
||||
*/
|
||||
function initVariableDictionary(columns: ColumnProfile[]): VariableDictEntry[] {
|
||||
return columns.map(col => ({
|
||||
name: col.name,
|
||||
inferredType: col.type,
|
||||
confirmedType: null,
|
||||
label: null,
|
||||
picoRole: null,
|
||||
isIdLike: col.isIdLike ?? false,
|
||||
confirmStatus: 'ai_inferred' as const,
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* 降级方案:如果 Python 返回中没有 normalityTests,
|
||||
* 尝试根据 skewness/kurtosis 粗略估算。
|
||||
*/
|
||||
function extractNormalityFromProfile(profile: DataProfile): NormalityResult[] {
|
||||
return profile.columns
|
||||
.filter(c => c.type === 'numeric' && c.skewness !== undefined)
|
||||
.map(c => ({
|
||||
variable: c.name,
|
||||
method: 'shapiro_wilk' as const,
|
||||
statistic: 0,
|
||||
pValue: 0,
|
||||
isNormal: Math.abs(c.skewness ?? 0) < 1 && Math.abs(c.kurtosis ?? 0) < 3,
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* 降级方案:估算完整病例数。
|
||||
* 如果 Python 没有返回 completeCaseCount,用 totalRows - 最大单列缺失估算下界。
|
||||
*/
|
||||
function estimateCompleteCases(profile: DataProfile): number {
|
||||
const maxMissing = Math.max(...profile.columns.map(c => c.missingCount), 0);
|
||||
return Math.max(0, profile.summary.totalRows - maxMissing);
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Phase I — get_variable_detail 工具
|
||||
*
|
||||
* READ 层工具:调用 Python variable-detail 端点获取单变量的
|
||||
* 描述统计、直方图、正态性检验、Q-Q 图数据。
|
||||
* 同时更新 SessionBlackboard 中该变量的字典条目。
|
||||
*/
|
||||
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
import { dataProfileService } from '../DataProfileService.js';
|
||||
import { sessionBlackboardService } from '../SessionBlackboardService.js';
|
||||
import type { ColumnType, VariableDictPatch } from '../../types/session-blackboard.types.js';
|
||||
|
||||
export interface VariableDetailResult {
|
||||
success: boolean;
|
||||
variable?: string;
|
||||
type?: string;
|
||||
descriptive?: Record<string, any>;
|
||||
outliers?: Record<string, any>;
|
||||
histogram?: { counts: number[]; edges: number[] };
|
||||
normalityTest?: { method: string; statistic: number; pValue: number; isNormal: boolean } | null;
|
||||
qqPlot?: { theoretical: number[]; observed: number[] };
|
||||
distribution?: Array<{ value: string; count: number; percentage: number }>;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行 get_variable_detail:
|
||||
* 1. 调用 DataProfileService.getVariableDetail()(转发 Python 端点)
|
||||
* 2. 若 SessionBlackboard 存在,更新该变量的 confirmedType / label(如有)
|
||||
* 3. 返回单变量详情
|
||||
*/
|
||||
export async function executeGetVariableDetail(
|
||||
sessionId: string,
|
||||
variableName: string,
|
||||
confirmedType?: ColumnType,
|
||||
label?: string,
|
||||
): Promise<VariableDetailResult> {
|
||||
try {
|
||||
logger.info('[SSA:GetVariableDetail] Starting', { sessionId, variableName });
|
||||
|
||||
const detail = await dataProfileService.getVariableDetail(sessionId, variableName);
|
||||
|
||||
if (!detail.success) {
|
||||
return { success: false, error: detail.error || '变量详情获取失败' };
|
||||
}
|
||||
|
||||
if (confirmedType || label) {
|
||||
const patch: VariableDictPatch = {};
|
||||
if (confirmedType) {
|
||||
patch.confirmedType = confirmedType;
|
||||
}
|
||||
if (label) {
|
||||
patch.label = label;
|
||||
}
|
||||
await sessionBlackboardService.updateVariable(sessionId, variableName, patch);
|
||||
}
|
||||
|
||||
logger.info('[SSA:GetVariableDetail] Done', { sessionId, variableName, type: detail.type });
|
||||
|
||||
return {
|
||||
success: true,
|
||||
variable: detail.variable,
|
||||
type: detail.type,
|
||||
descriptive: detail.descriptive,
|
||||
outliers: detail.outliers,
|
||||
histogram: detail.histogram,
|
||||
normalityTest: detail.normalityTest,
|
||||
qqPlot: detail.qqPlot,
|
||||
distribution: detail.distribution,
|
||||
};
|
||||
} catch (error: any) {
|
||||
logger.error('[SSA:GetVariableDetail] Failed', { sessionId, variableName, error: error.message });
|
||||
return { success: false, error: error.message };
|
||||
}
|
||||
}
|
||||
251
backend/src/modules/ssa/types/session-blackboard.types.ts
Normal file
251
backend/src/modules/ssa/types/session-blackboard.types.ts
Normal file
@@ -0,0 +1,251 @@
|
||||
/**
|
||||
* Phase I — Session 黑板类型定义 + Zod Schema
|
||||
*
|
||||
* SessionBlackboard 是 SSA 模块的会话级数据缓存,
|
||||
* 存储为扁平单 JSON Blob,key = ssa:session:{sessionId}
|
||||
*
|
||||
* 四层数据结构:
|
||||
* Layer 1: DataOverview — get_data_overview 写入
|
||||
* Layer 2: VariableDictionary — get_data_overview 初始化,用户可编辑
|
||||
* Layer 3: PicoInference — LLM 推断 + 用户确认
|
||||
* Layer 4: QperTrace — Phase II+ 使用,Phase I 预留
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import type { DataProfile } from '../services/DataProfileService.js';
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 1. 正态性检验结果
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface NormalityResult {
|
||||
variable: string;
|
||||
method: 'shapiro_wilk' | 'kolmogorov_smirnov';
|
||||
statistic: number;
|
||||
pValue: number;
|
||||
isNormal: boolean;
|
||||
}
|
||||
|
||||
export const NormalityResultSchema = z.object({
|
||||
variable: z.string(),
|
||||
method: z.enum(['shapiro_wilk', 'kolmogorov_smirnov']),
|
||||
statistic: z.number(),
|
||||
pValue: z.number(),
|
||||
isNormal: z.boolean(),
|
||||
});
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 2. 数据总览(五段式报告数据源)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface DataOverview {
|
||||
profile: DataProfile;
|
||||
completeCaseCount: number;
|
||||
normalityTests: NormalityResult[];
|
||||
generatedAt: string;
|
||||
}
|
||||
|
||||
export const DataOverviewSchema = z.object({
|
||||
profile: z.object({
|
||||
columns: z.array(z.any()),
|
||||
summary: z.object({
|
||||
totalRows: z.number(),
|
||||
totalColumns: z.number(),
|
||||
numericColumns: z.number(),
|
||||
categoricalColumns: z.number(),
|
||||
datetimeColumns: z.number(),
|
||||
textColumns: z.number(),
|
||||
overallMissingRate: z.number(),
|
||||
totalMissingCells: z.number(),
|
||||
}),
|
||||
}),
|
||||
completeCaseCount: z.number(),
|
||||
normalityTests: z.array(NormalityResultSchema),
|
||||
generatedAt: z.string(),
|
||||
});
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 3. 变量字典条目
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export type ColumnType = 'numeric' | 'categorical' | 'datetime' | 'text';
|
||||
export type PicoRole = 'P' | 'I' | 'C' | 'O';
|
||||
export type ConfirmStatus = 'ai_inferred' | 'user_confirmed';
|
||||
|
||||
export interface VariableDictEntry {
|
||||
name: string;
|
||||
inferredType: ColumnType;
|
||||
confirmedType: ColumnType | null;
|
||||
label: string | null;
|
||||
picoRole: PicoRole | null;
|
||||
isIdLike: boolean;
|
||||
confirmStatus: ConfirmStatus;
|
||||
}
|
||||
|
||||
export const VariableDictEntrySchema = z.object({
|
||||
name: z.string(),
|
||||
inferredType: z.enum(['numeric', 'categorical', 'datetime', 'text']),
|
||||
confirmedType: z.enum(['numeric', 'categorical', 'datetime', 'text']).nullable(),
|
||||
label: z.string().nullable(),
|
||||
picoRole: z.enum(['P', 'I', 'C', 'O']).nullable(),
|
||||
isIdLike: z.boolean(),
|
||||
confirmStatus: z.enum(['ai_inferred', 'user_confirmed']),
|
||||
});
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 4. PICO 推断
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export type PicoConfidence = 'high' | 'medium' | 'low';
|
||||
|
||||
export interface PicoInference {
|
||||
population: string | null;
|
||||
intervention: string | null;
|
||||
comparison: string | null;
|
||||
outcome: string | null;
|
||||
confidence: PicoConfidence;
|
||||
status: ConfirmStatus;
|
||||
}
|
||||
|
||||
export const PicoInferenceSchema = z.object({
|
||||
population: z.string().nullable(),
|
||||
intervention: z.string().nullable(),
|
||||
comparison: z.string().nullable(),
|
||||
outcome: z.string().nullable(),
|
||||
confidence: z.enum(['high', 'medium', 'low']),
|
||||
status: z.enum(['ai_inferred', 'user_confirmed']),
|
||||
});
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 5. QPER 执行轨迹(Phase II+ 预留)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface QperTraceEntry {
|
||||
workflowId: string;
|
||||
stepIndex: number;
|
||||
toolCode: string;
|
||||
status: 'success' | 'error' | 'blocked';
|
||||
summary: string;
|
||||
timestamp: string;
|
||||
}
|
||||
|
||||
export const QperTraceEntrySchema = z.object({
|
||||
workflowId: z.string(),
|
||||
stepIndex: z.number(),
|
||||
toolCode: z.string(),
|
||||
status: z.enum(['success', 'error', 'blocked']),
|
||||
summary: z.string(),
|
||||
timestamp: z.string(),
|
||||
});
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 6. SessionBlackboard 主体
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface PendingAskUser {
|
||||
questionId: string;
|
||||
question: string;
|
||||
inputType: 'single_select' | 'multi_select' | 'free_text' | 'confirm';
|
||||
metadata?: Record<string, any>;
|
||||
createdAt: string;
|
||||
}
|
||||
|
||||
export interface SessionBlackboard {
|
||||
sessionId: string;
|
||||
createdAt: string;
|
||||
updatedAt: string;
|
||||
dataOverview: DataOverview | null;
|
||||
variableDictionary: VariableDictEntry[];
|
||||
picoInference: PicoInference | null;
|
||||
qperTrace: QperTraceEntry[];
|
||||
pendingAskUser: PendingAskUser | null;
|
||||
}
|
||||
|
||||
export const PendingAskUserSchema = z.object({
|
||||
questionId: z.string(),
|
||||
question: z.string(),
|
||||
inputType: z.enum(['single_select', 'multi_select', 'free_text', 'confirm']),
|
||||
metadata: z.record(z.string(), z.any()).optional(),
|
||||
createdAt: z.string(),
|
||||
});
|
||||
|
||||
export const SessionBlackboardSchema = z.object({
|
||||
sessionId: z.string(),
|
||||
createdAt: z.string(),
|
||||
updatedAt: z.string(),
|
||||
dataOverview: DataOverviewSchema.nullable(),
|
||||
variableDictionary: z.array(VariableDictEntrySchema),
|
||||
picoInference: PicoInferenceSchema.nullable(),
|
||||
qperTrace: z.array(QperTraceEntrySchema),
|
||||
pendingAskUser: PendingAskUserSchema.nullable().default(null),
|
||||
});
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 7. 五段式报告结构(前端渲染用)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface ReportSection {
|
||||
title: string;
|
||||
content: string;
|
||||
details?: string[];
|
||||
}
|
||||
|
||||
export interface FiveSectionReport {
|
||||
basicInfo: ReportSection;
|
||||
missingData: ReportSection & { varsWithMissing: string[]; completeCaseCount: number };
|
||||
dataTypes: ReportSection & { categoricalVars: string[]; numericVars: string[]; needsConfirmation: boolean };
|
||||
outliers: ReportSection & { method: string; varsWithOutliers: string[] };
|
||||
normality: ReportSection & { method: string; nonNormalVars: string[]; normalVars: string[] };
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 8. 工厂函数:创建空白 SessionBlackboard
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export function createEmptyBlackboard(sessionId: string): SessionBlackboard {
|
||||
const now = new Date().toISOString();
|
||||
return {
|
||||
sessionId,
|
||||
createdAt: now,
|
||||
updatedAt: now,
|
||||
dataOverview: null,
|
||||
variableDictionary: [],
|
||||
picoInference: null,
|
||||
qperTrace: [],
|
||||
pendingAskUser: null,
|
||||
};
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 9. 变量字典 patch 输入(PATCH API 用)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface VariableDictPatch {
|
||||
confirmedType?: ColumnType;
|
||||
label?: string;
|
||||
picoRole?: PicoRole | null;
|
||||
}
|
||||
|
||||
export const VariableDictPatchSchema = z.object({
|
||||
confirmedType: z.enum(['numeric', 'categorical', 'datetime', 'text']).optional(),
|
||||
label: z.string().optional(),
|
||||
picoRole: z.enum(['P', 'I', 'C', 'O']).nullable().optional(),
|
||||
});
|
||||
|
||||
// ────────────────────────────────────────────
|
||||
// 10. PICO 确认输入(PATCH API 用)
|
||||
// ────────────────────────────────────────────
|
||||
|
||||
export interface PicoPatch {
|
||||
population?: string | null;
|
||||
intervention?: string | null;
|
||||
comparison?: string | null;
|
||||
outcome?: string | null;
|
||||
}
|
||||
|
||||
export const PicoPatchSchema = z.object({
|
||||
population: z.string().nullable().optional(),
|
||||
intervention: z.string().nullable().optional(),
|
||||
comparison: z.string().nullable().optional(),
|
||||
outcome: z.string().nullable().optional(),
|
||||
});
|
||||
Reference in New Issue
Block a user