feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development

Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-22 18:53:39 +08:00
parent bf10dec4c8
commit 3446909ff7
68 changed files with 11583 additions and 412 deletions

View File

@@ -19,8 +19,8 @@
"predictorType": "binary",
"design": "paired",
"primaryTool": "ST_T_TEST_PAIRED",
"fallbackTool": null,
"switchCondition": null,
"fallbackTool": "ST_WILCOXON",
"switchCondition": "normality_fail: 差值 Shapiro-Wilk P<0.05 时切换 Wilcoxon 符号秩检验",
"templateId": "paired_analysis",
"priority": 10,
"description": "配对设计前后对比"
@@ -31,12 +31,12 @@
"outcomeType": "continuous",
"predictorType": "categorical",
"design": "independent",
"primaryTool": "ST_T_TEST_IND",
"fallbackTool": "ST_MANN_WHITNEY",
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05",
"primaryTool": "ST_ANOVA_ONE",
"fallbackTool": "ST_ANOVA_ONE",
"switchCondition": "normality_fail: Shapiro-Wilk P<0.05 时内部自动切换 Kruskal-Wallis",
"templateId": "standard_analysis",
"priority": 5,
"description": "多组连续变量比较(暂用 T 检验处理两组场景ANOVA 待扩展"
"description": "多组连续变量比较(ANOVA / Kruskal-Wallis"
},
{
"id": "DIFF_CAT_CAT_IND",
@@ -45,12 +45,25 @@
"predictorType": "categorical",
"design": "independent",
"primaryTool": "ST_CHI_SQUARE",
"fallbackTool": "ST_CHI_SQUARE",
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
"fallbackTool": "ST_FISHER",
"switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
"templateId": "standard_analysis",
"priority": 10,
"description": "两个分类变量的独立性检验"
},
{
"id": "DIFF_CAT_CAT_SMALL",
"goal": "comparison",
"outcomeType": "categorical",
"predictorType": "binary",
"design": "independent",
"primaryTool": "ST_FISHER",
"fallbackTool": null,
"switchCondition": null,
"templateId": "standard_analysis",
"priority": 8,
"description": "小样本分类变量独立性检验Fisher 精确检验)"
},
{
"id": "ASSOC_CONT_CONT",
"goal": "correlation",
@@ -71,8 +84,8 @@
"predictorType": "*",
"design": "*",
"primaryTool": "ST_CHI_SQUARE",
"fallbackTool": "ST_CHI_SQUARE",
"switchCondition": "expected_freq_low: 期望频数<5 时 R 内部自动切换 Fisher",
"fallbackTool": "ST_FISHER",
"switchCondition": "expected_freq_low: 期望频数<5 超过 20% 且为 2x2 表时切换 Fisher",
"templateId": "standard_analysis",
"priority": 5,
"description": "分类变量关联分析"
@@ -96,12 +109,12 @@
"outcomeType": "continuous",
"predictorType": "*",
"design": "*",
"primaryTool": "ST_CORRELATION",
"primaryTool": "ST_LINEAR_REG",
"fallbackTool": null,
"switchCondition": null,
"templateId": "regression_analysis",
"priority": 5,
"description": "连续结局的回归分析(线性回归待扩展,暂用相关分析"
"description": "连续结局的多因素线性回归分析"
},
{
"id": "DESC_ANY",

View File

@@ -45,16 +45,16 @@
{
"order": 1,
"role": "baseline_table",
"tool": "ST_DESCRIPTIVE",
"tool": "ST_BASELINE_TABLE",
"name": "表1: 组间基线特征比较",
"paramsMapping": { "group_var": "{{grouping_var}}", "variables": "{{all_predictors}}" }
"paramsMapping": { "group_var": "{{grouping_var}}", "analyze_vars": "{{all_predictors}}" }
},
{
"order": 2,
"role": "univariate_screen",
"tool": "ST_DESCRIPTIVE",
"tool": "ST_BASELINE_TABLE",
"name": "表2: 结局指标单因素分析",
"paramsMapping": { "group_var": "{{outcome_var}}", "variables": "{{all_predictors}}" }
"paramsMapping": { "group_var": "{{outcome_var}}", "analyze_vars": "{{all_predictors}}" }
},
{
"order": 3,

View File

@@ -0,0 +1,58 @@
{
"rules": [
{
"intent": "analyze",
"keywords": ["分析", "检验", "t检验", "卡方", "回归", "比较一下", "跑一下", "执行分析", "做个分析", "方差分析", "ANOVA", "相关分析", "logistic", "生存分析", "Cox", "基线表"],
"excludeKeywords": ["什么方法", "用什么", "应该怎么", "推荐"],
"requires": ["dataOverview"],
"priority": 10
},
{
"intent": "discuss",
"keywords": ["什么意思", "说明什么", "怎么解释", "p值", "置信区间", "结果说明", "为什么显著", "为什么不显著", "临床意义", "效应量"],
"requires": ["dataOverview", "hasAnalysisResults"],
"priority": 9
},
{
"intent": "feedback",
"keywords": ["结果不对", "不太对", "换个方法", "重新分析", "有问题", "不满意", "重做"],
"requires": ["dataOverview", "hasAnalysisResults"],
"priority": 9
},
{
"intent": "explore",
"keywords": ["看看", "分布", "缺失", "概况", "有哪些变量", "数据特征", "异常值", "样本量", "描述一下数据", "多少例", "变量类型"],
"requires": ["dataOverview"],
"priority": 8
},
{
"intent": "consult",
"keywords": ["什么方法", "用什么", "应该怎么分析", "推荐方法", "分析方案", "哪种检验", "怎么选", "前提条件"],
"requires": ["dataOverview"],
"priority": 7
}
],
"contextGuards": {
"explore": {
"requires": ["dataOverview"],
"fallbackMessage": "您还没有上传数据。请先上传 CSV 或 Excel 文件,我就能帮您探索数据了。您也可以先问我统计方法相关的问题。"
},
"analyze": {
"requires": ["dataOverview"],
"fallbackMessage": "您还没有上传数据。请先上传数据文件,我才能帮您执行统计分析。"
},
"consult": {
"requires": ["dataOverview"],
"fallbackMessage": "如果您上传了数据,我可以根据数据特征给出更精准的方法推荐。不过您也可以直接描述研究设计,我来给出一般性建议。"
},
"discuss": {
"requires": ["dataOverview", "hasAnalysisResults"],
"fallbackMessage": "目前还没有分析结果可以讨论。请先执行一次统计分析,然后我们就可以深入讨论结果了。"
},
"feedback": {
"requires": ["dataOverview", "hasAnalysisResults"],
"fallbackMessage": "目前还没有分析结果可以改进。请先执行一次统计分析,如果对结果不满意,我来帮您调整。"
}
},
"defaultIntent": "chat"
}

View File

@@ -82,6 +82,65 @@
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
],
"outputType": "regression"
},
{
"code": "ST_FISHER",
"name": "Fisher精确检验",
"category": "categorical",
"description": "小样本或稀疏列联表的精确独立性检验(卡方检验的替代方法)",
"inputParams": [
{ "name": "var1", "type": "string", "required": true, "description": "分类变量1" },
{ "name": "var2", "type": "string", "required": true, "description": "分类变量2" }
],
"outputType": "association"
},
{
"code": "ST_ANOVA_ONE",
"name": "单因素方差分析",
"category": "parametric",
"description": "三组及以上独立样本的均值差异比较(含事后多重比较)",
"inputParams": [
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量3+水平)" },
{ "name": "value_var", "type": "string", "required": true, "description": "连续型结局变量" }
],
"outputType": "comparison",
"prerequisite": "正态分布 + 方差齐性",
"fallback": "Kruskal-Wallis"
},
{
"code": "ST_WILCOXON",
"name": "Wilcoxon符号秩检验",
"category": "nonparametric",
"description": "配对样本的非参数检验配对T检验的替代方法",
"inputParams": [
{ "name": "before_var", "type": "string", "required": true, "description": "前测变量" },
{ "name": "after_var", "type": "string", "required": true, "description": "后测变量" }
],
"outputType": "comparison"
},
{
"code": "ST_LINEAR_REG",
"name": "线性回归",
"category": "regression",
"description": "连续型结局变量的多因素线性回归分析",
"inputParams": [
{ "name": "outcome_var", "type": "string", "required": true, "description": "连续型结局变量" },
{ "name": "predictors", "type": "string[]", "required": true, "description": "预测变量列表" },
{ "name": "confounders", "type": "string[]", "required": false, "description": "混杂因素列表" }
],
"outputType": "regression"
},
{
"code": "ST_BASELINE_TABLE",
"name": "基线特征表",
"category": "composite",
"description": "基于 gtsummary 的一键式基线特征表生成,自动判断变量类型、选方法、合并出表(复合工具)",
"inputParams": [
{ "name": "group_var", "type": "string", "required": true, "description": "分组变量" },
{ "name": "analyze_vars", "type": "string[]", "required": false, "description": "分析变量列表(不传则自动选取全部)" }
],
"outputType": "baseline_table",
"composite": true
}
]
}

View File

@@ -14,6 +14,8 @@ import analysisRoutes from './routes/analysis.routes.js';
import consultRoutes from './routes/consult.routes.js';
import configRoutes from './routes/config.routes.js';
import workflowRoutes from './routes/workflow.routes.js';
import blackboardRoutes from './routes/blackboard.routes.js';
import chatRoutes from './routes/chat.routes.js';
export async function ssaRoutes(app: FastifyInstance) {
// 注册认证中间件(遵循模块认证规范)
@@ -26,6 +28,10 @@ export async function ssaRoutes(app: FastifyInstance) {
app.register(configRoutes, { prefix: '/config' });
// Phase 2A: 多步骤工作流
app.register(workflowRoutes, { prefix: '/workflow' });
// Phase I: Session 黑板 + READ 层
app.register(blackboardRoutes, { prefix: '/sessions/:sessionId/blackboard' });
// Phase II: 统一对话入口
app.register(chatRoutes, { prefix: '/sessions' });
}
export default ssaRoutes;

View File

@@ -0,0 +1,102 @@
/**
* Phase I — Session Blackboard + READ Layer 路由
*
* 路由前缀: /sessions/:sessionId/blackboard
*
* GET / — 获取完整 SessionBlackboard
* POST /data-overview — 触发 get_data_overview 工具
* POST /variable-detail — 触发 get_variable_detail 工具
* PATCH /variables/:name — 用户确认/修改变量类型
*/
import { FastifyInstance, FastifyRequest } from 'fastify';
import { logger } from '../../../common/logging/index.js';
import { sessionBlackboardService } from '../services/SessionBlackboardService.js';
import { executeGetDataOverview } from '../services/tools/GetDataOverviewTool.js';
import { executeGetVariableDetail } from '../services/tools/GetVariableDetailTool.js';
import type { ColumnType, PicoRole, VariableDictPatch } from '../types/session-blackboard.types.js';
export default async function blackboardRoutes(app: FastifyInstance) {
// GET /sessions/:sessionId/blackboard — 获取完整黑板
app.get('/', async (req, reply) => {
const { sessionId } = req.params as { sessionId: string };
const blackboard = await sessionBlackboardService.get(sessionId);
if (!blackboard) {
return reply.status(404).send({ error: 'Blackboard not found for this session' });
}
const report = blackboard.dataOverview
? sessionBlackboardService.generateFiveSectionReport(
blackboard.dataOverview,
blackboard.variableDictionary,
)
: null;
return reply.send({ blackboard, report });
});
// POST /sessions/:sessionId/blackboard/data-overview — 执行 get_data_overview
app.post('/data-overview', async (req, reply) => {
const { sessionId } = req.params as { sessionId: string };
logger.info('[SSA:Route] Triggering data overview', { sessionId });
const result = await executeGetDataOverview(sessionId);
if (!result.success) {
return reply.status(400).send({ error: result.error });
}
return reply.send({
success: true,
report: result.report,
});
});
// POST /sessions/:sessionId/blackboard/variable-detail — 执行 get_variable_detail
app.post('/variable-detail', async (req, reply) => {
const { sessionId } = req.params as { sessionId: string };
const { variableName, confirmedType, label } = req.body as {
variableName: string;
confirmedType?: ColumnType;
label?: string;
};
if (!variableName) {
return reply.status(400).send({ error: 'variableName is required' });
}
logger.info('[SSA:Route] Triggering variable detail', { sessionId, variableName });
const result = await executeGetVariableDetail(sessionId, variableName, confirmedType, label);
if (!result.success) {
return reply.status(400).send({ error: result.error });
}
return reply.send(result);
});
// PATCH /sessions/:sessionId/blackboard/variables/:name — 更新变量字典条目
app.patch('/variables/:name', async (req, reply) => {
const { sessionId, name } = req.params as { sessionId: string; name: string };
const body = req.body as {
confirmedType?: ColumnType;
label?: string;
picoRole?: PicoRole | null;
};
logger.info('[SSA:Route] Updating variable dictionary entry', { sessionId, name, body });
const dictPatch: VariableDictPatch = {};
if (body.confirmedType !== undefined) dictPatch.confirmedType = body.confirmedType;
if (body.label !== undefined) dictPatch.label = body.label;
if (body.picoRole !== undefined) dictPatch.picoRole = body.picoRole;
await sessionBlackboardService.updateVariable(sessionId, name, dictPatch);
return reply.send({ success: true });
});
}

View File

@@ -0,0 +1,217 @@
/**
* Phase II — 统一对话 API 路由
*
* POST /sessions/:id/chat — 统一对话入口SSE 流式)
* GET /sessions/:id/chat/history — 获取对话历史
* GET /sessions/:id/chat/conversation — 获取 conversation 元信息
*
* SSE 格式OpenAI Compatible与前端 useAIStream 兼容)
* 心跳5 秒H1
* 竞态保护placeholder 占位H3
*/
import { FastifyInstance, FastifyRequest } from 'fastify';
import { logger } from '../../../common/logging/index.js';
import { conversationService } from '../services/ConversationService.js';
import { intentRouterService } from '../services/IntentRouterService.js';
import { chatHandlerService } from '../services/ChatHandlerService.js';
import { askUserService } from '../services/AskUserService.js';
function getUserId(request: FastifyRequest): string {
const userId = (request as any).user?.userId;
if (!userId) throw new Error('User not authenticated');
return userId;
}
export default async function chatRoutes(app: FastifyInstance) {
/**
* POST /sessions/:id/chat
* 统一对话入口 — SSE 流式响应
*/
app.post('/:id/chat', async (req, reply) => {
const { id: sessionId } = req.params as { id: string };
const userId = getUserId(req);
const { content, enableDeepThinking, metadata } = req.body as {
content: string;
enableDeepThinking?: boolean;
metadata?: Record<string, any>;
};
if (!content?.trim()) {
return reply.status(400).send({ error: '消息内容不能为空' });
}
// SSE 响应头
reply.raw.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Access-Control-Allow-Origin': '*',
'X-Accel-Buffering': 'no',
});
const writer = {
write: (data: string) => {
try {
return reply.raw.write(data);
} catch {
return false;
}
},
end: () => {
try { reply.raw.end(); } catch { /* ignore */ }
},
on: (event: string, handler: () => void) => {
req.raw.on(event, handler);
},
};
try {
// 1. 获取或创建 Conversation延迟创建
const conversationId = await conversationService.getOrCreateConversation(sessionId, userId);
// 2. 保存用户消息
await conversationService.saveUserMessage(conversationId, content.trim());
// ── H1 全局打断判定 ──
const pending = await askUserService.getPending(sessionId);
if (pending) {
const askUserResponse = metadata?.askUserResponse
? askUserService.parseResponse(metadata)
: null;
if (askUserResponse) {
// 正常回答问题(含 skip
const placeholderMsgId = await conversationService.createAssistantPlaceholder(
conversationId, 'chat',
);
const metaEvent = JSON.stringify({
type: 'intent_classified',
intent: 'chat',
confidence: 1,
source: 'ask_user_response',
guardTriggered: false,
});
writer.write(`data: ${metaEvent}\n\n`);
const result = await chatHandlerService.handleAskUserResponse(
sessionId, conversationId, askUserResponse, writer, placeholderMsgId,
);
logger.info('[SSA:Chat] AskUser response handled', {
sessionId, action: askUserResponse.action, success: result.success,
});
writer.end();
return;
} else {
// 用户无视卡片,强行打字转移话题
await askUserService.clearPending(sessionId);
logger.info('[SSA:Chat] 用户转移话题,已取消挂起的 ask_user 状态', { sessionId });
}
}
// ── H1 结束 ──
// 3. 意图分类
const intentResult = await intentRouterService.classify(content.trim(), sessionId);
// 发送意图元数据事件(前端可用于 UI 切换)
const metaEvent = JSON.stringify({
type: 'intent_classified',
intent: intentResult.intent,
confidence: intentResult.confidence,
source: intentResult.source,
guardTriggered: intentResult.guardTriggered || false,
guardMessage: intentResult.guardMessage,
});
writer.write(`data: ${metaEvent}\n\n`);
// 4. 创建 assistant placeholderH3 竞态保护)
const placeholderMsgId = await conversationService.createAssistantPlaceholder(
conversationId, intentResult.intent,
);
// 5. 分发到意图处理器
const result = await chatHandlerService.handle(
sessionId, conversationId, content.trim(),
intentResult, writer, placeholderMsgId,
);
logger.info('[SSA:Chat] Request completed', {
sessionId,
intent: result.intent,
success: result.success,
});
} catch (error: any) {
logger.error('[SSA:Chat] Unhandled error', {
sessionId,
error: error.message,
});
const errorEvent = JSON.stringify({
type: 'error',
code: 'CHAT_ERROR',
message: error.message || '处理消息时发生错误',
});
try {
writer.write(`data: ${errorEvent}\n\n`);
} catch { /* ignore */ }
} finally {
writer.end();
}
});
/**
* GET /sessions/:id/chat/history
* 获取对话历史消息
*/
app.get('/:id/chat/history', async (req, reply) => {
const { id: sessionId } = req.params as { id: string };
const conversation = await conversationService.getConversationBySession(sessionId);
if (!conversation) {
return reply.send({ messages: [], conversationId: null });
}
const messages = await conversationService.getMessages(conversation.id);
return reply.send({
conversationId: conversation.id,
messages: messages.map(m => ({
id: m.id,
role: m.role,
content: m.content,
thinkingContent: m.thinkingContent,
intent: (m.metadata as any)?.intent,
status: (m.metadata as any)?.status,
createdAt: m.createdAt,
})),
});
});
/**
* GET /sessions/:id/chat/conversation
* 获取 conversation 元信息
*/
app.get('/:id/chat/conversation', async (req, reply) => {
const { id: sessionId } = req.params as { id: string };
const conversation = await conversationService.getConversationBySession(sessionId);
if (!conversation) {
return reply.send({ conversation: null });
}
return reply.send({
conversation: {
id: conversation.id,
title: conversation.title,
messageCount: conversation.messageCount,
createdAt: conversation.createdAt,
updatedAt: conversation.updatedAt,
},
});
});
}

View File

@@ -13,6 +13,9 @@ import { prisma } from '../../../config/database.js';
import { logger } from '../../../common/logging/index.js';
import { storage } from '../../../common/storage/index.js';
import { DataParserService } from '../services/DataParserService.js';
import { executeGetDataOverview } from '../services/tools/GetDataOverviewTool.js';
import { picoInferenceService } from '../services/PicoInferenceService.js';
import { sessionBlackboardService } from '../services/SessionBlackboardService.js';
function getUserId(request: FastifyRequest): string {
const userId = (request as any).user?.userId;
@@ -87,6 +90,15 @@ export default async function sessionRoutes(app: FastifyInstance) {
sessionId: session.id,
hasFile: !!dataOssKey
});
// Phase I: 文件上传后异步触发 data overview + PICO 推断(不阻塞响应)
if (dataOssKey) {
triggerDataOverviewAsync(session.id).catch((err) => {
logger.error('[SSA:Session] Async data overview trigger failed', {
sessionId: session.id, error: err.message,
});
});
}
// 返回前端期望的格式
return reply.send({
@@ -135,4 +147,106 @@ export default async function sessionRoutes(app: FastifyInstance) {
return reply.send(messages);
});
/**
* GET /sessions/:id/data-context/stream
* Phase I: SSE 端点 — 实时推送 data overview + PICO 推断进度
*/
app.get('/:id/data-context/stream', async (req, reply) => {
const { id } = req.params as { id: string };
reply.raw.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Access-Control-Allow-Origin': '*',
});
const send = (type: string, data: any) => {
reply.raw.write(`data: ${JSON.stringify({ type, ...data })}\n\n`);
};
send('connected', { sessionId: id });
const heartbeat = setInterval(() => {
reply.raw.write(':heartbeat\n\n');
}, 15000);
const cleanup = () => {
clearInterval(heartbeat);
reply.raw.end();
};
req.raw.on('close', cleanup);
try {
// Step 1: Data Overview
send('data_overview_start', { message: '正在生成数据概览...' });
const overviewResult = await executeGetDataOverview(id);
if (!overviewResult.success) {
send('data_overview_error', { error: overviewResult.error });
cleanup();
return;
}
send('data_overview_complete', {
report: overviewResult.report,
});
// Step 2: PICO Inference
if (overviewResult.blackboard?.dataOverview && overviewResult.blackboard?.variableDictionary) {
send('pico_start', { message: '正在推断 PICO 结构...' });
const pico = await picoInferenceService.inferFromOverview(
id,
overviewResult.blackboard.dataOverview,
overviewResult.blackboard.variableDictionary,
);
if (pico) {
send('pico_complete', { picoInference: pico });
} else {
send('pico_skip', { message: 'PICO 推断跳过LLM 不可用或推断失败)' });
}
}
// Step 3: 完成
const finalBlackboard = await sessionBlackboardService.get(id);
send('data_context_complete', {
blackboard: finalBlackboard,
});
} catch (error: any) {
logger.error('[SSA:SSE] Data context stream failed', { sessionId: id, error: error.message });
send('data_context_error', { error: error.message });
} finally {
cleanup();
}
});
}
/**
* 异步触发 data overviewfire-and-forget不阻塞 session 创建响应)
*/
async function triggerDataOverviewAsync(sessionId: string): Promise<void> {
logger.info('[SSA:AutoTrigger] Starting async data overview', { sessionId });
const result = await executeGetDataOverview(sessionId);
if (!result.success) {
logger.warn('[SSA:AutoTrigger] Data overview failed', { sessionId, error: result.error });
return;
}
logger.info('[SSA:AutoTrigger] Data overview complete, starting PICO inference', { sessionId });
if (result.blackboard?.dataOverview && result.blackboard?.variableDictionary) {
await picoInferenceService.inferFromOverview(
sessionId,
result.blackboard.dataOverview,
result.blackboard.variableDictionary,
);
}
logger.info('[SSA:AutoTrigger] Full data context pipeline complete', { sessionId });
}

View File

@@ -0,0 +1,196 @@
/**
* AskUserService — 统一交互卡片服务
*
* H3: 统一 AskUser 领域模型,取代旧 Clarification 概念。
* H1: 支持 skip/打断 — clearPending 逃生门。
*
* 职责:
* 1. 构建 AskUserEventSSE 推送给前端)
* 2. 解析 AskUserResponse前端用户回复
* 3. 持久化 pendingAskUser 到 SessionBlackboard
* 4. 全局打断清理
*/
import { randomUUID } from 'crypto';
import { logger } from '../../../common/logging/index.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
// ────────────────────────────────────────────
// Types (H3: 统一模型)
// ────────────────────────────────────────────
export interface AskUserOption {
label: string;
value: string;
description?: string;
}
export interface AskUserEvent {
type: 'ask_user';
questionId: string;
question: string;
context?: string;
inputType: 'single_select' | 'multi_select' | 'free_text' | 'confirm';
options?: AskUserOption[];
defaultValue?: string;
metadata?: Record<string, any>;
}
export interface AskUserResponse {
questionId: string;
action: 'select' | 'skip' | 'free_text';
selectedValues?: string[];
freeText?: string;
metadata?: Record<string, any>;
}
export interface PendingAskUser {
questionId: string;
question: string;
inputType: AskUserEvent['inputType'];
metadata?: Record<string, any>;
createdAt: string;
}
// ────────────────────────────────────────────
// Service
// ────────────────────────────────────────────
export class AskUserService {
/**
* 构建 ask_user 事件并写入黑板 pending 状态
*/
async createQuestion(
sessionId: string,
opts: {
question: string;
context?: string;
inputType: AskUserEvent['inputType'];
options?: AskUserOption[];
defaultValue?: string;
metadata?: Record<string, any>;
},
): Promise<AskUserEvent> {
const questionId = randomUUID();
const event: AskUserEvent = {
type: 'ask_user',
questionId,
question: opts.question,
context: opts.context,
inputType: opts.inputType,
options: opts.options,
defaultValue: opts.defaultValue,
metadata: opts.metadata,
};
// 持久化到黑板
const pending: PendingAskUser = {
questionId,
question: opts.question,
inputType: opts.inputType,
metadata: opts.metadata,
createdAt: new Date().toISOString(),
};
await sessionBlackboardService.patch(sessionId, {
pendingAskUser: pending,
} as any);
logger.info('[SSA:AskUser] Question created', { sessionId, questionId, inputType: opts.inputType });
return event;
}
/**
* 解析用户回复
*/
parseResponse(metadata: Record<string, any>): AskUserResponse | null {
const raw = metadata?.askUserResponse;
if (!raw) return null;
return {
questionId: raw.questionId || '',
action: raw.action || 'select',
selectedValues: raw.selectedValues,
freeText: raw.freeText,
metadata: raw.metadata,
};
}
/**
* 检查 session 是否有挂起的 ask_user
*/
async getPending(sessionId: string): Promise<PendingAskUser | null> {
const bb = await sessionBlackboardService.get(sessionId);
return (bb as any)?.pendingAskUser ?? null;
}
/**
* H1: 清除挂起状态(用户跳过或打断时调用)
*/
async clearPending(sessionId: string): Promise<void> {
await sessionBlackboardService.patch(sessionId, {
pendingAskUser: null,
} as any);
logger.info('[SSA:AskUser] Pending cleared', { sessionId });
}
/**
* 构建方法确认卡片consult 意图专用)
*/
buildMethodConfirmQuestion(
primaryMethodName: string,
primaryMethodCode: string,
fallbackMethodName?: string,
): {
question: string;
context: string;
inputType: 'confirm';
options: AskUserOption[];
metadata: Record<string, any>;
} {
const options: AskUserOption[] = [
{
label: `确认,使用${primaryMethodName}`,
value: 'confirm',
description: '系统将开始执行分析',
},
];
if (fallbackMethodName) {
options.push({
label: `改用${fallbackMethodName}`,
value: 'use_fallback',
description: '使用备选方案',
});
}
options.push({
label: '我想换个方法',
value: 'change_method',
description: '重新描述需求',
});
return {
question: `是否确认使用 ${primaryMethodName} 进行分析?`,
context: '您可以确认执行,选择备选方案,或重新描述需求。',
inputType: 'confirm',
options,
metadata: {
recommendedMethod: primaryMethodCode,
action: 'method_confirm',
},
};
}
/**
* 将 AskUserEvent 序列化为 SSE data 行
*/
formatSSE(event: AskUserEvent): string {
return `data: ${JSON.stringify(event)}\n\n`;
}
}
export const askUserService = new AskUserService();

View File

@@ -0,0 +1,510 @@
/**
* Phase II — 意图处理器Intent Handlers
*
* 按意图类型分发处理逻辑:
* - chat: 直接 LLM 对话
* - explore: READ 工具 + LLM 解读
* - analyze: 转入 QPER 流水线 + LLM 摘要
* - consult: LLM 方法推荐Phase III 增强)
* - discuss: LLM 结果解读Phase V 增强)
* - feedback: LLM 改进建议Phase V 增强)
*/
import { logger } from '../../../common/logging/index.js';
import { conversationService, type StreamWriter } from './ConversationService.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import { tokenTruncationService } from './TokenTruncationService.js';
import { methodConsultService } from './MethodConsultService.js';
import { askUserService, type AskUserResponse } from './AskUserService.js';
import { toolOrchestratorService } from './ToolOrchestratorService.js';
import type { IntentType } from './SystemPromptService.js';
import type { IntentResult } from './IntentRouterService.js';
export interface HandleResult {
messageId: string;
intent: IntentType;
success: boolean;
error?: string;
}
export class ChatHandlerService {
/**
* 统一处理入口:按意图分发
*/
async handle(
sessionId: string,
conversationId: string,
userContent: string,
intentResult: IntentResult,
writer: StreamWriter,
placeholderMessageId: string,
): Promise<HandleResult> {
const intent = intentResult.intent;
try {
// 如果上下文守卫被触发且有提示消息,直接作为 LLM 上下文的一部分
let toolOutputs: string | undefined;
if (intentResult.guardTriggered && intentResult.guardMessage) {
toolOutputs = `[系统提示] ${intentResult.guardMessage}`;
}
switch (intent) {
case 'chat':
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, intent, toolOutputs);
case 'explore':
return await this.handleExplore(sessionId, conversationId, writer, placeholderMessageId, toolOutputs);
case 'consult':
return await this.handleConsult(sessionId, conversationId, writer, placeholderMessageId, toolOutputs);
case 'analyze':
return await this.handleAnalyze(sessionId, conversationId, userContent, writer, placeholderMessageId, toolOutputs);
case 'discuss':
return await this.handleDiscuss(sessionId, conversationId, writer, placeholderMessageId, toolOutputs);
case 'feedback':
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, 'feedback', toolOutputs);
default:
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, 'chat', toolOutputs);
}
} catch (error: any) {
logger.error('[SSA:ChatHandler] Handler error', {
sessionId, intent, error: error.message,
});
await conversationService.markAssistantError(placeholderMessageId, error.message);
return {
messageId: placeholderMessageId,
intent,
success: false,
error: error.message,
};
}
}
// ────────────────────────────────────────────
// chat / consult / feedback — 直接 LLM 对话
// ────────────────────────────────────────────
private async handleChat(
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
intent: IntentType,
toolOutputs?: string,
): Promise<HandleResult> {
const messages = await conversationService.buildContext(
sessionId, conversationId, intent, toolOutputs,
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.7,
maxTokens: 2000,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return {
messageId: placeholderMessageId,
intent,
success: true,
};
}
// ────────────────────────────────────────────
// explore — 数据探索READ 工具 + LLM 解读)
// ────────────────────────────────────────────
private async handleExplore(
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
guardToolOutput?: string,
): Promise<HandleResult> {
// 从 SessionBlackboard 提取数据摘要作为 tool output
const blackboard = await sessionBlackboardService.get(sessionId);
let toolOutputs = guardToolOutput || '';
if (blackboard) {
const truncated = tokenTruncationService.truncate(blackboard, {
maxTokens: 1500,
strategy: 'balanced',
});
const exploreData: string[] = [];
if (truncated.overview) {
exploreData.push(`数据概览:\n${truncated.overview}`);
}
if (truncated.variables) {
exploreData.push(`变量列表:\n${truncated.variables}`);
}
if (truncated.pico) {
exploreData.push(`PICO 推断:\n${truncated.pico}`);
}
if (truncated.report) {
exploreData.push(`数据诊断:\n${truncated.report}`);
}
if (exploreData.length > 0) {
toolOutputs = (toolOutputs ? toolOutputs + '\n\n' : '') + exploreData.join('\n\n');
}
}
const messages = await conversationService.buildContext(
sessionId, conversationId, 'explore', toolOutputs || undefined,
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.7,
maxTokens: 2000,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return {
messageId: placeholderMessageId,
intent: 'explore',
success: true,
};
}
// ────────────────────────────────────────────
// analyze — 转入 QPER混合模式LLM 摘要 + WorkspacePane 详情)
// ────────────────────────────────────────────
/**
* Phase IV: analyze 意图 — 对话驱动分析
*
* 流程: plan 生成 → SSE 推 analysis_plan → LLM 方案说明 → ask_user 确认
* 执行由前端通过 /workflow/{id}/stream 触发D1: 保留独立 workflow SSE
*/
private async handleAnalyze(
sessionId: string,
conversationId: string,
userMessage: string,
writer: StreamWriter,
placeholderMessageId: string,
guardToolOutput?: string,
): Promise<HandleResult> {
// 1. 调用 ToolOrchestratorService 生成计划D5: PICO hint 自动注入)
const planResult = await toolOrchestratorService.plan(sessionId, userMessage);
if (!planResult.success || !planResult.plan) {
const fallbackHint = [
guardToolOutput,
`[系统提示] 分析计划生成失败: ${planResult.error || '未知错误'}`,
'请友好地告知用户需要更明确的分析需求描述,例如需要指明要分析哪些变量、比较什么。',
].filter(Boolean).join('\n');
const messages = await conversationService.buildContext(
sessionId, conversationId, 'analyze', fallbackHint,
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.5, maxTokens: 800,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
}
const plan = planResult.plan;
// 2. SSE 推送 analysis_plan 事件D2: 前端自动创建 AnalysisRecord
const planEvent = `data: ${JSON.stringify({
type: 'analysis_plan',
plan,
})}\n\n`;
writer.write(planEvent);
logger.info('[SSA:ChatHandler] analysis_plan pushed via SSE', {
sessionId, workflowId: plan.workflow_id, totalSteps: plan.total_steps,
});
// 3. LLM 流式生成方案说明
const planSummary = toolOrchestratorService.formatPlanForLLM(plan);
const toolOutputs = [
guardToolOutput,
planSummary,
'[系统提示] 你刚刚为用户制定了上述分析方案。请用自然语言向用户解释这个方案:包括为什么选这些方法、分析步骤的逻辑。不要重复列步骤编号和工具代码,要用用户能理解的语言说明。最后提示用户确认方案后即可执行。',
].filter(Boolean).join('\n\n');
const messages = await conversationService.buildContext(
sessionId, conversationId, 'analyze', toolOutputs,
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.5, maxTokens: 1200,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
// 4. 推送 ask_user 确认卡片(复用 Phase III AskUserService
const confirmQ = {
inputType: 'confirm' as const,
question: '请确认上述分析方案',
context: `方案: ${plan.title},共 ${plan.total_steps} 个步骤`,
options: [
{ id: 'confirm_plan', label: '确认执行', value: 'confirm_plan' },
{ id: 'change_method', label: '修改方案', value: 'change_method' },
],
metadata: {
workflowId: plan.workflow_id,
planTitle: plan.title,
},
};
const event = await askUserService.createQuestion(sessionId, confirmQ);
writer.write(askUserService.formatSSE(event));
return {
messageId: placeholderMessageId,
intent: 'analyze',
success: true,
};
}
// ────────────────────────────────────────────
// discuss — 结果讨论(注入分析结果上下文)
// ────────────────────────────────────────────
private async handleDiscuss(
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
guardToolOutput?: string,
): Promise<HandleResult> {
const blackboard = await sessionBlackboardService.get(sessionId);
let toolOutputs = guardToolOutput || '';
// 注入 QPER trace 摘要
if (blackboard?.qperTrace && blackboard.qperTrace.length > 0) {
const traceItems = blackboard.qperTrace
.filter(t => t.status === 'success')
.slice(-5)
.map(t => `- 步骤${t.stepIndex}: ${t.toolCode}${t.summary}`)
.join('\n');
if (traceItems) {
toolOutputs = (toolOutputs ? toolOutputs + '\n\n' : '') +
`最近分析结果:\n${traceItems}`;
}
}
const messages = await conversationService.buildContext(
sessionId, conversationId, 'discuss', toolOutputs || undefined,
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.7,
maxTokens: 2000,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return {
messageId: placeholderMessageId,
intent: 'discuss',
success: true,
};
}
// ────────────────────────────────────────────
// consult — 方法推荐Phase III: method_consult + ask_user
// ────────────────────────────────────────────
private async handleConsult(
sessionId: string,
conversationId: string,
writer: StreamWriter,
placeholderMessageId: string,
guardToolOutput?: string,
): Promise<HandleResult> {
let toolOutputs = guardToolOutput || '';
// 1. 调用 MethodConsultService 获取推荐
const recommendation = await methodConsultService.recommend(sessionId);
const recText = methodConsultService.formatForLLM(recommendation);
toolOutputs = (toolOutputs ? toolOutputs + '\n\n' : '') + recText;
logger.info('[SSA:ChatHandler] Method consult result', {
sessionId,
matched: recommendation.matched,
primaryMethod: recommendation.primaryMethod?.code,
matchScore: recommendation.matchScore,
needsClarification: recommendation.needsClarification,
});
// 2. LLM 生成自然语言推荐P1: 结论先行 + 结构化列表)
const messages = await conversationService.buildContext(
sessionId, conversationId, 'consult', toolOutputs,
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.5,
maxTokens: 1500,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
// 3. 如果有明确推荐,推送 ask_user 确认卡片
if (recommendation.matched && recommendation.primaryMethod) {
const confirmQ = askUserService.buildMethodConfirmQuestion(
recommendation.primaryMethod.name,
recommendation.primaryMethod.code,
recommendation.fallbackMethod?.name,
);
const event = await askUserService.createQuestion(sessionId, confirmQ);
writer.write(askUserService.formatSSE(event));
}
return {
messageId: placeholderMessageId,
intent: 'consult',
success: true,
};
}
// ────────────────────────────────────────────
// ask_user 响应处理Phase III
// ────────────────────────────────────────────
async handleAskUserResponse(
sessionId: string,
conversationId: string,
response: AskUserResponse,
writer: StreamWriter,
placeholderMessageId: string,
): Promise<HandleResult> {
// 清除 pending 状态
await askUserService.clearPending(sessionId);
if (response.action === 'skip') {
// 用户跳过 — 生成友好回复
const messages = await conversationService.buildContext(
sessionId, conversationId, 'chat',
'[系统提示] 用户跳过了上一个确认问题。请友好地回应,表示随时可以继续。',
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.7,
maxTokens: 500,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return { messageId: placeholderMessageId, intent: 'chat', success: true };
}
// 用户选择了具体选项
const selectedValue = response.selectedValues?.[0];
if (selectedValue === 'confirm_plan') {
// Phase IV: 确认分析方案 → 前端将触发 executeWorkflow
const workflowId = response.metadata?.workflowId || '';
const messages = await conversationService.buildContext(
sessionId, conversationId, 'analyze',
`[系统提示] 用户已确认分析方案workflow: ${workflowId})。请简要确认:"好的,方案已确认,正在准备执行分析..."。`,
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.3, maxTokens: 300,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
// 推送 plan_confirmed 事件,前端据此触发 executeWorkflow
const confirmEvent = `data: ${JSON.stringify({
type: 'plan_confirmed',
workflowId,
})}\n\n`;
writer.write(confirmEvent);
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
} else if (selectedValue === 'confirm') {
// Phase III: 确认使用推荐方法 → 提示可以开始分析
const messages = await conversationService.buildContext(
sessionId, conversationId, 'analyze',
'[系统提示] 用户已确认使用推荐的统计方法。请简要确认方案,告知用户可以在对话中说"开始分析"或在右侧面板触发执行。',
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.5,
maxTokens: 800,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return { messageId: placeholderMessageId, intent: 'analyze', success: true };
} else if (selectedValue === 'use_fallback') {
// 使用备选方案
const messages = await conversationService.buildContext(
sessionId, conversationId, 'consult',
'[系统提示] 用户选择使用备选方案。请确认切换,并简要说明备选方案的适用场景。',
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.5,
maxTokens: 800,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return { messageId: placeholderMessageId, intent: 'consult', success: true };
} else if (selectedValue === 'change_method') {
// 用户想换方法 → 引导重新描述
const messages = await conversationService.buildContext(
sessionId, conversationId, 'consult',
'[系统提示] 用户不满意当前推荐,想换方法。请询问用户希望使用什么方法,或引导其更详细地描述分析需求。',
);
const result = await conversationService.streamToSSE(messages, writer, {
temperature: 0.7,
maxTokens: 800,
});
await conversationService.finalizeAssistantMessage(
placeholderMessageId, result.content, result.thinking, result.tokens,
);
return { messageId: placeholderMessageId, intent: 'consult', success: true };
}
// 其他情况fallback
return await this.handleChat(sessionId, conversationId, writer, placeholderMessageId, 'chat');
}
}
export const chatHandlerService = new ChatHandlerService();

View File

@@ -0,0 +1,382 @@
/**
* Phase II — 对话核心服务
*
* 职责:
* - 对话历史持久化(复用 aia_schema.conversations + messages
* - Placeholder 占位机制H3 竞态保护)
* - LLM 流式调用 + OpenAI Compatible SSE 输出
* - 5 秒心跳保活H1
* - 消息上下文组装(滑动窗口)
*
* 架构约束:
* - C1: 对话层 LLM 禁止 Function Calling
* - C3: 默认使用 chatStream 流式输出
*/
import { prisma } from '../../../config/database.js';
import { logger } from '../../../common/logging/index.js';
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import type { Message as LLMMessage, StreamChunk } from '../../../common/llm/adapters/types.js';
import { systemPromptService, type IntentType } from './SystemPromptService.js';
const MAX_CONTEXT_MESSAGES = 20;
const DEFAULT_MODEL = 'deepseek-v3';
const HEARTBEAT_INTERVAL_MS = 5000;
const SSA_AGENT_ID = 'SSA_ANALYST';
export interface ChatRequest {
content: string;
enableDeepThinking?: boolean;
}
export interface StreamWriter {
write(data: string): boolean;
end(): void;
on(event: string, handler: () => void): void;
}
export class ConversationService {
// ────────────────────────────────────────────
// Conversation CRUD
// ────────────────────────────────────────────
/**
* 获取或创建 session 关联的 conversation延迟创建模式
*/
async getOrCreateConversation(sessionId: string, userId: string): Promise<string> {
const existing = await prisma.conversation.findFirst({
where: {
agentId: SSA_AGENT_ID,
metadata: { path: ['sessionId'], equals: sessionId },
deletedAt: null,
},
});
if (existing) return existing.id;
const session = await prisma.ssaSession.findUnique({ where: { id: sessionId } });
const title = session?.title || '统计分析对话';
const conversation = await prisma.conversation.create({
data: {
userId,
agentId: SSA_AGENT_ID,
title,
modelName: DEFAULT_MODEL,
metadata: { sessionId },
},
});
logger.info('[SSA:Conv] Conversation created', {
conversationId: conversation.id,
sessionId,
});
return conversation.id;
}
// ────────────────────────────────────────────
// Message persistence
// ────────────────────────────────────────────
/**
* 保存用户消息到 DB
*/
async saveUserMessage(
conversationId: string,
content: string,
metadata?: Record<string, any>,
): Promise<string> {
const msg = await prisma.message.create({
data: {
conversationId,
role: 'user',
content,
metadata: metadata ?? undefined,
},
});
await this.incrementMessageCount(conversationId);
return msg.id;
}
/**
* 创建 assistant placeholderH3 竞态保护)
* 在 LLM 流式输出之前立即创建,确保并发请求能感知到"正在生成"
*/
async createAssistantPlaceholder(
conversationId: string,
intent: IntentType,
): Promise<string> {
const msg = await prisma.message.create({
data: {
conversationId,
role: 'assistant',
content: '',
model: DEFAULT_MODEL,
metadata: { intent, status: 'generating' },
},
});
logger.debug('[SSA:Conv] Assistant placeholder created', {
messageId: msg.id,
intent,
});
return msg.id;
}
/**
* 更新 placeholder 为最终内容
*/
async finalizeAssistantMessage(
messageId: string,
content: string,
thinkingContent?: string,
tokens?: number,
): Promise<void> {
const existing = await prisma.message.findUnique({ where: { id: messageId } });
const prevMeta = (existing?.metadata as Record<string, any>) || {};
await prisma.message.update({
where: { id: messageId },
data: {
content,
thinkingContent: thinkingContent || undefined,
tokens,
metadata: { ...prevMeta, status: 'complete' },
},
});
const msg = await prisma.message.findUnique({
where: { id: messageId },
select: { conversationId: true },
});
if (msg) {
await this.incrementMessageCount(msg.conversationId);
}
}
/**
* 标记 placeholder 为错误状态(崩溃恢复)
*/
async markAssistantError(messageId: string, error: string): Promise<void> {
const existing = await prisma.message.findUnique({ where: { id: messageId } });
const prevMeta = (existing?.metadata as Record<string, any>) || {};
await prisma.message.update({
where: { id: messageId },
data: {
content: `[生成中断] ${error}`,
metadata: { ...prevMeta, status: 'error', error },
},
});
}
// ────────────────────────────────────────────
// Context building
// ────────────────────────────────────────────
/**
* 构建 LLM 消息上下文System Prompt + 对话历史)
*/
async buildContext(
sessionId: string,
conversationId: string,
intent: IntentType,
toolOutputs?: string,
): Promise<LLMMessage[]> {
const systemPrompt = await systemPromptService.assemble(
sessionId,
intent,
toolOutputs,
);
const history = await prisma.message.findMany({
where: { conversationId },
orderBy: { createdAt: 'asc' },
});
// 滑动窗口:取最近 N 条有效消息(排除 generating 状态的 placeholder
const validMessages = history.filter(m => {
if (m.role === 'assistant') {
const meta = m.metadata as any;
return meta?.status !== 'generating';
}
return true;
});
const recentMessages = validMessages.slice(-MAX_CONTEXT_MESSAGES);
const messages: LLMMessage[] = [
{ role: 'system', content: systemPrompt },
];
for (const msg of recentMessages) {
messages.push({
role: msg.role as 'user' | 'assistant',
content: msg.content,
});
}
logger.debug('[SSA:Conv] Context built', {
sessionId,
intent,
historyCount: recentMessages.length,
systemPromptLength: systemPrompt.length,
});
return messages;
}
// ────────────────────────────────────────────
// LLM Streaming (C1: no function calling, C3: chatStream)
// ────────────────────────────────────────────
/**
* 执行 LLM 流式输出并写入 SSE 流
*
* 包含 5 秒心跳保活H1和标准化错误事件H1
*/
async streamToSSE(
messages: LLMMessage[],
writer: StreamWriter,
options?: {
model?: string;
temperature?: number;
maxTokens?: number;
onComplete?: (content: string, thinking: string) => void;
},
): Promise<{ content: string; thinking: string; tokens?: number }> {
const model = options?.model || DEFAULT_MODEL;
const adapter = LLMFactory.getAdapter(model as any);
let accContent = '';
let accThinking = '';
let clientDisconnected = false;
writer.on('close', () => {
clientDisconnected = true;
});
// H1: 5 秒心跳保活
const heartbeat = setInterval(() => {
if (!clientDisconnected) {
try {
writer.write(': keep-alive\n\n');
} catch { /* ignore */ }
}
}, HEARTBEAT_INTERVAL_MS);
try {
const stream = adapter.chatStream(messages, {
temperature: options?.temperature ?? 0.7,
maxTokens: options?.maxTokens ?? 2000,
});
for await (const chunk of stream) {
if (clientDisconnected) break;
if (chunk.content) {
accContent += chunk.content;
const sseData = JSON.stringify({
id: `chatcmpl-ssa-${Date.now()}`,
object: 'chat.completion.chunk',
choices: [{
delta: { content: chunk.content },
finish_reason: null,
}],
});
writer.write(`data: ${sseData}\n\n`);
}
if (chunk.done) {
const doneData = JSON.stringify({
id: `chatcmpl-ssa-${Date.now()}`,
object: 'chat.completion.chunk',
choices: [{
delta: {},
finish_reason: 'stop',
}],
usage: chunk.usage,
});
writer.write(`data: ${doneData}\n\n`);
writer.write('data: [DONE]\n\n');
}
}
options?.onComplete?.(accContent, accThinking);
return {
content: accContent,
thinking: accThinking,
tokens: undefined,
};
} catch (error: any) {
logger.error('[SSA:Conv] Stream error', { error: error.message });
if (!clientDisconnected) {
const errorData = JSON.stringify({
type: 'error',
code: 'STREAM_ERROR',
message: error.message || '生成回复时发生错误',
});
try {
writer.write(`data: ${errorData}\n\n`);
} catch { /* ignore */ }
}
throw error;
} finally {
clearInterval(heartbeat);
}
}
// ────────────────────────────────────────────
// Conversation history API
// ────────────────────────────────────────────
async getMessages(conversationId: string): Promise<any[]> {
return prisma.message.findMany({
where: { conversationId },
orderBy: { createdAt: 'asc' },
select: {
id: true,
role: true,
content: true,
thinkingContent: true,
metadata: true,
createdAt: true,
},
});
}
async getConversationBySession(sessionId: string): Promise<any | null> {
return prisma.conversation.findFirst({
where: {
agentId: SSA_AGENT_ID,
metadata: { path: ['sessionId'], equals: sessionId },
deletedAt: null,
},
});
}
// ────────────────────────────────────────────
// Helpers
// ────────────────────────────────────────────
private async incrementMessageCount(conversationId: string): Promise<void> {
try {
await prisma.conversation.update({
where: { id: conversationId },
data: { messageCount: { increment: 1 } },
});
} catch (err) {
logger.warn('[SSA:Conv] Failed to increment message count', { conversationId });
}
}
}
export const conversationService = new ConversationService();

View File

@@ -349,6 +349,66 @@ export class DataProfileService {
return lines.join('\n');
}
/**
* Phase I: 获取单变量详细分析(调用 Python variable-detail 端点)
*
* @param sessionId SSA 会话 ID
* @param variableName 目标变量名
*/
async getVariableDetail(sessionId: string, variableName: string): Promise<any> {
try {
const csvContent = await this.loadCSVFromSession(sessionId);
if (!csvContent) {
return { success: false, error: 'No CSV data available for session' };
}
const response = await this.client.post('/api/ssa/variable-detail', {
csv_content: csvContent,
variable_name: variableName,
max_bins: 30,
max_qq_points: 200,
});
return response.data;
} catch (error: any) {
logger.error('[SSA:DataProfile] Variable detail failed', {
sessionId, variableName, error: error.message,
});
return { success: false, error: error.message };
}
}
/**
* 从 Session 加载原始 CSV 字符串(供 variable-detail 复用)
*/
private async loadCSVFromSession(sessionId: string): Promise<string | null> {
const session = await prisma.ssaSession.findUnique({ where: { id: sessionId } });
if (!session) return null;
if (session.dataOssKey) {
const buffer = await storage.download(session.dataOssKey);
return buffer.toString('utf-8');
}
if (session.dataPayload) {
const rows = session.dataPayload as unknown as Record<string, any>[];
if (rows.length === 0) return null;
const cols = Object.keys(rows[0]);
const lines = [cols.join(',')];
for (const row of rows) {
lines.push(cols.map(c => {
const v = row[c];
if (v === null || v === undefined) return '';
const s = String(v);
return s.includes(',') || s.includes('"') ? `"${s.replace(/"/g, '""')}"` : s;
}).join(','));
}
return lines.join('\n');
}
return null;
}
}
// 单例导出

View File

@@ -0,0 +1,325 @@
/**
* Phase II — 意图路由服务
*
* 混合路由策略:
* 1. 规则引擎优先(零延迟,零 Token
* 2. LLM 兜底(仅规则无法判断时)
* 3. 上下文守卫C5数据依赖意图必须有上下文
*
* 路由结果chat | explore | consult | analyze | discuss | feedback
* 默认安全兜底chat
*/
import { readFileSync } from 'fs';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { logger } from '../../../common/logging/index.js';
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { prisma } from '../../../config/database.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import type { IntentType } from './SystemPromptService.js';
import type { Message } from '../../../common/llm/adapters/types.js';
// ────────────────────────────────────────────
// Types
// ────────────────────────────────────────────
interface IntentRule {
intent: IntentType;
keywords: string[];
excludeKeywords?: string[];
requires?: string[];
priority: number;
}
interface ContextGuard {
requires: string[];
fallbackMessage: string;
}
interface IntentRulesConfig {
rules: IntentRule[];
contextGuards: Record<string, ContextGuard>;
defaultIntent: IntentType;
}
export interface IntentResult {
intent: IntentType;
confidence: number;
source: 'rules' | 'llm' | 'default' | 'guard';
guardTriggered?: boolean;
guardMessage?: string;
originalIntent?: IntentType;
}
interface SessionContext {
hasDataOverview: boolean;
hasAnalysisResults: boolean;
hasPico: boolean;
lastIntent?: IntentType;
}
// ────────────────────────────────────────────
// Service
// ────────────────────────────────────────────
class IntentRouterService {
private rules: IntentRulesConfig;
constructor() {
this.rules = this.loadRules();
}
/**
* 核心分类方法
*/
async classify(
message: string,
sessionId: string,
): Promise<IntentResult> {
const context = await this.buildSessionContext(sessionId);
// Step 1: 规则引擎(零延迟)
const ruleResult = this.matchRules(message, context);
if (ruleResult && ruleResult.confidence >= 0.8) {
const guarded = this.applyContextGuard(ruleResult, context);
logger.info('[SSA:IntentRouter] Rule match', {
sessionId,
intent: guarded.intent,
confidence: guarded.confidence,
guardTriggered: guarded.guardTriggered,
});
return guarded;
}
// Step 2: LLM 兜底(仅规则无法判断时)
try {
const llmResult = await this.llmClassify(message, context);
if (llmResult.confidence >= 0.7) {
const guarded = this.applyContextGuard(llmResult, context);
logger.info('[SSA:IntentRouter] LLM classify', {
sessionId,
intent: guarded.intent,
confidence: guarded.confidence,
guardTriggered: guarded.guardTriggered,
});
return guarded;
}
} catch (err: any) {
logger.warn('[SSA:IntentRouter] LLM classify failed, using default', {
error: err.message,
});
}
// Step 3: 默认兜底 → chat最安全
logger.info('[SSA:IntentRouter] Default fallback to chat', { sessionId });
return {
intent: 'chat',
confidence: 0.5,
source: 'default',
};
}
// ────────────────────────────────────────────
// 规则引擎
// ────────────────────────────────────────────
private matchRules(message: string, context: SessionContext): IntentResult | null {
const normalizedMsg = message.toLowerCase().trim();
const matches: Array<{ rule: IntentRule; score: number }> = [];
for (const rule of this.rules.rules) {
if (rule.excludeKeywords?.some(kw => normalizedMsg.includes(kw.toLowerCase()))) {
continue;
}
const matchedKeywords = rule.keywords.filter(kw =>
normalizedMsg.includes(kw.toLowerCase())
);
if (matchedKeywords.length > 0) {
const score = matchedKeywords.length * rule.priority;
matches.push({ rule, score });
}
}
if (matches.length === 0) return null;
matches.sort((a, b) => b.score - a.score);
const best = matches[0];
return {
intent: best.rule.intent,
confidence: Math.min(0.95, 0.7 + best.score * 0.05),
source: 'rules',
};
}
// ────────────────────────────────────────────
// LLM 分类(轻量级,<500 tokens
// ────────────────────────────────────────────
private async llmClassify(message: string, context: SessionContext): Promise<IntentResult> {
const promptService = getPromptService(prisma);
let systemPrompt: string;
try {
const rendered = await promptService.get('SSA_INTENT_ROUTER', {});
systemPrompt = rendered.content;
} catch {
systemPrompt = this.fallbackRouterPrompt();
}
const contextDesc = [
`有数据: ${context.hasDataOverview ? '是' : '否'}`,
`有分析结果: ${context.hasAnalysisResults ? '是' : '否'}`,
`有PICO: ${context.hasPico ? '是' : '否'}`,
].join(', ');
const adapter = LLMFactory.getAdapter('deepseek-v3');
const messages: Message[] = [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: `会话状态: ${contextDesc}\n用户消息: ${message}` },
];
const response = await adapter.chat(messages, {
temperature: 0.1,
maxTokens: 100,
});
return this.parseLLMResponse(response.content);
}
private parseLLMResponse(text: string): IntentResult {
const cleaned = text.trim().toLowerCase();
try {
const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
if (jsonMatch) {
const parsed = JSON.parse(jsonMatch[0]);
const validIntents: IntentType[] = ['chat', 'explore', 'consult', 'analyze', 'discuss', 'feedback'];
const intent = validIntents.includes(parsed.intent) ? parsed.intent : 'chat';
const confidence = typeof parsed.confidence === 'number'
? Math.min(1, Math.max(0, parsed.confidence))
: 0.6;
return { intent, confidence, source: 'llm' };
}
} catch { /* fall through */ }
const intentMap: Record<string, IntentType> = {
chat: 'chat', explore: 'explore', consult: 'consult',
analyze: 'analyze', discuss: 'discuss', feedback: 'feedback',
};
for (const [key, intent] of Object.entries(intentMap)) {
if (cleaned.includes(key)) {
return { intent, confidence: 0.7, source: 'llm' };
}
}
return { intent: 'chat', confidence: 0.5, source: 'llm' };
}
// ────────────────────────────────────────────
// 上下文守卫C5
// ────────────────────────────────────────────
private applyContextGuard(result: IntentResult, context: SessionContext): IntentResult {
const guard = this.rules.contextGuards[result.intent];
if (!guard) return result;
const unmet = guard.requires.filter(req => {
if (req === 'dataOverview') return !context.hasDataOverview;
if (req === 'hasAnalysisResults') return !context.hasAnalysisResults;
return false;
});
if (unmet.length === 0) return result;
// consult 意图特殊处理:即使没有数据,也可以给一般性建议
if (result.intent === 'consult') {
return {
...result,
guardTriggered: true,
guardMessage: guard.fallbackMessage,
};
}
return {
intent: 'chat',
confidence: result.confidence,
source: 'guard',
guardTriggered: true,
guardMessage: guard.fallbackMessage,
originalIntent: result.intent,
};
}
// ────────────────────────────────────────────
// 会话上下文构建
// ────────────────────────────────────────────
private async buildSessionContext(sessionId: string): Promise<SessionContext> {
const blackboard = await sessionBlackboardService.get(sessionId);
let hasAnalysisResults = false;
if (blackboard?.qperTrace && blackboard.qperTrace.length > 0) {
hasAnalysisResults = blackboard.qperTrace.some(t => t.status === 'success');
}
return {
hasDataOverview: !!blackboard?.dataOverview,
hasAnalysisResults,
hasPico: !!blackboard?.picoInference,
};
}
// ────────────────────────────────────────────
// 配置加载
// ────────────────────────────────────────────
private loadRules(): IntentRulesConfig {
try {
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const configPath = join(__dirname, '..', 'config', 'intent_rules.json');
const raw = readFileSync(configPath, 'utf-8');
return JSON.parse(raw);
} catch (err) {
logger.warn('[SSA:IntentRouter] Failed to load intent_rules.json, using defaults');
return this.defaultRules();
}
}
private defaultRules(): IntentRulesConfig {
return {
rules: [
{ intent: 'analyze', keywords: ['分析', '检验', '比较', '回归'], priority: 10, requires: ['dataOverview'] },
{ intent: 'explore', keywords: ['看看', '分布', '缺失', '概况'], priority: 8, requires: ['dataOverview'] },
{ intent: 'discuss', keywords: ['什么意思', '怎么解释', 'p值'], priority: 9, requires: ['dataOverview', 'hasAnalysisResults'] },
],
contextGuards: {},
defaultIntent: 'chat',
};
}
private fallbackRouterPrompt(): string {
return `你是一个意图分类器。根据用户消息和会话状态,判断用户意图。
可选意图:
- chat: 普通对话、统计知识问答
- explore: 探索数据特征、了解数据概况
- consult: 咨询分析方法、请求推荐
- analyze: 要求执行统计分析
- discuss: 讨论已有分析结果
- feedback: 对结果不满意、要求改进
以 JSON 格式输出: {"intent": "xxx", "confidence": 0.9}
只输出 JSON不要输出其他内容。`;
}
}
export const intentRouterService = new IntentRouterService();

View File

@@ -0,0 +1,274 @@
/**
* MethodConsultService — 方法推荐工具
*
* 核心流程:
* 1. 从 SessionBlackboard 读取 dataOverview + picoInference
* 2. 将 PICO 推断映射为 ParsedQuerygoal/outcome/predictor/design
* 3. 调用 DecisionTableService.match() 获取 MatchResult
* 4. 调用 ToolRegistryService.getByCode() 获取工具详情
* 5. 返回 MethodRecommendation供 LLM 生成自然语言推荐)
*
* 设计原则: 只推荐不执行D1
*/
import { logger } from '../../../common/logging/index.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import { decisionTableService, type MatchResult } from './DecisionTableService.js';
import { toolRegistryService } from './ToolRegistryService.js';
import type { PicoInference } from '../types/session-blackboard.types.js';
import type { ParsedQuery, AnalysisGoal, VariableType, StudyDesign } from '../types/query.types.js';
// ────────────────────────────────────────────
// Types
// ────────────────────────────────────────────
export interface MethodDetail {
code: string;
name: string;
description: string;
category: string;
prerequisite?: string;
}
export interface MethodRecommendation {
matched: boolean;
primaryMethod: MethodDetail | null;
fallbackMethod: (MethodDetail & { switchCondition: string }) | null;
matchScore: number;
maxPossibleScore: number;
matchDimensions: {
goal: string;
outcomeType: string | null;
predictorType: string | null;
design: string;
};
needsClarification: boolean;
clarificationReason?: string;
rawMatchResult?: MatchResult;
}
// ────────────────────────────────────────────
// Service
// ────────────────────────────────────────────
export class MethodConsultService {
/**
* 根据 session 上下文推荐统计方法
*/
async recommend(sessionId: string, _userMessage?: string): Promise<MethodRecommendation> {
const bb = await sessionBlackboardService.get(sessionId);
if (!bb) {
return this.noDataRecommendation('Session 黑板为空,无法推荐方法');
}
const pico = bb.picoInference;
const dataOverview = bb.dataOverview;
if (!dataOverview) {
return this.noDataRecommendation('尚未上传数据,无法进行方法推荐');
}
// PICO 不完整时标记需要澄清
if (!pico || !pico.outcome) {
return this.noDataRecommendation(
'尚未完成 PICO 推断(缺少结局变量信息),请先描述您的研究目的和关注的变量',
);
}
// 映射 PICO → ParsedQuery
const parsedQuery = this.picoToQuery(pico, dataOverview);
if (!parsedQuery) {
return this.noDataRecommendation('无法从 PICO 推断中映射出有效的分析参数');
}
// 调用决策表匹配
const matchResult = decisionTableService.match(parsedQuery);
// 构建推荐结果
return this.buildRecommendation(matchResult, parsedQuery);
}
/**
* PICO → ParsedQuery 映射
* 将高层语义Population/Intervention/Comparison/Outcome映射为决策表可理解的四维
*/
private picoToQuery(pico: PicoInference, dataOverview: any): ParsedQuery | null {
const columns = dataOverview.profile?.columns || [];
// 推断 goal
const goal = this.inferGoal(pico);
// 查找 outcome 变量类型
const outcomeCol = columns.find(
(c: any) => c.name === pico.outcome || c.name?.toLowerCase() === pico.outcome?.toLowerCase(),
);
const outcomeType: VariableType | null = outcomeCol
? this.columnToVarType(outcomeCol)
: null;
// 查找 predictor 变量intervention/comparison 映射为分组变量)
const groupingVar = pico.intervention || pico.comparison || null;
const groupCol = groupingVar
? columns.find(
(c: any) => c.name === groupingVar || c.name?.toLowerCase() === groupingVar?.toLowerCase(),
)
: null;
const predictorType: VariableType | null = groupCol
? this.columnToVarType(groupCol)
: null;
// 推断 design
const design: StudyDesign = this.inferDesign(pico);
return {
goal,
outcome_var: pico.outcome,
outcome_type: outcomeType,
predictor_vars: groupingVar ? [groupingVar] : [],
predictor_types: predictorType ? [predictorType] : [],
grouping_var: groupingVar,
design,
confidence: pico.confidence === 'high' ? 0.9 : pico.confidence === 'medium' ? 0.7 : 0.5,
reasoning: `基于 PICO 推断: P=${pico.population}, I=${pico.intervention}, C=${pico.comparison}, O=${pico.outcome}`,
needsClarification: false,
};
}
private inferGoal(pico: PicoInference): AnalysisGoal {
// 有 intervention + comparison → comparison
if (pico.intervention && pico.comparison) return 'comparison';
// 有 intervention 无 comparison → comparison (vs baseline)
if (pico.intervention) return 'comparison';
// 无 intervention → descriptive or correlation
return 'descriptive';
}
private inferDesign(pico: PicoInference): StudyDesign {
// 简单启发式:如果 intervention 暗示配对设计
const pairedKeywords = ['前后', '治疗前', '治疗后', 'before', 'after', '干预前', '干预后'];
const text = `${pico.intervention || ''} ${pico.comparison || ''}`.toLowerCase();
if (pairedKeywords.some(k => text.includes(k))) return 'paired';
return 'independent';
}
private columnToVarType(col: any): VariableType {
const dtype = col.dtype || col.type || '';
if (col.is_id_like) return 'categorical';
if (['float64', 'int64', 'numeric', 'continuous'].includes(dtype)) return 'continuous';
if (col.uniqueValues === 2 || col.unique_count === 2) return 'binary';
return 'categorical';
}
/**
* 将 MatchResult 转换为 MethodRecommendation
*/
private buildRecommendation(matchResult: MatchResult, query: ParsedQuery): MethodRecommendation {
const primaryTool = toolRegistryService.getByCode(matchResult.primaryTool);
const fallbackTool = matchResult.fallbackTool
? toolRegistryService.getByCode(matchResult.fallbackTool)
: null;
const primaryMethod: MethodDetail | null = primaryTool
? {
code: primaryTool.code,
name: primaryTool.name,
description: primaryTool.description,
category: primaryTool.category,
prerequisite: primaryTool.prerequisite,
}
: null;
const fallbackMethod = fallbackTool && matchResult.switchCondition
? {
code: fallbackTool.code,
name: fallbackTool.name,
description: fallbackTool.description,
category: fallbackTool.category,
prerequisite: fallbackTool.prerequisite,
switchCondition: matchResult.switchCondition,
}
: null;
const maxScore = 11; // 4+3+2+2
return {
matched: matchResult.matchScore > 0,
primaryMethod,
fallbackMethod,
matchScore: matchResult.matchScore,
maxPossibleScore: maxScore,
matchDimensions: {
goal: query.goal,
outcomeType: query.outcome_type,
predictorType: query.predictor_types[0] || null,
design: query.design,
},
needsClarification: matchResult.matchScore < 4,
clarificationReason: matchResult.matchScore < 4
? '匹配分数较低,建议补充变量类型或研究设计信息以获得更精准的推荐'
: undefined,
rawMatchResult: matchResult,
};
}
private noDataRecommendation(reason: string): MethodRecommendation {
return {
matched: false,
primaryMethod: null,
fallbackMethod: null,
matchScore: 0,
maxPossibleScore: 11,
matchDimensions: { goal: 'descriptive', outcomeType: null, predictorType: null, design: 'independent' },
needsClarification: true,
clarificationReason: reason,
};
}
/**
* 格式化推荐结果为 LLM 可读文本(注入到 SystemPrompt toolOutputs
*/
formatForLLM(rec: MethodRecommendation): string {
if (!rec.matched || !rec.primaryMethod) {
return [
'## 决策表匹配结果',
`状态: 未匹配到合适方法`,
`原因: ${rec.clarificationReason || '信息不足'}`,
'',
'请根据用户描述的研究目的,给出一般性的方法建议。',
].join('\n');
}
const lines = [
'## 决策表匹配结果',
`匹配分数: ${rec.matchScore}/${rec.maxPossibleScore}`,
`匹配维度: goal=${rec.matchDimensions.goal}, outcome=${rec.matchDimensions.outcomeType || '未知'}, predictor=${rec.matchDimensions.predictorType || '未知'}, design=${rec.matchDimensions.design}`,
'',
`### 推荐方法: ${rec.primaryMethod.name} (${rec.primaryMethod.code})`,
`- 描述: ${rec.primaryMethod.description}`,
`- 类别: ${rec.primaryMethod.category}`,
];
if (rec.primaryMethod.prerequisite) {
lines.push(`- 前提条件: ${rec.primaryMethod.prerequisite}`);
}
if (rec.fallbackMethod) {
lines.push('');
lines.push(`### 降级方案: ${rec.fallbackMethod.name} (${rec.fallbackMethod.code})`);
lines.push(`- 切换条件: ${rec.fallbackMethod.switchCondition}`);
lines.push(`- 描述: ${rec.fallbackMethod.description}`);
}
if (rec.needsClarification) {
lines.push('');
lines.push(`⚠️ 注意: ${rec.clarificationReason}`);
}
return lines.join('\n');
}
}
export const methodConsultService = new MethodConsultService();

View File

@@ -0,0 +1,156 @@
/**
* Phase I — PICO 推断服务
*
* 调用 LLM (SSA_PICO_INFERENCE prompt) 从数据概览推断 PICO 结构。
* 写入 SessionBlackboard.picoInference标记为 ai_inferred。
*
* 安全措施:
* - Zod 校验 LLM 输出
* - jsonrepair 容错
* - H3: 观察性研究允许 intervention/comparison 为 null
*/
import { logger } from '../../../common/logging/index.js';
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { prisma } from '../../../config/database.js';
import { jsonrepair } from 'jsonrepair';
import type { Message } from '../../../common/llm/adapters/types.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import {
PicoInferenceSchema,
type PicoInference,
type DataOverview,
type VariableDictEntry,
} from '../types/session-blackboard.types.js';
const MAX_RETRIES = 1;
export class PicoInferenceService {
/**
* 从 DataOverview 推断 PICO 结构并写入黑板。
*/
async inferFromOverview(
sessionId: string,
overview: DataOverview,
dictionary: VariableDictEntry[],
): Promise<PicoInference | null> {
try {
logger.info('[SSA:PICO] Starting inference', { sessionId });
const promptService = getPromptService(prisma);
const dataOverviewSummary = this.buildOverviewSummary(overview);
const variableList = this.buildVariableList(dictionary);
const rendered = await promptService.get('SSA_PICO_INFERENCE', {
dataOverviewSummary,
variableList,
});
const adapter = LLMFactory.getAdapter(
(rendered.modelConfig?.model as any) || 'deepseek-v3'
);
const messages: Message[] = [
{ role: 'system', content: rendered.content },
{ role: 'user', content: '请根据以上数据概览推断 PICO 结构。' },
];
let pico: PicoInference | null = null;
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
const response = await adapter.chat(messages, {
temperature: rendered.modelConfig?.temperature ?? 0.3,
maxTokens: rendered.modelConfig?.maxTokens ?? 1024,
});
const raw = this.robustJsonParse(response.content);
const validated = PicoInferenceSchema.parse({
...raw,
status: 'ai_inferred',
});
pico = validated;
break;
} catch (err: any) {
logger.warn('[SSA:PICO] LLM attempt failed', {
attempt, error: err.message,
});
if (attempt === MAX_RETRIES) throw err;
}
}
if (pico) {
await sessionBlackboardService.confirmPico(sessionId, {
population: pico.population,
intervention: pico.intervention,
comparison: pico.comparison,
outcome: pico.outcome,
});
logger.info('[SSA:PICO] Inference complete', {
sessionId,
confidence: pico.confidence,
hasIntervention: pico.intervention !== null,
});
}
return pico;
} catch (error: any) {
logger.error('[SSA:PICO] Inference failed', {
sessionId, error: error.message,
});
return null;
}
}
private buildOverviewSummary(overview: DataOverview): string {
const s = overview.profile.summary;
const lines = [
`数据集: ${s.totalRows} 行, ${s.totalColumns}`,
`类型分布: 数值型 ${s.numericColumns}, 分类型 ${s.categoricalColumns}, 日期型 ${s.datetimeColumns}, 文本型 ${s.textColumns}`,
`整体缺失率: ${s.overallMissingRate}%`,
`完整病例数: ${overview.completeCaseCount}`,
];
const nonNormal = overview.normalityTests
?.filter(t => !t.isNormal)
.map(t => t.variable);
if (nonNormal && nonNormal.length > 0) {
lines.push(`非正态分布变量: ${nonNormal.join(', ')}`);
}
return lines.join('\n');
}
private buildVariableList(dict: VariableDictEntry[]): string {
return dict
.filter(v => !v.isIdLike)
.map(v => {
const type = v.confirmedType ?? v.inferredType;
const label = v.label ? ` (${v.label})` : '';
return `- ${v.name}: ${type}${label}`;
})
.join('\n');
}
private robustJsonParse(text: string): any {
let cleaned = text.trim();
const fenceMatch = cleaned.match(/```(?:json)?\s*([\s\S]*?)```/);
if (fenceMatch) {
cleaned = fenceMatch[1].trim();
}
try {
return JSON.parse(cleaned);
} catch {
return JSON.parse(jsonrepair(cleaned));
}
}
}
export const picoInferenceService = new PicoInferenceService();

View File

@@ -30,7 +30,7 @@ import {
createDynamicIntentSchema,
validateConfidence,
} from '../types/query.types.js';
import { AVAILABLE_TOOLS } from './WorkflowPlannerService.js';
import { toolRegistryService } from './ToolRegistryService.js';
const CONFIDENCE_THRESHOLD = 0.7;
const MAX_LLM_RETRIES = 1;
@@ -92,9 +92,7 @@ export class QueryService {
? this.buildProfileSummaryForPrompt(profile)
: '(未上传数据文件)';
const toolList = Object.values(AVAILABLE_TOOLS)
.map(t => `- ${t.code}: ${t.name}${t.description}`)
.join('\n');
const toolList = toolRegistryService.formatForLLM();
// 2. 获取渲染后的 Prompt
const rendered = await promptService.get('SSA_QUERY_INTENT', {

View File

@@ -0,0 +1,319 @@
/**
* Phase I — Session 黑板服务
*
* 会话级数据缓存,存储 DataOverview / VariableDictionary / PicoInference / QperTrace。
* 使用平台 CacheFactoryPostgres-Only 架构,遵循 C4/C7 约束)。
* 扁平单 JSON Blob一个 sessionId = 一条 cache 记录。
*
* 并发安全patch() 内置基于 sessionId 的互斥锁H1
* 同一 session 的并发 patch 排队执行,杜绝 get-merge-set 竞态覆盖。
*/
import { cache } from '../../../common/cache/index.js';
import { logger } from '../../../common/logging/index.js';
import {
SessionBlackboardSchema,
createEmptyBlackboard,
type SessionBlackboard,
type DataOverview,
type VariableDictEntry,
type VariableDictPatch,
type PicoInference,
type PicoPatch,
type QperTraceEntry,
type FiveSectionReport,
} from '../types/session-blackboard.types.js';
class SessionBlackboardService {
private readonly KEY_PREFIX = 'ssa:session:';
private readonly DEFAULT_TTL = 7200; // 2 小时
/**
* 基于 sessionId 的互斥锁H1 修正)
* 防止并发 patch 导致 get-merge-set 竞态覆盖
*/
private locks = new Map<string, Promise<any>>();
private cacheKey(sessionId: string): string {
return `${this.KEY_PREFIX}${sessionId}`;
}
// ────────────────────────────────────────────
// 核心 CRUD
// ────────────────────────────────────────────
async get(sessionId: string): Promise<SessionBlackboard | null> {
try {
const data = await cache.get<SessionBlackboard>(this.cacheKey(sessionId));
if (!data) return null;
const parsed = SessionBlackboardSchema.safeParse(data);
if (!parsed.success) {
logger.warn(`SessionBlackboard schema validation failed for ${sessionId}:`, parsed.error.issues);
return data as SessionBlackboard;
}
return parsed.data as SessionBlackboard;
} catch (error) {
logger.error(`Failed to get SessionBlackboard for ${sessionId}:`, error);
return null;
}
}
async set(sessionId: string, data: SessionBlackboard): Promise<void> {
try {
data.updatedAt = new Date().toISOString();
await cache.set(this.cacheKey(sessionId), data, this.DEFAULT_TTL);
} catch (error) {
logger.error(`Failed to set SessionBlackboard for ${sessionId}:`, error);
throw error;
}
}
/**
* 部分更新 — 带互斥锁H1
* 同一 session 的并发 patch 排队执行
*/
async patch(sessionId: string, partial: Partial<SessionBlackboard>): Promise<SessionBlackboard> {
while (this.locks.has(sessionId)) {
await this.locks.get(sessionId);
}
const operation = (async (): Promise<SessionBlackboard> => {
const current = await this.get(sessionId) ?? createEmptyBlackboard(sessionId);
const merged: SessionBlackboard = {
...current,
...partial,
sessionId: current.sessionId,
createdAt: current.createdAt,
updatedAt: new Date().toISOString(),
};
await this.set(sessionId, merged);
return merged;
})();
this.locks.set(sessionId, operation);
try {
return await operation;
} finally {
this.locks.delete(sessionId);
}
}
async delete(sessionId: string): Promise<void> {
try {
await cache.delete(this.cacheKey(sessionId));
} catch (error) {
logger.error(`Failed to delete SessionBlackboard for ${sessionId}:`, error);
}
}
async exists(sessionId: string): Promise<boolean> {
return cache.has(this.cacheKey(sessionId));
}
// ────────────────────────────────────────────
// DataOverview 便捷方法
// ────────────────────────────────────────────
async getDataOverview(sessionId: string): Promise<DataOverview | null> {
const bb = await this.get(sessionId);
return bb?.dataOverview ?? null;
}
async setDataOverview(sessionId: string, overview: DataOverview): Promise<void> {
await this.patch(sessionId, { dataOverview: overview });
}
// ────────────────────────────────────────────
// VariableDictionary 便捷方法
// ────────────────────────────────────────────
async getVariableDictionary(sessionId: string): Promise<VariableDictEntry[]> {
const bb = await this.get(sessionId);
return bb?.variableDictionary ?? [];
}
async setVariableDictionary(sessionId: string, dict: VariableDictEntry[]): Promise<void> {
await this.patch(sessionId, { variableDictionary: dict });
}
/**
* 更新单个变量的字典条目
* 用于用户编辑变量类型/标签/PICO 角色
*/
async updateVariable(sessionId: string, varName: string, patchData: VariableDictPatch): Promise<VariableDictEntry | null> {
while (this.locks.has(sessionId)) {
await this.locks.get(sessionId);
}
const operation = (async (): Promise<VariableDictEntry | null> => {
const bb = await this.get(sessionId);
if (!bb) return null;
const idx = bb.variableDictionary.findIndex(v => v.name === varName);
if (idx === -1) return null;
const entry = bb.variableDictionary[idx];
if (patchData.confirmedType !== undefined) {
entry.confirmedType = patchData.confirmedType;
entry.confirmStatus = 'user_confirmed';
}
if (patchData.label !== undefined) {
entry.label = patchData.label;
}
if (patchData.picoRole !== undefined) {
entry.picoRole = patchData.picoRole;
}
bb.variableDictionary[idx] = entry;
await this.set(sessionId, bb);
return entry;
})();
this.locks.set(sessionId, operation);
try {
return await operation;
} finally {
this.locks.delete(sessionId);
}
}
// ────────────────────────────────────────────
// PICO 推断便捷方法
// ────────────────────────────────────────────
async getPicoInference(sessionId: string): Promise<PicoInference | null> {
const bb = await this.get(sessionId);
return bb?.picoInference ?? null;
}
async setPicoInference(sessionId: string, pico: PicoInference): Promise<void> {
await this.patch(sessionId, { picoInference: pico });
}
async confirmPico(sessionId: string, picoPatch: PicoPatch): Promise<PicoInference | null> {
while (this.locks.has(sessionId)) {
await this.locks.get(sessionId);
}
const operation = (async (): Promise<PicoInference | null> => {
const bb = await this.get(sessionId);
if (!bb?.picoInference) return null;
const updated: PicoInference = {
...bb.picoInference,
...picoPatch,
status: 'user_confirmed',
};
bb.picoInference = updated;
await this.set(sessionId, bb);
return updated;
})();
this.locks.set(sessionId, operation);
try {
return await operation;
} finally {
this.locks.delete(sessionId);
}
}
// ────────────────────────────────────────────
// QPER Trace 便捷方法Phase II+ 使用)
// ────────────────────────────────────────────
async appendTrace(sessionId: string, entry: QperTraceEntry): Promise<void> {
while (this.locks.has(sessionId)) {
await this.locks.get(sessionId);
}
const operation = (async (): Promise<void> => {
const bb = await this.get(sessionId) ?? createEmptyBlackboard(sessionId);
bb.qperTrace.push(entry);
await this.set(sessionId, bb);
})();
this.locks.set(sessionId, operation);
try {
await operation;
} finally {
this.locks.delete(sessionId);
}
}
// ────────────────────────────────────────────
// 五段式报告生成(纯模板渲染,不依赖 LLM
// ────────────────────────────────────────────
generateFiveSectionReport(overview: DataOverview, dict: VariableDictEntry[]): FiveSectionReport {
const { profile, completeCaseCount, normalityTests } = overview;
const { summary, columns } = profile;
const varsWithMissing = columns
.filter(c => c.missingCount > 0)
.map(c => c.name);
const categoricalVars = dict
.filter(v => v.inferredType === 'categorical')
.map(v => v.name);
const numericVars = dict
.filter(v => v.inferredType === 'numeric')
.map(v => v.name);
const varsWithOutliers = columns
.filter(c => (c.outlierCount ?? 0) > 0)
.map(c => c.name);
const nonNormalVars = normalityTests
.filter(t => !t.isNormal)
.map(t => t.variable);
const normalVars = normalityTests
.filter(t => t.isNormal)
.map(t => t.variable);
return {
basicInfo: {
title: '数据基本特征检测',
content: `数据集共有${summary.totalRows}个病例,${summary.totalColumns}个变量。`,
},
missingData: {
title: '数据缺失状况检测',
content: varsWithMissing.length > 0
? `所有变量中,有${varsWithMissing.length}个变量存在缺失数据,变量名如下:${varsWithMissing.slice(0, 5).join(' ')}${varsWithMissing.length > 5 ? ' 等' : ''}。数据完整的病例共有${completeCaseCount}个。`
: `所有变量均无缺失数据,数据完整的病例共有${completeCaseCount}个。`,
varsWithMissing,
completeCaseCount,
},
dataTypes: {
title: '数据类型检测',
content: categoricalVars.length > 0
? `所有变量均为数值型,其中有${categoricalVars.length}个变量为分类变量,请确认变量类型是否正确,这对统计分析方法的选择非常重要。分类变量的变量名如下:${categoricalVars.slice(0, 5).join(' ')}${categoricalVars.length > 5 ? ' 等' : ''}`
: `所有变量均为连续型数值变量。`,
categoricalVars,
numericVars,
needsConfirmation: true,
},
outliers: {
title: '数据异常值检测',
content: varsWithOutliers.length > 0
? `所有变量中,有${varsWithOutliers.length}个变量存在异常值异常值判定是应用四分位数±1.5倍四分位间距,在此之外判定为异常值。变量名如下:${varsWithOutliers.slice(0, 5).join(' ')}${varsWithOutliers.length > 5 ? ' 等' : ''}。异常值不一定是错误值,请查看原始数据并结合实际情况进行处理,不宜直接修改或删除。`
: `所有变量均未检测到异常值(基于 IQR 方法)。`,
method: 'IQR (Q1-1.5*IQR, Q3+1.5*IQR)',
varsWithOutliers,
},
normality: {
title: '正态分布检测',
content: nonNormalVars.length > 0
? `所有连续变量中,有${nonNormalVars.length}个变量为非正态分布使用R函数shapiro.test进行判定当p<0.05认为非正态分布。变量名如下:${nonNormalVars.slice(0, 5).join(' ')}${nonNormalVars.length > 5 ? ' 等' : ''}。统计学家更推荐直方图P-P图、Q-Q图进行判定本判定的结果供您参考。`
: `所有连续变量均通过正态性检验Shapiro-Wilk, p >= 0.05)。`,
method: 'Shapiro-Wilk (p < 0.05 判定为非正态)',
nonNormalVars,
normalVars,
},
};
}
}
export const sessionBlackboardService = new SessionBlackboardService();

View File

@@ -0,0 +1,153 @@
/**
* Phase II — System Prompt 动态组装服务
*
* 六段式组装H2 Lost-in-the-Middle 修正):
* [1] base_system — 固定角色定义
* [2] data_context — DataOverview 摘要
* [3] pico_inference — PICO 分类
* [4] variable_dictionary — 变量字典摘要
* [5] tool_outputs — 工具调用结果(冗长数据放中间)
* [6] intent_instruction — 意图指令(核心指令放最后,永不裁剪)
*
* Token 预算 <= 4000C2超出按 [5] > [4] > [3] > [2] 优先级裁剪。
* [6] intent_instruction 永不裁剪。
*/
import { logger } from '../../../common/logging/index.js';
import { getPromptService } from '../../../common/prompt/index.js';
import { prisma } from '../../../config/database.js';
import { tokenTruncationService, type TruncationOptions } from './TokenTruncationService.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import type { SessionBlackboard } from '../types/session-blackboard.types.js';
export type IntentType = 'chat' | 'explore' | 'consult' | 'analyze' | 'discuss' | 'feedback';
const INTENT_PROMPT_CODES: Record<IntentType, string> = {
chat: 'SSA_INTENT_CHAT',
explore: 'SSA_INTENT_EXPLORE',
consult: 'SSA_INTENT_CONSULT',
analyze: 'SSA_INTENT_ANALYZE',
discuss: 'SSA_INTENT_DISCUSS',
feedback: 'SSA_INTENT_FEEDBACK',
};
const MAX_SYSTEM_TOKENS = 4000;
export class SystemPromptService {
/**
* 组装完整 System Prompt六段式H2 修正顺序)
*/
async assemble(
sessionId: string,
intent: IntentType,
toolOutputs?: string,
): Promise<string> {
const promptService = getPromptService(prisma);
// [1] Base system role
let baseSystem = '';
try {
const rendered = await promptService.get('SSA_BASE_SYSTEM', {});
baseSystem = rendered.content;
} catch {
baseSystem = this.fallbackBaseSystem();
}
// [2-4] DataContext from SessionBlackboard (truncated)
let dataContextBlock = '';
const blackboard = await sessionBlackboardService.get(sessionId);
if (blackboard) {
const truncated = tokenTruncationService.truncate(blackboard, {
maxTokens: this.calculateDataBudget(baseSystem, toolOutputs),
strategy: 'balanced',
});
dataContextBlock = tokenTruncationService.toPromptString(truncated);
}
// [5] Tool outputs (placed in middle — H2 fix)
const toolBlock = toolOutputs
? `\n\n## 工具执行结果\n${toolOutputs}`
: '';
// [6] Intent instruction (placed LAST — H2 fix, never truncated)
let intentInstruction = '';
const intentCode = INTENT_PROMPT_CODES[intent];
try {
const rendered = await promptService.get(intentCode, {});
intentInstruction = rendered.content;
} catch {
intentInstruction = this.fallbackIntentInstruction(intent);
}
// Assemble: [1] Base → [2-4] DataContext → [5] ToolOutputs → [6] IntentInstruction
const parts: string[] = [baseSystem];
if (dataContextBlock) {
parts.push(dataContextBlock);
}
if (toolBlock) {
parts.push(toolBlock);
}
// Intent instruction is ALWAYS last (H2 — Lost in the Middle fix)
parts.push(`\n\n## 当前任务指令\n${intentInstruction}`);
const assembled = parts.join('\n\n');
const estimatedTokens = Math.ceil(assembled.length / 2);
logger.debug('[SSA:SystemPrompt] Assembled', {
sessionId,
intent,
estimatedTokens,
hasData: !!blackboard,
hasToolOutput: !!toolOutputs,
});
if (estimatedTokens > MAX_SYSTEM_TOKENS) {
logger.warn('[SSA:SystemPrompt] Exceeded token budget', {
estimatedTokens,
maxTokens: MAX_SYSTEM_TOKENS,
});
}
return assembled;
}
private calculateDataBudget(baseSystem: string, toolOutputs?: string): number {
const baseTokens = Math.ceil(baseSystem.length / 2);
const toolTokens = toolOutputs ? Math.ceil(toolOutputs.length / 2) : 0;
const intentReserve = 500; // intent instruction reserve
return Math.max(500, MAX_SYSTEM_TOKENS - baseTokens - toolTokens - intentReserve);
}
private fallbackBaseSystem(): string {
return `你是 SSA-Pro 智能统计分析助手,专注于临床研究统计分析。
你具备以下能力:
- 理解临床研究数据的结构和特征
- 推荐合适的统计分析方法
- 解读统计分析结果
- 用通俗易懂的语言向医学研究者解释统计概念
沟通原则:
- 使用中文回复
- 语言专业但不晦涩
- 分点作答,条理清晰
- 对不确定的内容如实说明`;
}
private fallbackIntentInstruction(intent: IntentType): string {
const map: Record<IntentType, string> = {
chat: '请基于统计知识和用户数据直接回答用户的问题。不要主动建议执行分析,除非用户明确要求。简洁作答,分点清晰。',
explore: '用户想了解数据的特征。请基于上方的数据摘要信息,帮用户解读数据特征(缺失、分布、异常值等)。可以推断 PICO 结构。不要执行分析。',
consult: '用户在咨询统计方法。请根据数据特征和研究目的推荐合适的统计方法,给出选择理由和前提条件。不要直接执行分析。提供替代方案。',
analyze: '以下是工具执行结果。请向用户简要说明分析进展和关键发现。使用通俗语言,避免过度技术化。',
discuss: '用户想讨论分析结果。请帮助用户深入解读结果,解释统计量的含义,讨论临床意义和局限性。',
feedback: '用户对之前的分析结果不满意或有改进建议。请分析问题原因,提出改进方案(如更换统计方法、调整参数等)。',
};
return map[intent];
}
}
export const systemPromptService = new SystemPromptService();

View File

@@ -0,0 +1,204 @@
/**
* Phase I — Token 截断服务
*
* 在将 SessionBlackboard 数据注入 LLM Prompt 之前,
* 按优先级策略裁剪 payload 以适配模型上下文窗口。
*
* 裁剪策略(按优先级从低到高保留):
* 1. 完整变量字典 → 仅保留非 isIdLike 的变量
* 2. topValues 列表 → 截断到 top 5
* 3. 数值列详细统计 → 保留 mean/std/median + 去掉 skewness/kurtosis
* 4. normalityTests → 仅保留非正态的变量
* 5. picoInference → 始终保留(最高优先级)
* 6. fiveSectionReport.content → 若超限则截断到前 500 字符
*
* 预估 token 使用简易方式: 1 中文字 ≈ 2 tokens, 1 英文词 ≈ 1.3 tokens
* 通过 JSON.stringify 长度 / 2 作为粗略上界。
*/
import { logger } from '../../../common/logging/index.js';
import type {
SessionBlackboard,
DataOverview,
VariableDictEntry,
FiveSectionReport,
} from '../types/session-blackboard.types.js';
export interface TruncationOptions {
maxTokens?: number;
strategy?: 'aggressive' | 'balanced' | 'minimal';
}
interface TruncatedContext {
overview: string;
variables: string;
pico: string;
report: string;
estimatedTokens: number;
}
const DEFAULT_MAX_TOKENS = 3000;
export class TokenTruncationService {
/**
* 将 SessionBlackboard 截断为可注入 Prompt 的紧凑文本。
*/
truncate(
blackboard: SessionBlackboard,
options: TruncationOptions = {},
): TruncatedContext {
const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
const strategy = options.strategy ?? 'balanced';
logger.debug('[SSA:TokenTrunc] Truncating context', {
sessionId: blackboard.sessionId,
maxTokens,
strategy,
});
const pico = this.formatPico(blackboard);
const overview = this.formatOverview(blackboard.dataOverview, strategy);
const variables = this.formatVariables(blackboard.variableDictionary, strategy);
const report = this.formatReport(blackboard, strategy);
let ctx: TruncatedContext = {
pico,
overview,
variables,
report,
estimatedTokens: 0,
};
ctx.estimatedTokens = this.estimateTokens(ctx);
if (ctx.estimatedTokens > maxTokens) {
ctx = this.applyAggressiveTruncation(ctx, blackboard, maxTokens);
}
logger.debug('[SSA:TokenTrunc] Truncation complete', {
estimatedTokens: ctx.estimatedTokens,
maxTokens,
});
return ctx;
}
/**
* 一次性生成可直接拼入 system prompt 的字符串。
*/
toPromptString(ctx: TruncatedContext): string {
const parts: string[] = [];
if (ctx.pico) parts.push(`## PICO 结构\n${ctx.pico}`);
if (ctx.overview) parts.push(`## 数据概览\n${ctx.overview}`);
if (ctx.variables) parts.push(`## 变量列表\n${ctx.variables}`);
if (ctx.report) parts.push(`## 数据诊断摘要\n${ctx.report}`);
return parts.join('\n\n');
}
private formatPico(bb: SessionBlackboard): string {
const p = bb.picoInference;
if (!p) return '';
const lines = [];
if (p.population) lines.push(`P (人群): ${p.population}`);
if (p.intervention) lines.push(`I (干预): ${p.intervention}`);
if (p.comparison) lines.push(`C (对照): ${p.comparison}`);
if (p.outcome) lines.push(`O (结局): ${p.outcome}`);
return lines.join('\n');
}
private formatOverview(ov: DataOverview | null, strategy: string): string {
if (!ov) return '';
const s = ov.profile.summary;
let text = `${s.totalRows}× ${s.totalColumns} 列, 缺失率 ${s.overallMissingRate}%, 完整病例 ${ov.completeCaseCount}`;
if (strategy !== 'aggressive' && ov.normalityTests?.length) {
const nonNormal = ov.normalityTests.filter(t => !t.isNormal).map(t => t.variable);
if (nonNormal.length > 0) {
text += `\n非正态: ${nonNormal.join(', ')}`;
}
}
return text;
}
private formatVariables(dict: VariableDictEntry[], strategy: string): string {
let vars = dict.filter(v => !v.isIdLike);
if (strategy === 'aggressive') {
vars = vars.slice(0, 15);
}
return vars.map(v => {
const type = v.confirmedType ?? v.inferredType;
const label = v.label ? ` "${v.label}"` : '';
const role = v.picoRole ? ` [${v.picoRole}]` : '';
return `- ${v.name}: ${type}${label}${role}`;
}).join('\n');
}
private formatReport(bb: SessionBlackboard, strategy: string): string {
const report = bb.dataOverview
? this.buildReportSummary(bb.dataOverview)
: '';
if (strategy === 'aggressive' && report.length > 500) {
return report.slice(0, 500) + '...';
}
return report;
}
private buildReportSummary(ov: DataOverview): string {
const s = ov.profile.summary;
const lines: string[] = [];
const missingCols = ov.profile.columns.filter(c => c.missingCount > 0);
if (missingCols.length > 0) {
lines.push(`缺失变量(${missingCols.length}): ${missingCols.map(c => c.name).join(', ')}`);
}
const outlierCols = ov.profile.columns.filter(c => (c as any).outlierCount > 0);
if (outlierCols.length > 0) {
lines.push(`异常值变量(${outlierCols.length}): ${outlierCols.map(c => c.name).join(', ')}`);
}
const catCount = s.categoricalColumns;
const numCount = s.numericColumns;
lines.push(`类型: 数值${numCount} + 分类${catCount}`);
return lines.join('\n');
}
private estimateTokens(ctx: TruncatedContext): number {
const total = ctx.pico.length + ctx.overview.length + ctx.variables.length + ctx.report.length;
return Math.ceil(total / 2);
}
private applyAggressiveTruncation(
ctx: TruncatedContext,
bb: SessionBlackboard,
maxTokens: number,
): TruncatedContext {
const result = { ...ctx };
result.report = result.report.length > 300 ? result.report.slice(0, 300) + '...' : result.report;
let vars = bb.variableDictionary.filter(v => !v.isIdLike);
if (vars.length > 10) {
const picoVars = vars.filter(v => v.picoRole);
const others = vars.filter(v => !v.picoRole).slice(0, 10 - picoVars.length);
vars = [...picoVars, ...others];
}
result.variables = vars.map(v => {
const type = v.confirmedType ?? v.inferredType;
return `- ${v.name}: ${type}`;
}).join('\n');
result.estimatedTokens = this.estimateTokens(result);
return result;
}
}
export const tokenTruncationService = new TokenTruncationService();

View File

@@ -0,0 +1,134 @@
/**
* ToolOrchestratorService — QPER 服务薄层封装
*
* Phase IV: 对话层调用 QPER 的统一入口
* - plan(): 读黑板(PICO hint) → planWorkflow → WorkflowPlan
* - formatPlanForLLM(): WorkflowPlan → LLM 可读摘要
*
* D4: 不创建独立 Tool 类,本 Service 统一封装
* D5: PICO 可选 hint — 用户直接表述优先于系统推断
*/
import { logger } from '../../../common/logging/index.js';
import { sessionBlackboardService } from './SessionBlackboardService.js';
import { workflowPlannerService, type WorkflowPlan } from './WorkflowPlannerService.js';
import type { PicoInference } from '../types/session-blackboard.types.js';
// ────────────────────────────────────────────
// Types
// ────────────────────────────────────────────
export interface PlanResult {
success: boolean;
plan: WorkflowPlan | null;
error?: string;
picoUsed: boolean;
}
// ────────────────────────────────────────────
// Service
// ────────────────────────────────────────────
export class ToolOrchestratorService {
/**
* 生成分析计划 — 对话层 → QPER 的桥梁
*
* 三层降级策略 (D5):
* 1. 用户消息明确 → planWorkflow (LLM 解析)
* 2. 用户消息模糊 + PICO 存在 → PICO hint 注入 LLM prompt
* 3. 用户消息模糊 + 无 PICO → 纯 LLM + DataProfile 推断
*/
async plan(sessionId: string, userMessage: string): Promise<PlanResult> {
let picoUsed = false;
try {
const bb = await sessionBlackboardService.get(sessionId);
let enrichedQuery = userMessage;
if (bb?.picoInference) {
const hint = this.buildPicoHint(bb.picoInference);
if (hint) {
enrichedQuery = `${userMessage}\n\n[参考上下文 - PICO 推断] ${hint}`;
picoUsed = true;
logger.info('[SSA:Orchestrator] PICO hint injected', {
sessionId,
confidence: bb.picoInference.confidence,
});
}
}
const plan = await workflowPlannerService.planWorkflow(sessionId, enrichedQuery);
logger.info('[SSA:Orchestrator] Plan generated', {
sessionId,
workflowId: plan.workflow_id,
totalSteps: plan.total_steps,
picoUsed,
});
return { success: true, plan, picoUsed };
} catch (error: any) {
logger.error('[SSA:Orchestrator] Plan failed', {
sessionId,
error: error.message,
});
return {
success: false,
plan: null,
error: error.message,
picoUsed,
};
}
}
/**
* 格式化 WorkflowPlan 为 LLM 可读摘要(注入 toolOutputs
*/
formatPlanForLLM(plan: WorkflowPlan): string {
const lines = [
`## 分析方案: ${plan.title}`,
plan.description,
'',
`${plan.total_steps} 个分析步骤:`,
];
for (const step of plan.steps) {
const desc = step.switch_condition
? `${step.description} [切换条件: ${step.switch_condition}]`
: step.description;
lines.push(`${step.step_number}. **${step.tool_name}** — ${desc}`);
}
if (plan.epv_warning) {
lines.push('', `⚠️ ${plan.epv_warning}`);
}
if (plan.planned_trace) {
lines.push('', `决策依据: ${plan.planned_trace.reasoning}`);
}
return lines.join('\n');
}
/**
* 构建 PICO hint 文本(仅在 PICO 有实质内容时生成)
*/
private buildPicoHint(pico: PicoInference): string | null {
const parts: string[] = [];
if (pico.population) parts.push(`研究人群=${pico.population}`);
if (pico.intervention) parts.push(`干预/暴露=${pico.intervention}`);
if (pico.comparison) parts.push(`对照=${pico.comparison}`);
if (pico.outcome) parts.push(`结局=${pico.outcome}`);
if (parts.length === 0) return null;
return `${parts.join(', ')} (置信度: ${pico.confidence})`;
}
}
export const toolOrchestratorService = new ToolOrchestratorService();

View File

@@ -0,0 +1,163 @@
/**
* ToolRegistryService — 工具注册表查询服务
*
* H2 仓储模式: 通过 IToolRepository 接口隔离数据源,
* 当前使用 JsonToolRepository读 tools_registry.json
* 未来可无缝切换为 PgToolRepository读 Prisma/PostgreSQL
*/
import { toolsRegistryLoader } from '../config/index.js';
import type { ToolDefinition, ToolsRegistry } from '../config/schemas.js';
import { logger } from '../../../common/logging/index.js';
// ────────────────────────────────────────────
// Repository Interface (H2)
// ────────────────────────────────────────────
export interface IToolRepository {
getAll(): ToolDefinition[];
getByCode(code: string): ToolDefinition | null;
reload?(): void;
}
// ────────────────────────────────────────────
// JSON-based implementation (Phase III)
// ────────────────────────────────────────────
export class JsonToolRepository implements IToolRepository {
getAll(): ToolDefinition[] {
return toolsRegistryLoader.get().tools;
}
getByCode(code: string): ToolDefinition | null {
return this.getAll().find(t => t.code === code) || null;
}
reload(): void {
toolsRegistryLoader.reload();
}
}
// ────────────────────────────────────────────
// ToolRegistryService
// ────────────────────────────────────────────
export class ToolRegistryService {
constructor(private repo: IToolRepository) {}
getAll(): ToolDefinition[] {
return this.repo.getAll();
}
getByCode(code: string): ToolDefinition | null {
return this.repo.getByCode(code);
}
getToolName(code: string): string {
return this.repo.getByCode(code)?.name ?? code;
}
getByCategory(category: string): ToolDefinition[] {
return this.repo.getAll().filter(t => t.category === category);
}
/**
* 按分析目标 + 变量类型推荐相关工具
* goal: comparison/correlation/regression/descriptive
*/
getRelevantTools(goal?: string, outputType?: string): ToolDefinition[] {
const all = this.repo.getAll();
if (!goal && !outputType) return all;
return all.filter(t => {
if (goal && outputType) {
return t.outputType === outputType || t.category === goal;
}
if (outputType) return t.outputType === outputType;
// goal → category mapping heuristic
const goalCategoryMap: Record<string, string[]> = {
comparison: ['parametric', 'nonparametric', 'categorical'],
correlation: ['correlation'],
regression: ['regression'],
descriptive: ['basic', 'composite'],
};
const categories = goalCategoryMap[goal!] || [];
return categories.includes(t.category);
});
}
/**
* 按意图/阶段过滤工具列表Phase IV: 阶段性可见性)
* explore/chat: 只看到数据类工具
* consult: 看到全部工具(供推荐参考)
* analyze: 看到执行类工具
*/
getVisibleTools(intent?: string): ToolDefinition[] {
const all = this.repo.getAll();
if (!intent) return all;
const dataCategories = ['basic', 'composite'];
const execCategories = ['parametric', 'nonparametric', 'categorical', 'correlation', 'regression'];
switch (intent) {
case 'explore':
case 'chat':
return all.filter(t => dataCategories.includes(t.category));
case 'consult':
return all;
case 'analyze':
case 'feedback':
return all;
default:
return all;
}
}
/**
* 格式化工具列表为 LLM 可读文本
*/
formatForLLM(tools?: ToolDefinition[]): string {
const list = tools || this.repo.getAll();
return list
.map(t => {
let line = `- ${t.code}: ${t.name}${t.description}`;
if (t.prerequisite) line += ` [前提: ${t.prerequisite}]`;
if (t.fallback) line += ` [降级: ${t.fallback}]`;
return line;
})
.join('\n');
}
/**
* 格式化单个工具的详细信息(用于 method_consult 注入)
*/
formatToolDetail(code: string): string | null {
const tool = this.repo.getByCode(code);
if (!tool) return null;
const lines = [
`**${tool.name}** (${tool.code})`,
`- 类别: ${tool.category}`,
`- 描述: ${tool.description}`,
];
if (tool.prerequisite) lines.push(`- 前提条件: ${tool.prerequisite}`);
if (tool.fallback) lines.push(`- 降级方案: ${tool.fallback}`);
if (tool.inputParams.length) {
const params = tool.inputParams
.map(p => `${p.name}${p.required ? '' : '?'}: ${p.type}`)
.join(', ');
lines.push(`- 参数: ${params}`);
}
return lines.join('\n');
}
reload(): void {
if (this.repo.reload) {
this.repo.reload();
logger.info('[ToolRegistry] 热更新完成');
}
}
}
// Singleton — 默认使用 JSON 数据源
export const toolRegistryService = new ToolRegistryService(new JsonToolRepository());

View File

@@ -16,7 +16,7 @@ import axios, { AxiosInstance } from 'axios';
import { logger } from '../../../common/logging/index.js';
import { prisma } from '../../../config/database.js';
import { storage } from '../../../common/storage/index.js';
import { WorkflowStep, ToolCode, AVAILABLE_TOOLS } from './WorkflowPlannerService.js';
import { WorkflowStep, ToolCode } from './WorkflowPlannerService.js';
import { conclusionGeneratorService } from './ConclusionGeneratorService.js';
import { reflectionService } from './ReflectionService.js';
import type { ConclusionReport } from '../types/reflection.types.js';

View File

@@ -15,72 +15,14 @@ import { DataProfile, dataProfileService } from './DataProfileService.js';
import { queryService } from './QueryService.js';
import { decisionTableService, type MatchResult } from './DecisionTableService.js';
import { flowTemplateService, type FilledStep, type FillResult } from './FlowTemplateService.js';
import { toolsRegistryLoader } from '../config/index.js';
import { toolRegistryService } from './ToolRegistryService.js';
import type { ParsedQuery } from '../types/query.types.js';
// 可用工具定义
export const AVAILABLE_TOOLS = {
ST_DESCRIPTIVE: {
code: 'ST_DESCRIPTIVE',
name: '描述性统计',
category: 'basic',
description: '数据概况、基线特征表',
inputParams: ['variables', 'group_var?'],
outputType: 'summary'
},
ST_T_TEST_IND: {
code: 'ST_T_TEST_IND',
name: '独立样本T检验',
category: 'parametric',
description: '两组连续变量比较(参数方法)',
inputParams: ['group_var', 'value_var'],
outputType: 'comparison',
prerequisite: '正态分布',
fallback: 'ST_MANN_WHITNEY'
},
ST_MANN_WHITNEY: {
code: 'ST_MANN_WHITNEY',
name: 'Mann-Whitney U检验',
category: 'nonparametric',
description: '两组连续/等级变量比较(非参数方法)',
inputParams: ['group_var', 'value_var'],
outputType: 'comparison'
},
ST_T_TEST_PAIRED: {
code: 'ST_T_TEST_PAIRED',
name: '配对T检验',
category: 'parametric',
description: '配对设计的前后对比',
inputParams: ['before_var', 'after_var'],
outputType: 'comparison'
},
ST_CHI_SQUARE: {
code: 'ST_CHI_SQUARE',
name: '卡方检验',
category: 'categorical',
description: '两个分类变量的独立性检验',
inputParams: ['var1', 'var2'],
outputType: 'association'
},
ST_CORRELATION: {
code: 'ST_CORRELATION',
name: '相关分析',
category: 'correlation',
description: 'Pearson/Spearman相关系数',
inputParams: ['var_x', 'var_y', 'method?'],
outputType: 'correlation'
},
ST_LOGISTIC_BINARY: {
code: 'ST_LOGISTIC_BINARY',
name: '二元Logistic回归',
category: 'regression',
description: '二分类结局的多因素分析',
inputParams: ['outcome_var', 'predictors', 'confounders?'],
outputType: 'regression'
}
} as const;
export type ToolCode = keyof typeof AVAILABLE_TOOLS;
/**
* 工具代码类型 — 所有支持的统计工具 code
* Phase IV: 不再依赖硬编码常量,工具定义统一由 ToolRegistryService 管理
*/
export type ToolCode = string;
/** P 层策略日志 — 记录规划决策,供 R 层合并 E 层事实后生成方法学说明 */
export interface PlannedTrace {
@@ -460,7 +402,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_DESCRIPTIVE',
toolName: AVAILABLE_TOOLS.ST_DESCRIPTIVE.name,
toolName: toolRegistryService.getToolName('ST_DESCRIPTIVE'),
inputParams: {
variables: descVars,
group_var: intent.variables?.grouping
@@ -477,7 +419,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_T_TEST_PAIRED',
toolName: AVAILABLE_TOOLS.ST_T_TEST_PAIRED.name,
toolName: toolRegistryService.getToolName('ST_T_TEST_PAIRED'),
inputParams: {
before_var: intent.variables.continuous[0],
after_var: intent.variables.continuous[1]
@@ -492,7 +434,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_T_TEST_IND',
toolName: AVAILABLE_TOOLS.ST_T_TEST_IND.name,
toolName: toolRegistryService.getToolName('ST_T_TEST_IND'),
inputParams: {
group_var: intent.variables.grouping,
value_var: intent.variables.continuous[0]
@@ -517,7 +459,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_CHI_SQUARE',
toolName: AVAILABLE_TOOLS.ST_CHI_SQUARE.name,
toolName: toolRegistryService.getToolName('ST_CHI_SQUARE'),
inputParams: {
var1: var1,
var2: var2
@@ -530,7 +472,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_CORRELATION',
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
toolName: toolRegistryService.getToolName('ST_CORRELATION'),
inputParams: {
var_x: var1,
var_y: var2,
@@ -548,7 +490,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_T_TEST_IND',
toolName: AVAILABLE_TOOLS.ST_T_TEST_IND.name,
toolName: toolRegistryService.getToolName('ST_T_TEST_IND'),
inputParams: {
group_var: catVar,
value_var: contVar
@@ -563,7 +505,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_CORRELATION',
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
toolName: toolRegistryService.getToolName('ST_CORRELATION'),
inputParams: {
var_x: intent.variables.continuous[0],
var_y: intent.variables.continuous[1],
@@ -599,7 +541,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_LOGISTIC_BINARY',
toolName: AVAILABLE_TOOLS.ST_LOGISTIC_BINARY.name,
toolName: toolRegistryService.getToolName('ST_LOGISTIC_BINARY'),
inputParams: {
outcome_var: regressionOutcome,
predictors: regressionPredictors
@@ -608,20 +550,16 @@ export class WorkflowPlannerService {
dependsOn: [1]
});
} else {
// 连续结局 → 暂时也使用 Logistic 回归TODO: 添加线性回归工具)
// 实际应该使用线性回归,但当前工具库暂未支持
logger.warn('[WorkflowPlanner] Linear regression not yet implemented, falling back to descriptive stats');
// 添加一个额外的描述性统计步骤作为替代
// 连续结局 → 线性回归
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_CORRELATION',
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
toolCode: 'ST_LINEAR_REG',
toolName: toolRegistryService.getToolName('ST_LINEAR_REG'),
inputParams: {
var_x: regressionPredictors[0],
var_y: regressionOutcome,
method: 'auto'
outcome_var: regressionOutcome,
predictors: regressionPredictors
},
purpose: `分析 ${regressionPredictors[0]} ${regressionOutcome}相关性(线性回归待开发`,
purpose: `分析 ${regressionPredictors.join('、')} ${regressionOutcome}影响(多因素线性回归)`,
dependsOn: [1]
});
}
@@ -630,7 +568,7 @@ export class WorkflowPlannerService {
steps.push({
stepOrder: stepOrder++,
toolCode: 'ST_LOGISTIC_BINARY',
toolName: AVAILABLE_TOOLS.ST_LOGISTIC_BINARY.name,
toolName: toolRegistryService.getToolName('ST_LOGISTIC_BINARY'),
inputParams: {
outcome_var: intent.variables.grouping,
predictors: intent.variables.continuous?.slice(0, 5) || []

View File

@@ -0,0 +1,139 @@
/**
* Phase I — get_data_overview 工具
*
* READ 层工具:调用 DataProfileService 获取数据画像,
* 扩展正态性检验 + 完整病例数,
* 初始化变量字典,写入 Session 黑板,
* 返回五段式结构化报告。
*/
import { logger } from '../../../../common/logging/index.js';
import { dataProfileService, type DataProfile, type ColumnProfile } from '../DataProfileService.js';
import { sessionBlackboardService } from '../SessionBlackboardService.js';
import {
createEmptyBlackboard,
type DataOverview,
type VariableDictEntry,
type NormalityResult,
type FiveSectionReport,
type SessionBlackboard,
} from '../../types/session-blackboard.types.js';
export interface DataOverviewResult {
success: boolean;
report?: FiveSectionReport;
blackboard?: SessionBlackboard;
error?: string;
}
/**
* 执行 get_data_overview
* 1. 调用 DataProfileService 获取 DataProfile含正态性检验 + 完整病例数)
* 2. 初始化 VariableDictionary
* 3. 写入 SessionBlackboard
* 4. 生成五段式报告
*/
export async function executeGetDataOverview(sessionId: string): Promise<DataOverviewResult> {
try {
logger.info('[SSA:GetDataOverview] Starting data overview generation', { sessionId });
// Step 1: 获取 DataProfile
const profileResult = await dataProfileService.generateProfileFromSession(sessionId);
if (!profileResult.success || !profileResult.profile) {
return {
success: false,
error: profileResult.error || '数据画像生成失败',
};
}
const profile = profileResult.profile;
const rawResponse = profileResult as any;
// Step 2: 提取 Phase I 新增字段Python 扩展返回)
const normalityTests: NormalityResult[] = rawResponse.normalityTests
?? extractNormalityFromProfile(profile);
const completeCaseCount: number = rawResponse.completeCaseCount
?? estimateCompleteCases(profile);
// Step 3: 构建 DataOverview
const dataOverview: DataOverview = {
profile,
normalityTests,
completeCaseCount,
generatedAt: new Date().toISOString(),
};
// Step 4: 初始化 VariableDictionary
const variableDictionary = initVariableDictionary(profile.columns);
// Step 5: 写入 SessionBlackboard
const blackboard = createEmptyBlackboard(sessionId);
blackboard.dataOverview = dataOverview;
blackboard.variableDictionary = variableDictionary;
await sessionBlackboardService.set(sessionId, blackboard);
// Step 6: 生成五段式报告
const report = sessionBlackboardService.generateFiveSectionReport(dataOverview, variableDictionary);
logger.info('[SSA:GetDataOverview] Data overview generation complete', {
sessionId,
totalVars: profile.summary.totalColumns,
completeCases: completeCaseCount,
nonNormalVars: normalityTests.filter(t => !t.isNormal).length,
});
return {
success: true,
report,
blackboard,
};
} catch (error: any) {
logger.error('[SSA:GetDataOverview] Failed', { sessionId, error: error.message });
return {
success: false,
error: error.message,
};
}
}
/**
* 从 DataProfile.columns 初始化变量字典。
* 全部标记为 ai_inferred等待用户确认。
*/
function initVariableDictionary(columns: ColumnProfile[]): VariableDictEntry[] {
return columns.map(col => ({
name: col.name,
inferredType: col.type,
confirmedType: null,
label: null,
picoRole: null,
isIdLike: col.isIdLike ?? false,
confirmStatus: 'ai_inferred' as const,
}));
}
/**
* 降级方案:如果 Python 返回中没有 normalityTests
* 尝试根据 skewness/kurtosis 粗略估算。
*/
function extractNormalityFromProfile(profile: DataProfile): NormalityResult[] {
return profile.columns
.filter(c => c.type === 'numeric' && c.skewness !== undefined)
.map(c => ({
variable: c.name,
method: 'shapiro_wilk' as const,
statistic: 0,
pValue: 0,
isNormal: Math.abs(c.skewness ?? 0) < 1 && Math.abs(c.kurtosis ?? 0) < 3,
}));
}
/**
* 降级方案:估算完整病例数。
* 如果 Python 没有返回 completeCaseCount用 totalRows - 最大单列缺失估算下界。
*/
function estimateCompleteCases(profile: DataProfile): number {
const maxMissing = Math.max(...profile.columns.map(c => c.missingCount), 0);
return Math.max(0, profile.summary.totalRows - maxMissing);
}

View File

@@ -0,0 +1,76 @@
/**
* Phase I — get_variable_detail 工具
*
* READ 层工具:调用 Python variable-detail 端点获取单变量的
* 描述统计、直方图、正态性检验、Q-Q 图数据。
* 同时更新 SessionBlackboard 中该变量的字典条目。
*/
import { logger } from '../../../../common/logging/index.js';
import { dataProfileService } from '../DataProfileService.js';
import { sessionBlackboardService } from '../SessionBlackboardService.js';
import type { ColumnType, VariableDictPatch } from '../../types/session-blackboard.types.js';
export interface VariableDetailResult {
success: boolean;
variable?: string;
type?: string;
descriptive?: Record<string, any>;
outliers?: Record<string, any>;
histogram?: { counts: number[]; edges: number[] };
normalityTest?: { method: string; statistic: number; pValue: number; isNormal: boolean } | null;
qqPlot?: { theoretical: number[]; observed: number[] };
distribution?: Array<{ value: string; count: number; percentage: number }>;
error?: string;
}
/**
* 执行 get_variable_detail
* 1. 调用 DataProfileService.getVariableDetail()(转发 Python 端点)
* 2. 若 SessionBlackboard 存在,更新该变量的 confirmedType / label如有
* 3. 返回单变量详情
*/
export async function executeGetVariableDetail(
sessionId: string,
variableName: string,
confirmedType?: ColumnType,
label?: string,
): Promise<VariableDetailResult> {
try {
logger.info('[SSA:GetVariableDetail] Starting', { sessionId, variableName });
const detail = await dataProfileService.getVariableDetail(sessionId, variableName);
if (!detail.success) {
return { success: false, error: detail.error || '变量详情获取失败' };
}
if (confirmedType || label) {
const patch: VariableDictPatch = {};
if (confirmedType) {
patch.confirmedType = confirmedType;
}
if (label) {
patch.label = label;
}
await sessionBlackboardService.updateVariable(sessionId, variableName, patch);
}
logger.info('[SSA:GetVariableDetail] Done', { sessionId, variableName, type: detail.type });
return {
success: true,
variable: detail.variable,
type: detail.type,
descriptive: detail.descriptive,
outliers: detail.outliers,
histogram: detail.histogram,
normalityTest: detail.normalityTest,
qqPlot: detail.qqPlot,
distribution: detail.distribution,
};
} catch (error: any) {
logger.error('[SSA:GetVariableDetail] Failed', { sessionId, variableName, error: error.message });
return { success: false, error: error.message };
}
}

View File

@@ -0,0 +1,251 @@
/**
* Phase I — Session 黑板类型定义 + Zod Schema
*
* SessionBlackboard 是 SSA 模块的会话级数据缓存,
* 存储为扁平单 JSON Blobkey = ssa:session:{sessionId}
*
* 四层数据结构:
* Layer 1: DataOverview — get_data_overview 写入
* Layer 2: VariableDictionary — get_data_overview 初始化,用户可编辑
* Layer 3: PicoInference — LLM 推断 + 用户确认
* Layer 4: QperTrace — Phase II+ 使用Phase I 预留
*/
import { z } from 'zod';
import type { DataProfile } from '../services/DataProfileService.js';
// ────────────────────────────────────────────
// 1. 正态性检验结果
// ────────────────────────────────────────────
export interface NormalityResult {
variable: string;
method: 'shapiro_wilk' | 'kolmogorov_smirnov';
statistic: number;
pValue: number;
isNormal: boolean;
}
export const NormalityResultSchema = z.object({
variable: z.string(),
method: z.enum(['shapiro_wilk', 'kolmogorov_smirnov']),
statistic: z.number(),
pValue: z.number(),
isNormal: z.boolean(),
});
// ────────────────────────────────────────────
// 2. 数据总览(五段式报告数据源)
// ────────────────────────────────────────────
export interface DataOverview {
profile: DataProfile;
completeCaseCount: number;
normalityTests: NormalityResult[];
generatedAt: string;
}
export const DataOverviewSchema = z.object({
profile: z.object({
columns: z.array(z.any()),
summary: z.object({
totalRows: z.number(),
totalColumns: z.number(),
numericColumns: z.number(),
categoricalColumns: z.number(),
datetimeColumns: z.number(),
textColumns: z.number(),
overallMissingRate: z.number(),
totalMissingCells: z.number(),
}),
}),
completeCaseCount: z.number(),
normalityTests: z.array(NormalityResultSchema),
generatedAt: z.string(),
});
// ────────────────────────────────────────────
// 3. 变量字典条目
// ────────────────────────────────────────────
export type ColumnType = 'numeric' | 'categorical' | 'datetime' | 'text';
export type PicoRole = 'P' | 'I' | 'C' | 'O';
export type ConfirmStatus = 'ai_inferred' | 'user_confirmed';
export interface VariableDictEntry {
name: string;
inferredType: ColumnType;
confirmedType: ColumnType | null;
label: string | null;
picoRole: PicoRole | null;
isIdLike: boolean;
confirmStatus: ConfirmStatus;
}
export const VariableDictEntrySchema = z.object({
name: z.string(),
inferredType: z.enum(['numeric', 'categorical', 'datetime', 'text']),
confirmedType: z.enum(['numeric', 'categorical', 'datetime', 'text']).nullable(),
label: z.string().nullable(),
picoRole: z.enum(['P', 'I', 'C', 'O']).nullable(),
isIdLike: z.boolean(),
confirmStatus: z.enum(['ai_inferred', 'user_confirmed']),
});
// ────────────────────────────────────────────
// 4. PICO 推断
// ────────────────────────────────────────────
export type PicoConfidence = 'high' | 'medium' | 'low';
export interface PicoInference {
population: string | null;
intervention: string | null;
comparison: string | null;
outcome: string | null;
confidence: PicoConfidence;
status: ConfirmStatus;
}
export const PicoInferenceSchema = z.object({
population: z.string().nullable(),
intervention: z.string().nullable(),
comparison: z.string().nullable(),
outcome: z.string().nullable(),
confidence: z.enum(['high', 'medium', 'low']),
status: z.enum(['ai_inferred', 'user_confirmed']),
});
// ────────────────────────────────────────────
// 5. QPER 执行轨迹Phase II+ 预留)
// ────────────────────────────────────────────
export interface QperTraceEntry {
workflowId: string;
stepIndex: number;
toolCode: string;
status: 'success' | 'error' | 'blocked';
summary: string;
timestamp: string;
}
export const QperTraceEntrySchema = z.object({
workflowId: z.string(),
stepIndex: z.number(),
toolCode: z.string(),
status: z.enum(['success', 'error', 'blocked']),
summary: z.string(),
timestamp: z.string(),
});
// ────────────────────────────────────────────
// 6. SessionBlackboard 主体
// ────────────────────────────────────────────
export interface PendingAskUser {
questionId: string;
question: string;
inputType: 'single_select' | 'multi_select' | 'free_text' | 'confirm';
metadata?: Record<string, any>;
createdAt: string;
}
export interface SessionBlackboard {
sessionId: string;
createdAt: string;
updatedAt: string;
dataOverview: DataOverview | null;
variableDictionary: VariableDictEntry[];
picoInference: PicoInference | null;
qperTrace: QperTraceEntry[];
pendingAskUser: PendingAskUser | null;
}
export const PendingAskUserSchema = z.object({
questionId: z.string(),
question: z.string(),
inputType: z.enum(['single_select', 'multi_select', 'free_text', 'confirm']),
metadata: z.record(z.string(), z.any()).optional(),
createdAt: z.string(),
});
export const SessionBlackboardSchema = z.object({
sessionId: z.string(),
createdAt: z.string(),
updatedAt: z.string(),
dataOverview: DataOverviewSchema.nullable(),
variableDictionary: z.array(VariableDictEntrySchema),
picoInference: PicoInferenceSchema.nullable(),
qperTrace: z.array(QperTraceEntrySchema),
pendingAskUser: PendingAskUserSchema.nullable().default(null),
});
// ────────────────────────────────────────────
// 7. 五段式报告结构(前端渲染用)
// ────────────────────────────────────────────
export interface ReportSection {
title: string;
content: string;
details?: string[];
}
export interface FiveSectionReport {
basicInfo: ReportSection;
missingData: ReportSection & { varsWithMissing: string[]; completeCaseCount: number };
dataTypes: ReportSection & { categoricalVars: string[]; numericVars: string[]; needsConfirmation: boolean };
outliers: ReportSection & { method: string; varsWithOutliers: string[] };
normality: ReportSection & { method: string; nonNormalVars: string[]; normalVars: string[] };
}
// ────────────────────────────────────────────
// 8. 工厂函数:创建空白 SessionBlackboard
// ────────────────────────────────────────────
export function createEmptyBlackboard(sessionId: string): SessionBlackboard {
const now = new Date().toISOString();
return {
sessionId,
createdAt: now,
updatedAt: now,
dataOverview: null,
variableDictionary: [],
picoInference: null,
qperTrace: [],
pendingAskUser: null,
};
}
// ────────────────────────────────────────────
// 9. 变量字典 patch 输入PATCH API 用)
// ────────────────────────────────────────────
export interface VariableDictPatch {
confirmedType?: ColumnType;
label?: string;
picoRole?: PicoRole | null;
}
export const VariableDictPatchSchema = z.object({
confirmedType: z.enum(['numeric', 'categorical', 'datetime', 'text']).optional(),
label: z.string().optional(),
picoRole: z.enum(['P', 'I', 'C', 'O']).nullable().optional(),
});
// ────────────────────────────────────────────
// 10. PICO 确认输入PATCH API 用)
// ────────────────────────────────────────────
export interface PicoPatch {
population?: string | null;
intervention?: string | null;
comparison?: string | null;
outcome?: string | null;
}
export const PicoPatchSchema = z.object({
population: z.string().nullable().optional(),
intervention: z.string().nullable().optional(),
comparison: z.string().nullable().optional(),
outcome: z.string().nullable().optional(),
});