Files
AIclinicalresearch/backend/src/modules/ssa/routes/workflow.routes.ts
HaHafeng 371e1c069c feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers
Implement the full QPER intelligent analysis pipeline:

- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement

- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging

- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output

- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 18:15:53 +08:00

605 lines
18 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* SSA Workflow Routes (Phase 2A)
*
* 多步骤工作流 API
* - POST /plan - 生成工作流计划
* - POST /:workflowId/execute - 执行工作流
* - GET /:workflowId/status - 获取执行状态
* - GET /:workflowId/stream - SSE 实时进度
*/
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { logger } from '../../../common/logging/index.js';
import { workflowPlannerService } from '../services/WorkflowPlannerService.js';
import { workflowExecutorService } from '../services/WorkflowExecutorService.js';
import { dataProfileService } from '../services/DataProfileService.js';
import { queryService } from '../services/QueryService.js';
import { reflectionService } from '../services/ReflectionService.js';
import { prisma } from '../../../config/database.js';
import { cache } from '../../../common/cache/index.js';
// 请求类型定义
interface PlanWorkflowBody {
sessionId: string;
userQuery: string;
}
interface ExecuteWorkflowParams {
workflowId: string;
}
interface WorkflowStatusParams {
workflowId: string;
}
interface GenerateProfileBody {
sessionId: string;
}
export default async function workflowRoutes(app: FastifyInstance) {
/**
* POST /workflow/plan
* 生成多步骤工作流计划
*/
app.post<{ Body: PlanWorkflowBody }>(
'/plan',
async (request, reply) => {
const { sessionId, userQuery } = request.body;
if (!sessionId || !userQuery) {
return reply.status(400).send({
success: false,
error: 'sessionId and userQuery are required'
});
}
try {
logger.info('[SSA:API] Planning workflow', { sessionId, userQuery });
const plan = await workflowPlannerService.planWorkflow(sessionId, userQuery);
return reply.send({
success: true,
plan
});
} catch (error: any) {
logger.error('[SSA:API] Workflow planning failed', {
sessionId,
error: error.message
});
return reply.status(500).send({
success: false,
error: error.message
});
}
}
);
/**
* POST /workflow/intent
* Phase Q: LLM 意图理解 — 解析用户自然语言为结构化 ParsedQuery
*/
app.post<{ Body: PlanWorkflowBody }>(
'/intent',
async (request, reply) => {
const { sessionId, userQuery } = request.body;
if (!sessionId || !userQuery) {
return reply.status(400).send({
success: false,
error: 'sessionId and userQuery are required'
});
}
try {
logger.info('[SSA:API] Parsing intent', { sessionId, userQuery });
const parsed = await queryService.parseIntent(userQuery, sessionId);
return reply.send({
success: true,
intent: parsed,
needsClarification: parsed.needsClarification,
clarificationCards: parsed.clarificationCards || [],
});
} catch (error: any) {
logger.error('[SSA:API] Intent parsing failed', {
sessionId,
error: error.message
});
return reply.status(500).send({
success: false,
error: error.message
});
}
}
);
/**
* POST /workflow/clarify
* Phase Q: 用户回答追问卡片后,补全 ParsedQuery 并重新规划
*/
app.post<{ Body: { sessionId: string; userQuery: string; selections: Record<string, string> } }>(
'/clarify',
async (request, reply) => {
const { sessionId, userQuery, selections } = request.body;
if (!sessionId) {
return reply.status(400).send({
success: false,
error: 'sessionId is required'
});
}
try {
logger.info('[SSA:API] Processing clarification', { sessionId, selections });
// 将用户选择拼接到原始 query 中,重新走 intent 解析
const selectionText = Object.entries(selections)
.map(([key, value]) => `${key}: ${value}`)
.join('; ');
const enrichedQuery = userQuery
? `${userQuery}(补充说明:${selectionText}`
: selectionText;
const parsed = await queryService.parseIntent(enrichedQuery, sessionId);
// 如果这次置信度足够,直接生成工作流计划
if (!parsed.needsClarification) {
const plan = await workflowPlannerService.planWorkflow(sessionId, enrichedQuery);
return reply.send({
success: true,
intent: parsed,
plan,
needsClarification: false,
});
}
return reply.send({
success: true,
intent: parsed,
needsClarification: true,
clarificationCards: parsed.clarificationCards || [],
});
} catch (error: any) {
logger.error('[SSA:API] Clarification failed', {
sessionId,
error: error.message
});
return reply.status(500).send({
success: false,
error: error.message
});
}
}
);
/**
* POST /workflow/:workflowId/execute
* 执行工作流
*/
app.post<{ Params: ExecuteWorkflowParams; Body: { sessionId: string } }>(
'/:workflowId/execute',
async (request, reply) => {
const { workflowId } = request.params;
const { sessionId } = request.body;
if (!sessionId) {
return reply.status(400).send({
success: false,
error: 'sessionId is required'
});
}
try {
logger.info('[SSA:API] Executing workflow', { workflowId, sessionId });
const result = await workflowExecutorService.executeWorkflow(workflowId, sessionId);
return reply.send({
success: true,
result
});
} catch (error: any) {
logger.error('[SSA:API] Workflow execution failed', {
workflowId,
error: error.message
});
return reply.status(500).send({
success: false,
error: error.message
});
}
}
);
/**
* GET /workflow/:workflowId/status
* 获取工作流状态
*/
app.get<{ Params: WorkflowStatusParams }>(
'/:workflowId/status',
async (request, reply) => {
const { workflowId } = request.params;
try {
const status = await workflowExecutorService.getWorkflowStatus(workflowId);
if (!status) {
return reply.status(404).send({
success: false,
error: 'Workflow not found'
});
}
return reply.send({
success: true,
workflow: status
});
} catch (error: any) {
logger.error('[SSA:API] Get workflow status failed', {
workflowId,
error: error.message
});
return reply.status(500).send({
success: false,
error: error.message
});
}
}
);
/**
* GET /workflow/:workflowId/stream
* SSE 实时进度流 - 连接后自动开始执行
*/
app.get<{ Params: WorkflowStatusParams }>(
'/:workflowId/stream',
async (request, reply) => {
const { workflowId } = request.params;
logger.info('[SSA:SSE] Stream connected', { workflowId });
// 设置 SSE 响应头
reply.raw.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Access-Control-Allow-Origin': '*'
});
// 发送初始连接确认
reply.raw.write(`data: ${JSON.stringify({ type: 'connected', workflowId })}\n\n`);
// 发送心跳
const heartbeat = setInterval(() => {
reply.raw.write(':heartbeat\n\n');
}, 15000);
let isCompleted = false;
// 监听进度事件
const onProgress = (message: any) => {
// 添加 workflowId 到消息中
const enrichedMessage = { ...message, workflowId };
reply.raw.write(`data: ${JSON.stringify(enrichedMessage)}\n\n`);
// 如果工作流完成,标记并清理
if (message.type === 'workflow_complete' || message.type === 'workflow_error') {
isCompleted = true;
cleanup();
}
};
workflowExecutorService.on('progress', onProgress);
// 清理函数
const cleanup = () => {
clearInterval(heartbeat);
workflowExecutorService.off('progress', onProgress);
if (!isCompleted) {
reply.raw.write(`data: ${JSON.stringify({ type: 'disconnected' })}\n\n`);
}
reply.raw.end();
};
// 客户端断开连接时清理
request.raw.on('close', cleanup);
// 获取 workflow 的 session_id 并启动执行
try {
const workflow = await import('../../../config/database.js').then(m =>
m.prisma.ssaWorkflow.findUnique({
where: { id: workflowId },
select: { sessionId: true, status: true }
})
);
if (!workflow) {
reply.raw.write(`data: ${JSON.stringify({
type: 'workflow_error',
error: 'Workflow not found',
workflowId
})}\n\n`);
cleanup();
return;
}
// 如果已完成,直接返回状态
if (workflow.status === 'completed' || workflow.status === 'failed') {
reply.raw.write(`data: ${JSON.stringify({
type: 'workflow_complete',
status: workflow.status,
workflowId
})}\n\n`);
cleanup();
return;
}
// 异步启动执行(不阻塞 SSE 连接)
logger.info('[SSA:SSE] Starting workflow execution', { workflowId, sessionId: workflow.sessionId });
workflowExecutorService.executeWorkflow(workflowId, workflow.sessionId)
.catch((error: any) => {
logger.error('[SSA:SSE] Workflow execution failed', { workflowId, error: error.message });
reply.raw.write(`data: ${JSON.stringify({
type: 'workflow_error',
error: error.message,
workflowId
})}\n\n`);
cleanup();
});
} catch (error: any) {
logger.error('[SSA:SSE] Failed to start workflow', { workflowId, error: error.message });
reply.raw.write(`data: ${JSON.stringify({
type: 'workflow_error',
error: error.message,
workflowId
})}\n\n`);
cleanup();
}
}
);
/**
* POST /workflow/profile
* 生成数据画像
*/
app.post<{ Body: GenerateProfileBody }>(
'/profile',
async (request, reply) => {
const { sessionId } = request.body;
if (!sessionId) {
return reply.status(400).send({
success: false,
error: 'sessionId is required'
});
}
try {
logger.info('[SSA:API] Generating data profile', { sessionId });
const result = await dataProfileService.generateProfileFromSession(sessionId);
if (!result.success || !result.profile) {
// 如果画像生成失败,返回基于 session schema 的简化版本
const session = await import('../../../config/database.js').then(m => m.prisma.ssaSession.findUnique({
where: { id: sessionId }
}));
if (session?.dataSchema) {
const schema = session.dataSchema as any;
const fallbackProfile = generateFallbackProfile(schema, session.title || 'data.csv');
return reply.send({
success: true,
profile: fallbackProfile
});
}
return reply.send({
success: false,
error: result.error || 'Profile generation failed'
});
}
// 转换为前端期望的格式
const frontendProfile = convertToFrontendFormat(result.profile, result.quality);
return reply.send({
success: true,
profile: frontendProfile
});
} catch (error: any) {
logger.error('[SSA:API] Profile generation failed', {
sessionId,
error: error.message
});
return reply.status(500).send({
success: false,
error: error.message
});
}
}
);
/**
* GET /workflow/sessions/:sessionId/conclusion
* 获取会话的分析结论(优先返回缓存,无缓存则从 workflow 结果重新生成)
*/
app.get<{ Params: { sessionId: string } }>(
'/sessions/:sessionId/conclusion',
async (request, reply) => {
const { sessionId } = request.params;
try {
// 查找该 session 最新的 completed workflow
const workflow = await prisma.ssaWorkflow.findFirst({
where: { sessionId: sessionId, status: { in: ['completed', 'partial'] } },
orderBy: { createdAt: 'desc' },
});
if (!workflow) {
return reply.status(404).send({
success: false,
error: 'No completed workflow found for this session',
});
}
// 检查缓存
const cacheKey = `ssa:conclusion:${workflow.id}`;
const cached = await cache.get(cacheKey);
if (cached) {
return reply.send({ success: true, conclusion: cached, source: 'cache' });
}
// 无缓存:获取 workflow steps 结果并重新生成
const steps = await prisma.ssaWorkflowStep.findMany({
where: { workflowId: workflow.id },
orderBy: { stepOrder: 'asc' },
});
const results = steps.map((s: any) => ({
stepOrder: s.stepOrder,
toolCode: s.toolCode,
toolName: s.toolName,
status: s.status,
result: s.outputResult,
reportBlocks: s.reportBlocks,
executionMs: s.executionMs || 0,
}));
const workflowPlan = workflow.workflowPlan as any;
const conclusion = await reflectionService.reflect(
{
workflowId: workflow.id,
goal: workflowPlan?.goal || '统计分析',
title: workflowPlan?.title,
methodology: workflowPlan?.methodology,
plannedTrace: workflowPlan?.planned_trace,
},
results,
);
return reply.send({ success: true, conclusion, source: conclusion.source });
} catch (error: any) {
logger.error('[SSA:API] Get conclusion failed', { sessionId, error: error.message });
return reply.status(500).send({ success: false, error: error.message });
}
}
);
}
/**
* 将后端 DataProfile 转换为前端期望的格式
*/
function convertToFrontendFormat(profile: any, quality?: any) {
const summary = profile.summary || {};
const columns = profile.columns || [];
return {
file_name: 'data.csv',
row_count: summary.totalRows || 0,
column_count: summary.totalColumns || 0,
total_cells: (summary.totalRows || 0) * (summary.totalColumns || 0),
missing_cells: summary.totalMissingCells || 0,
missing_ratio: (summary.overallMissingRate || 0) / 100,
duplicate_rows: 0,
duplicate_ratio: 0,
numeric_columns: summary.numericColumns || 0,
categorical_columns: summary.categoricalColumns || 0,
datetime_columns: summary.datetimeColumns || 0,
quality_score: quality?.score || 85,
quality_grade: quality?.grade || 'B',
columns: columns.map((col: any) => ({
name: col.name,
dtype: col.type,
inferred_type: col.type,
non_null_count: col.totalCount - (col.missingCount || 0),
null_count: col.missingCount || 0,
null_ratio: (col.missingRate || 0) / 100,
unique_count: col.uniqueCount || 0,
unique_ratio: col.uniqueCount ? col.uniqueCount / col.totalCount : 0,
sample_values: col.topValues?.slice(0, 5).map((v: any) => v.value) || [],
mean: col.mean,
std: col.std,
min: col.min,
max: col.max,
median: col.median,
q1: col.q1,
q3: col.q3,
skewness: col.skewness,
kurtosis: col.kurtosis,
outlier_count: col.outlierCount,
outlier_ratio: col.outlierRate,
top_categories: col.topValues?.map((v: any) => ({
value: v.value,
count: v.count,
ratio: v.percentage / 100
}))
})),
warnings: quality?.issues || [],
recommendations: quality?.recommendations || [],
generated_at: new Date().toISOString()
};
}
/**
* 基于 Schema 生成简化版 fallback profile
*/
function generateFallbackProfile(schema: any, fileName: string) {
const columns = schema.columns || [];
const rowCount = schema.rowCount || 0;
const numericCols = columns.filter((c: any) => c.type === 'numeric');
const categoricalCols = columns.filter((c: any) => c.type === 'categorical');
const totalMissing = columns.reduce((sum: number, c: any) => sum + (c.nullCount || 0), 0);
const totalCells = rowCount * columns.length;
return {
file_name: fileName,
row_count: rowCount,
column_count: columns.length,
total_cells: totalCells,
missing_cells: totalMissing,
missing_ratio: totalCells > 0 ? totalMissing / totalCells : 0,
duplicate_rows: 0,
duplicate_ratio: 0,
numeric_columns: numericCols.length,
categorical_columns: categoricalCols.length,
datetime_columns: 0,
quality_score: 80,
quality_grade: 'B',
columns: columns.map((col: any) => ({
name: col.name,
dtype: col.type,
inferred_type: col.type,
non_null_count: rowCount - (col.nullCount || 0),
null_count: col.nullCount || 0,
null_ratio: rowCount > 0 ? (col.nullCount || 0) / rowCount : 0,
unique_count: col.uniqueValues || 0,
unique_ratio: rowCount > 0 ? (col.uniqueValues || 0) / rowCount : 0,
sample_values: []
})),
warnings: totalMissing > 0 ? [`数据中存在 ${totalMissing} 个缺失值`] : [],
recommendations: ['建议检查数据完整性后再进行分析'],
generated_at: new Date().toISOString()
};
}