/*
 * Commit: Implement the full QPER intelligent analysis pipeline
 * - Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement
 * - Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging
 * - Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output
 * - Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification
 * End-to-end test: 40/40 passed across two complete analysis scenarios.
 * Co-authored-by: Cursor <cursoragent@cursor.com>
 */
/**
 * SSA Workflow Routes (Phase 2A)
 *
 * Multi-step workflow API:
 * - POST /plan - generate a workflow plan
 * - POST /:workflowId/execute - execute a workflow
 * - GET /:workflowId/status - fetch execution status
 * - GET /:workflowId/stream - SSE real-time progress
 */

import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||
import { logger } from '../../../common/logging/index.js';
|
||
import { workflowPlannerService } from '../services/WorkflowPlannerService.js';
|
||
import { workflowExecutorService } from '../services/WorkflowExecutorService.js';
|
||
import { dataProfileService } from '../services/DataProfileService.js';
|
||
import { queryService } from '../services/QueryService.js';
|
||
import { reflectionService } from '../services/ReflectionService.js';
|
||
import { prisma } from '../../../config/database.js';
|
||
import { cache } from '../../../common/cache/index.js';
|
||
|
||
// 请求类型定义
|
||
interface PlanWorkflowBody {
|
||
sessionId: string;
|
||
userQuery: string;
|
||
}
|
||
|
||
interface ExecuteWorkflowParams {
|
||
workflowId: string;
|
||
}
|
||
|
||
interface WorkflowStatusParams {
|
||
workflowId: string;
|
||
}
|
||
|
||
interface GenerateProfileBody {
|
||
sessionId: string;
|
||
}
|
||
|
||
export default async function workflowRoutes(app: FastifyInstance) {
|
||
|
||
/**
|
||
* POST /workflow/plan
|
||
* 生成多步骤工作流计划
|
||
*/
|
||
app.post<{ Body: PlanWorkflowBody }>(
|
||
'/plan',
|
||
async (request, reply) => {
|
||
const { sessionId, userQuery } = request.body;
|
||
|
||
if (!sessionId || !userQuery) {
|
||
return reply.status(400).send({
|
||
success: false,
|
||
error: 'sessionId and userQuery are required'
|
||
});
|
||
}
|
||
|
||
try {
|
||
logger.info('[SSA:API] Planning workflow', { sessionId, userQuery });
|
||
|
||
const plan = await workflowPlannerService.planWorkflow(sessionId, userQuery);
|
||
|
||
return reply.send({
|
||
success: true,
|
||
plan
|
||
});
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:API] Workflow planning failed', {
|
||
sessionId,
|
||
error: error.message
|
||
});
|
||
|
||
return reply.status(500).send({
|
||
success: false,
|
||
error: error.message
|
||
});
|
||
}
|
||
}
|
||
);
|
||
|
||
/**
|
||
* POST /workflow/intent
|
||
* Phase Q: LLM 意图理解 — 解析用户自然语言为结构化 ParsedQuery
|
||
*/
|
||
app.post<{ Body: PlanWorkflowBody }>(
|
||
'/intent',
|
||
async (request, reply) => {
|
||
const { sessionId, userQuery } = request.body;
|
||
|
||
if (!sessionId || !userQuery) {
|
||
return reply.status(400).send({
|
||
success: false,
|
||
error: 'sessionId and userQuery are required'
|
||
});
|
||
}
|
||
|
||
try {
|
||
logger.info('[SSA:API] Parsing intent', { sessionId, userQuery });
|
||
|
||
const parsed = await queryService.parseIntent(userQuery, sessionId);
|
||
|
||
return reply.send({
|
||
success: true,
|
||
intent: parsed,
|
||
needsClarification: parsed.needsClarification,
|
||
clarificationCards: parsed.clarificationCards || [],
|
||
});
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:API] Intent parsing failed', {
|
||
sessionId,
|
||
error: error.message
|
||
});
|
||
|
||
return reply.status(500).send({
|
||
success: false,
|
||
error: error.message
|
||
});
|
||
}
|
||
}
|
||
);
|
||
|
||
/**
|
||
* POST /workflow/clarify
|
||
* Phase Q: 用户回答追问卡片后,补全 ParsedQuery 并重新规划
|
||
*/
|
||
app.post<{ Body: { sessionId: string; userQuery: string; selections: Record<string, string> } }>(
|
||
'/clarify',
|
||
async (request, reply) => {
|
||
const { sessionId, userQuery, selections } = request.body;
|
||
|
||
if (!sessionId) {
|
||
return reply.status(400).send({
|
||
success: false,
|
||
error: 'sessionId is required'
|
||
});
|
||
}
|
||
|
||
try {
|
||
logger.info('[SSA:API] Processing clarification', { sessionId, selections });
|
||
|
||
// 将用户选择拼接到原始 query 中,重新走 intent 解析
|
||
const selectionText = Object.entries(selections)
|
||
.map(([key, value]) => `${key}: ${value}`)
|
||
.join('; ');
|
||
const enrichedQuery = userQuery
|
||
? `${userQuery}(补充说明:${selectionText})`
|
||
: selectionText;
|
||
|
||
const parsed = await queryService.parseIntent(enrichedQuery, sessionId);
|
||
|
||
// 如果这次置信度足够,直接生成工作流计划
|
||
if (!parsed.needsClarification) {
|
||
const plan = await workflowPlannerService.planWorkflow(sessionId, enrichedQuery);
|
||
return reply.send({
|
||
success: true,
|
||
intent: parsed,
|
||
plan,
|
||
needsClarification: false,
|
||
});
|
||
}
|
||
|
||
return reply.send({
|
||
success: true,
|
||
intent: parsed,
|
||
needsClarification: true,
|
||
clarificationCards: parsed.clarificationCards || [],
|
||
});
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:API] Clarification failed', {
|
||
sessionId,
|
||
error: error.message
|
||
});
|
||
|
||
return reply.status(500).send({
|
||
success: false,
|
||
error: error.message
|
||
});
|
||
}
|
||
}
|
||
);
|
||
|
||
/**
|
||
* POST /workflow/:workflowId/execute
|
||
* 执行工作流
|
||
*/
|
||
app.post<{ Params: ExecuteWorkflowParams; Body: { sessionId: string } }>(
|
||
'/:workflowId/execute',
|
||
async (request, reply) => {
|
||
const { workflowId } = request.params;
|
||
const { sessionId } = request.body;
|
||
|
||
if (!sessionId) {
|
||
return reply.status(400).send({
|
||
success: false,
|
||
error: 'sessionId is required'
|
||
});
|
||
}
|
||
|
||
try {
|
||
logger.info('[SSA:API] Executing workflow', { workflowId, sessionId });
|
||
|
||
const result = await workflowExecutorService.executeWorkflow(workflowId, sessionId);
|
||
|
||
return reply.send({
|
||
success: true,
|
||
result
|
||
});
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:API] Workflow execution failed', {
|
||
workflowId,
|
||
error: error.message
|
||
});
|
||
|
||
return reply.status(500).send({
|
||
success: false,
|
||
error: error.message
|
||
});
|
||
}
|
||
}
|
||
);
|
||
|
||
/**
|
||
* GET /workflow/:workflowId/status
|
||
* 获取工作流状态
|
||
*/
|
||
app.get<{ Params: WorkflowStatusParams }>(
|
||
'/:workflowId/status',
|
||
async (request, reply) => {
|
||
const { workflowId } = request.params;
|
||
|
||
try {
|
||
const status = await workflowExecutorService.getWorkflowStatus(workflowId);
|
||
|
||
if (!status) {
|
||
return reply.status(404).send({
|
||
success: false,
|
||
error: 'Workflow not found'
|
||
});
|
||
}
|
||
|
||
return reply.send({
|
||
success: true,
|
||
workflow: status
|
||
});
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:API] Get workflow status failed', {
|
||
workflowId,
|
||
error: error.message
|
||
});
|
||
|
||
return reply.status(500).send({
|
||
success: false,
|
||
error: error.message
|
||
});
|
||
}
|
||
}
|
||
);
|
||
|
||
/**
|
||
* GET /workflow/:workflowId/stream
|
||
* SSE 实时进度流 - 连接后自动开始执行
|
||
*/
|
||
app.get<{ Params: WorkflowStatusParams }>(
|
||
'/:workflowId/stream',
|
||
async (request, reply) => {
|
||
const { workflowId } = request.params;
|
||
|
||
logger.info('[SSA:SSE] Stream connected', { workflowId });
|
||
|
||
// 设置 SSE 响应头
|
||
reply.raw.writeHead(200, {
|
||
'Content-Type': 'text/event-stream',
|
||
'Cache-Control': 'no-cache',
|
||
'Connection': 'keep-alive',
|
||
'Access-Control-Allow-Origin': '*'
|
||
});
|
||
|
||
// 发送初始连接确认
|
||
reply.raw.write(`data: ${JSON.stringify({ type: 'connected', workflowId })}\n\n`);
|
||
|
||
// 发送心跳
|
||
const heartbeat = setInterval(() => {
|
||
reply.raw.write(':heartbeat\n\n');
|
||
}, 15000);
|
||
|
||
let isCompleted = false;
|
||
|
||
// 监听进度事件
|
||
const onProgress = (message: any) => {
|
||
// 添加 workflowId 到消息中
|
||
const enrichedMessage = { ...message, workflowId };
|
||
reply.raw.write(`data: ${JSON.stringify(enrichedMessage)}\n\n`);
|
||
|
||
// 如果工作流完成,标记并清理
|
||
if (message.type === 'workflow_complete' || message.type === 'workflow_error') {
|
||
isCompleted = true;
|
||
cleanup();
|
||
}
|
||
};
|
||
|
||
workflowExecutorService.on('progress', onProgress);
|
||
|
||
// 清理函数
|
||
const cleanup = () => {
|
||
clearInterval(heartbeat);
|
||
workflowExecutorService.off('progress', onProgress);
|
||
if (!isCompleted) {
|
||
reply.raw.write(`data: ${JSON.stringify({ type: 'disconnected' })}\n\n`);
|
||
}
|
||
reply.raw.end();
|
||
};
|
||
|
||
// 客户端断开连接时清理
|
||
request.raw.on('close', cleanup);
|
||
|
||
// 获取 workflow 的 session_id 并启动执行
|
||
try {
|
||
const workflow = await import('../../../config/database.js').then(m =>
|
||
m.prisma.ssaWorkflow.findUnique({
|
||
where: { id: workflowId },
|
||
select: { sessionId: true, status: true }
|
||
})
|
||
);
|
||
|
||
if (!workflow) {
|
||
reply.raw.write(`data: ${JSON.stringify({
|
||
type: 'workflow_error',
|
||
error: 'Workflow not found',
|
||
workflowId
|
||
})}\n\n`);
|
||
cleanup();
|
||
return;
|
||
}
|
||
|
||
// 如果已完成,直接返回状态
|
||
if (workflow.status === 'completed' || workflow.status === 'failed') {
|
||
reply.raw.write(`data: ${JSON.stringify({
|
||
type: 'workflow_complete',
|
||
status: workflow.status,
|
||
workflowId
|
||
})}\n\n`);
|
||
cleanup();
|
||
return;
|
||
}
|
||
|
||
// 异步启动执行(不阻塞 SSE 连接)
|
||
logger.info('[SSA:SSE] Starting workflow execution', { workflowId, sessionId: workflow.sessionId });
|
||
workflowExecutorService.executeWorkflow(workflowId, workflow.sessionId)
|
||
.catch((error: any) => {
|
||
logger.error('[SSA:SSE] Workflow execution failed', { workflowId, error: error.message });
|
||
reply.raw.write(`data: ${JSON.stringify({
|
||
type: 'workflow_error',
|
||
error: error.message,
|
||
workflowId
|
||
})}\n\n`);
|
||
cleanup();
|
||
});
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:SSE] Failed to start workflow', { workflowId, error: error.message });
|
||
reply.raw.write(`data: ${JSON.stringify({
|
||
type: 'workflow_error',
|
||
error: error.message,
|
||
workflowId
|
||
})}\n\n`);
|
||
cleanup();
|
||
}
|
||
}
|
||
);
|
||
|
||
/**
|
||
* POST /workflow/profile
|
||
* 生成数据画像
|
||
*/
|
||
app.post<{ Body: GenerateProfileBody }>(
|
||
'/profile',
|
||
async (request, reply) => {
|
||
const { sessionId } = request.body;
|
||
|
||
if (!sessionId) {
|
||
return reply.status(400).send({
|
||
success: false,
|
||
error: 'sessionId is required'
|
||
});
|
||
}
|
||
|
||
try {
|
||
logger.info('[SSA:API] Generating data profile', { sessionId });
|
||
|
||
const result = await dataProfileService.generateProfileFromSession(sessionId);
|
||
|
||
if (!result.success || !result.profile) {
|
||
// 如果画像生成失败,返回基于 session schema 的简化版本
|
||
const session = await import('../../../config/database.js').then(m => m.prisma.ssaSession.findUnique({
|
||
where: { id: sessionId }
|
||
}));
|
||
|
||
if (session?.dataSchema) {
|
||
const schema = session.dataSchema as any;
|
||
const fallbackProfile = generateFallbackProfile(schema, session.title || 'data.csv');
|
||
|
||
return reply.send({
|
||
success: true,
|
||
profile: fallbackProfile
|
||
});
|
||
}
|
||
|
||
return reply.send({
|
||
success: false,
|
||
error: result.error || 'Profile generation failed'
|
||
});
|
||
}
|
||
|
||
// 转换为前端期望的格式
|
||
const frontendProfile = convertToFrontendFormat(result.profile, result.quality);
|
||
|
||
return reply.send({
|
||
success: true,
|
||
profile: frontendProfile
|
||
});
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:API] Profile generation failed', {
|
||
sessionId,
|
||
error: error.message
|
||
});
|
||
|
||
return reply.status(500).send({
|
||
success: false,
|
||
error: error.message
|
||
});
|
||
}
|
||
}
|
||
);
|
||
|
||
/**
|
||
* GET /workflow/sessions/:sessionId/conclusion
|
||
* 获取会话的分析结论(优先返回缓存,无缓存则从 workflow 结果重新生成)
|
||
*/
|
||
app.get<{ Params: { sessionId: string } }>(
|
||
'/sessions/:sessionId/conclusion',
|
||
async (request, reply) => {
|
||
const { sessionId } = request.params;
|
||
|
||
try {
|
||
// 查找该 session 最新的 completed workflow
|
||
const workflow = await prisma.ssaWorkflow.findFirst({
|
||
where: { sessionId: sessionId, status: { in: ['completed', 'partial'] } },
|
||
orderBy: { createdAt: 'desc' },
|
||
});
|
||
|
||
if (!workflow) {
|
||
return reply.status(404).send({
|
||
success: false,
|
||
error: 'No completed workflow found for this session',
|
||
});
|
||
}
|
||
|
||
// 检查缓存
|
||
const cacheKey = `ssa:conclusion:${workflow.id}`;
|
||
const cached = await cache.get(cacheKey);
|
||
if (cached) {
|
||
return reply.send({ success: true, conclusion: cached, source: 'cache' });
|
||
}
|
||
|
||
// 无缓存:获取 workflow steps 结果并重新生成
|
||
const steps = await prisma.ssaWorkflowStep.findMany({
|
||
where: { workflowId: workflow.id },
|
||
orderBy: { stepOrder: 'asc' },
|
||
});
|
||
|
||
const results = steps.map((s: any) => ({
|
||
stepOrder: s.stepOrder,
|
||
toolCode: s.toolCode,
|
||
toolName: s.toolName,
|
||
status: s.status,
|
||
result: s.outputResult,
|
||
reportBlocks: s.reportBlocks,
|
||
executionMs: s.executionMs || 0,
|
||
}));
|
||
|
||
const workflowPlan = workflow.workflowPlan as any;
|
||
const conclusion = await reflectionService.reflect(
|
||
{
|
||
workflowId: workflow.id,
|
||
goal: workflowPlan?.goal || '统计分析',
|
||
title: workflowPlan?.title,
|
||
methodology: workflowPlan?.methodology,
|
||
plannedTrace: workflowPlan?.planned_trace,
|
||
},
|
||
results,
|
||
);
|
||
|
||
return reply.send({ success: true, conclusion, source: conclusion.source });
|
||
|
||
} catch (error: any) {
|
||
logger.error('[SSA:API] Get conclusion failed', { sessionId, error: error.message });
|
||
return reply.status(500).send({ success: false, error: error.message });
|
||
}
|
||
}
|
||
);
|
||
}
|
||
|
||
/**
|
||
* 将后端 DataProfile 转换为前端期望的格式
|
||
*/
|
||
function convertToFrontendFormat(profile: any, quality?: any) {
|
||
const summary = profile.summary || {};
|
||
const columns = profile.columns || [];
|
||
|
||
return {
|
||
file_name: 'data.csv',
|
||
row_count: summary.totalRows || 0,
|
||
column_count: summary.totalColumns || 0,
|
||
total_cells: (summary.totalRows || 0) * (summary.totalColumns || 0),
|
||
missing_cells: summary.totalMissingCells || 0,
|
||
missing_ratio: (summary.overallMissingRate || 0) / 100,
|
||
duplicate_rows: 0,
|
||
duplicate_ratio: 0,
|
||
numeric_columns: summary.numericColumns || 0,
|
||
categorical_columns: summary.categoricalColumns || 0,
|
||
datetime_columns: summary.datetimeColumns || 0,
|
||
quality_score: quality?.score || 85,
|
||
quality_grade: quality?.grade || 'B',
|
||
columns: columns.map((col: any) => ({
|
||
name: col.name,
|
||
dtype: col.type,
|
||
inferred_type: col.type,
|
||
non_null_count: col.totalCount - (col.missingCount || 0),
|
||
null_count: col.missingCount || 0,
|
||
null_ratio: (col.missingRate || 0) / 100,
|
||
unique_count: col.uniqueCount || 0,
|
||
unique_ratio: col.uniqueCount ? col.uniqueCount / col.totalCount : 0,
|
||
sample_values: col.topValues?.slice(0, 5).map((v: any) => v.value) || [],
|
||
mean: col.mean,
|
||
std: col.std,
|
||
min: col.min,
|
||
max: col.max,
|
||
median: col.median,
|
||
q1: col.q1,
|
||
q3: col.q3,
|
||
skewness: col.skewness,
|
||
kurtosis: col.kurtosis,
|
||
outlier_count: col.outlierCount,
|
||
outlier_ratio: col.outlierRate,
|
||
top_categories: col.topValues?.map((v: any) => ({
|
||
value: v.value,
|
||
count: v.count,
|
||
ratio: v.percentage / 100
|
||
}))
|
||
})),
|
||
warnings: quality?.issues || [],
|
||
recommendations: quality?.recommendations || [],
|
||
generated_at: new Date().toISOString()
|
||
};
|
||
}
|
||
|
||
/**
|
||
* 基于 Schema 生成简化版 fallback profile
|
||
*/
|
||
function generateFallbackProfile(schema: any, fileName: string) {
|
||
const columns = schema.columns || [];
|
||
const rowCount = schema.rowCount || 0;
|
||
|
||
const numericCols = columns.filter((c: any) => c.type === 'numeric');
|
||
const categoricalCols = columns.filter((c: any) => c.type === 'categorical');
|
||
|
||
const totalMissing = columns.reduce((sum: number, c: any) => sum + (c.nullCount || 0), 0);
|
||
const totalCells = rowCount * columns.length;
|
||
|
||
return {
|
||
file_name: fileName,
|
||
row_count: rowCount,
|
||
column_count: columns.length,
|
||
total_cells: totalCells,
|
||
missing_cells: totalMissing,
|
||
missing_ratio: totalCells > 0 ? totalMissing / totalCells : 0,
|
||
duplicate_rows: 0,
|
||
duplicate_ratio: 0,
|
||
numeric_columns: numericCols.length,
|
||
categorical_columns: categoricalCols.length,
|
||
datetime_columns: 0,
|
||
quality_score: 80,
|
||
quality_grade: 'B',
|
||
columns: columns.map((col: any) => ({
|
||
name: col.name,
|
||
dtype: col.type,
|
||
inferred_type: col.type,
|
||
non_null_count: rowCount - (col.nullCount || 0),
|
||
null_count: col.nullCount || 0,
|
||
null_ratio: rowCount > 0 ? (col.nullCount || 0) / rowCount : 0,
|
||
unique_count: col.uniqueValues || 0,
|
||
unique_ratio: rowCount > 0 ? (col.uniqueValues || 0) / rowCount : 0,
|
||
sample_values: []
|
||
})),
|
||
warnings: totalMissing > 0 ? [`数据中存在 ${totalMissing} 个缺失值`] : [],
|
||
recommendations: ['建议检查数据完整性后再进行分析'],
|
||
generated_at: new Date().toISOString()
|
||
};
|
||
}
|