feat(ssa): Complete Phase 2A frontend integration - multi-step workflow end-to-end
Phase 2A: WorkflowPlannerService, WorkflowExecutorService, Python data quality, 6 bug fixes, DescriptiveResultView, multi-step R code/Word export, MVP UI reuse. V11 UI: Gemini-style, multi-task, single-page scroll, Word export. Architecture: Block-based rendering consensus (4 block types). New R tools: chi_square, correlation, descriptive, logistic_binary, mann_whitney, t_test_paired. Docs: dev summary, block-based plan, status updates, task list v2.0. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,100 @@
|
||||
-- =====================================================================
-- Phase 2A: SSA intelligence core - database migration script
-- Date:        2026-02-20
-- Description: adds the workflow tables and the data-profile column
-- Note:        ssa_sessions.id is TEXT (it stores a UUID string)
-- =====================================================================

-- 1. Add the data_profile column to ssa_sessions (no-op when it already exists)
ALTER TABLE ssa_schema.ssa_sessions
    ADD COLUMN IF NOT EXISTS data_profile JSONB;

COMMENT ON COLUMN ssa_schema.ssa_sessions.data_profile IS 'Python Tool C 生成的数据画像 (Phase 2A)';

-- 2. Create ssa_workflows (one row per multi-step analysis workflow)
CREATE TABLE IF NOT EXISTS ssa_schema.ssa_workflows (
    id              TEXT        PRIMARY KEY DEFAULT CAST(gen_random_uuid() AS TEXT),
    session_id      TEXT        NOT NULL
        CONSTRAINT fk_ssa_workflow_session
        REFERENCES ssa_schema.ssa_sessions (id) ON DELETE CASCADE,
    message_id      TEXT,
    status          VARCHAR(20) NOT NULL DEFAULT 'pending',
    total_steps     INT         NOT NULL,
    completed_steps INT         NOT NULL DEFAULT 0,
    workflow_plan   JSONB       NOT NULL,
    reasoning       TEXT,
    created_at      TIMESTAMP   NOT NULL DEFAULT NOW(),
    started_at      TIMESTAMP,
    completed_at    TIMESTAMP
);

-- Indexes on ssa_workflows
CREATE INDEX IF NOT EXISTS idx_ssa_workflow_session ON ssa_schema.ssa_workflows (session_id);
CREATE INDEX IF NOT EXISTS idx_ssa_workflow_status  ON ssa_schema.ssa_workflows (status);

-- Table / column comments for ssa_workflows
COMMENT ON TABLE  ssa_schema.ssa_workflows               IS 'SSA 多步骤分析工作流 (Phase 2A)';
COMMENT ON COLUMN ssa_schema.ssa_workflows.status        IS 'pending | running | completed | partial | error';
COMMENT ON COLUMN ssa_schema.ssa_workflows.workflow_plan IS 'LLM 生成的原始工作流计划 JSON';
COMMENT ON COLUMN ssa_schema.ssa_workflows.reasoning     IS 'LLM 规划理由说明';

-- 3. Create ssa_workflow_steps (one row per step of a workflow)
CREATE TABLE IF NOT EXISTS ssa_schema.ssa_workflow_steps (
    id               TEXT         PRIMARY KEY DEFAULT CAST(gen_random_uuid() AS TEXT),
    workflow_id      TEXT         NOT NULL
        CONSTRAINT fk_ssa_workflow_step_workflow
        REFERENCES ssa_schema.ssa_workflows (id) ON DELETE CASCADE,
    step_order       INT          NOT NULL,
    tool_code        VARCHAR(50)  NOT NULL,
    tool_name        VARCHAR(100) NOT NULL,
    status           VARCHAR(20)  NOT NULL DEFAULT 'pending',
    input_params     JSONB,
    guardrail_checks JSONB,
    output_result    JSONB,
    error_info       JSONB,
    execution_ms     INT,
    started_at       TIMESTAMP,
    completed_at     TIMESTAMP
);

-- Indexes on ssa_workflow_steps
CREATE INDEX IF NOT EXISTS idx_ssa_workflow_step_workflow ON ssa_schema.ssa_workflow_steps (workflow_id);
CREATE INDEX IF NOT EXISTS idx_ssa_workflow_step_status   ON ssa_schema.ssa_workflow_steps (status);

-- Table / column comments for ssa_workflow_steps
COMMENT ON TABLE  ssa_schema.ssa_workflow_steps                  IS 'SSA 工作流单步执行记录 (Phase 2A)';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.status           IS 'pending | running | success | warning | error | skipped';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.guardrail_checks IS 'R Service JIT 护栏检验结果 (正态性、方差齐性等)';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.output_result    IS '工具执行结果 (已裁剪,符合 LLM 上下文限制)';
COMMENT ON COLUMN ssa_schema.ssa_workflow_steps.error_info       IS '错误信息 (用于容错管道的部分成功场景)';

-- =====================================================================
-- Verification queries: each returns one row saying whether the object exists
-- =====================================================================
SELECT 'ssa_sessions.data_profile 字段' AS item,
       CASE
           WHEN EXISTS (
               SELECT 1
               FROM information_schema.columns
               WHERE table_schema = 'ssa_schema'
                 AND table_name = 'ssa_sessions'
                 AND column_name = 'data_profile'
           ) THEN '✅ 已创建'
           ELSE '❌ 未创建'
       END AS status;

SELECT 'ssa_workflows 表' AS item,
       CASE
           WHEN EXISTS (
               SELECT 1
               FROM information_schema.tables
               WHERE table_schema = 'ssa_schema'
                 AND table_name = 'ssa_workflows'
           ) THEN '✅ 已创建'
           ELSE '❌ 未创建'
       END AS status;

SELECT 'ssa_workflow_steps 表' AS item,
       CASE
           WHEN EXISTS (
               SELECT 1
               FROM information_schema.tables
               WHERE table_schema = 'ssa_schema'
                 AND table_name = 'ssa_workflow_steps'
           ) THEN '✅ 已创建'
           ELSE '❌ 未创建'
       END AS status;
|
||||
@@ -2153,12 +2153,14 @@ model SsaSession {
|
||||
dataSchema Json? @map("data_schema") /// 数据结构(LLM可见)
|
||||
dataPayload Json? @map("data_payload") /// 真实数据(仅R可见)
|
||||
dataOssKey String? @map("data_oss_key") /// OSS 存储 key(大数据)
|
||||
dataProfile Json? @map("data_profile") /// 🆕 Python 生成的 DataProfile(Phase 2A)
|
||||
status String @default("active") /// active | consult | completed | error
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
|
||||
messages SsaMessage[]
|
||||
executionLogs SsaExecutionLog[]
|
||||
workflows SsaWorkflow[] /// 🆕 多步骤流程(Phase 2A)
|
||||
|
||||
@@index([userId], map: "idx_ssa_session_user")
|
||||
@@index([status], map: "idx_ssa_session_status")
|
||||
@@ -2306,3 +2308,54 @@ model SsaInterpretation {
|
||||
@@map("ssa_interpretation_templates")
|
||||
@@schema("ssa_schema")
|
||||
}
|
||||
|
||||
// ============================================================
|
||||
// 🆕 Phase 2A 新增:多步骤流程管理
|
||||
// ============================================================
|
||||
|
||||
/// SSA multi-step workflow (one planned analysis run inside a session)
model SsaWorkflow {
  id             String    @id @default(uuid())
  sessionId      String    @map("session_id")
  /// Plan message this workflow is associated with
  messageId      String?   @map("message_id")
  /// pending | running | completed | partial | error
  status         String    @default("pending")
  totalSteps     Int       @map("total_steps")
  completedSteps Int       @default(0) @map("completed_steps")
  /// Original plan JSON
  workflowPlan   Json      @map("workflow_plan")
  /// LLM planning rationale
  reasoning      String?   @db.Text
  createdAt      DateTime  @default(now()) @map("created_at")
  startedAt      DateTime? @map("started_at")
  completedAt    DateTime? @map("completed_at")

  session SsaSession        @relation(fields: [sessionId], references: [id], onDelete: Cascade)
  steps   SsaWorkflowStep[]

  @@index([sessionId], map: "idx_ssa_workflow_session")
  @@index([status], map: "idx_ssa_workflow_status")
  @@map("ssa_workflows")
  @@schema("ssa_schema")
}
|
||||
|
||||
/// SSA workflow step (execution record of a single step of a workflow)
model SsaWorkflowStep {
  id              String    @id @default(uuid())
  workflowId      String    @map("workflow_id")
  /// Position of the step within the workflow (1, 2, 3...)
  stepOrder       Int       @map("step_order")
  toolCode        String    @map("tool_code")
  toolName        String    @map("tool_name")
  /// pending | running | success | warning | error | skipped
  status          String    @default("pending")
  /// Input parameters
  inputParams     Json?     @map("input_params")
  /// JIT guardrail check results
  guardrailChecks Json?     @map("guardrail_checks")
  /// Execution result
  outputResult    Json?     @map("output_result")
  /// Error information
  errorInfo       Json?     @map("error_info")
  /// Execution time in milliseconds
  executionMs     Int?      @map("execution_ms")
  startedAt       DateTime? @map("started_at")
  completedAt     DateTime? @map("completed_at")

  workflow SsaWorkflow @relation(fields: [workflowId], references: [id], onDelete: Cascade)

  @@index([workflowId], map: "idx_ssa_workflow_step_workflow")
  @@index([status], map: "idx_ssa_workflow_step_status")
  @@map("ssa_workflow_steps")
  @@schema("ssa_schema")
}
|
||||
|
||||
@@ -13,6 +13,7 @@ import sessionRoutes from './routes/session.routes.js';
|
||||
import analysisRoutes from './routes/analysis.routes.js';
|
||||
import consultRoutes from './routes/consult.routes.js';
|
||||
import configRoutes from './routes/config.routes.js';
|
||||
import workflowRoutes from './routes/workflow.routes.js';
|
||||
|
||||
export async function ssaRoutes(app: FastifyInstance) {
|
||||
// 注册认证中间件(遵循模块认证规范)
|
||||
@@ -23,6 +24,8 @@ export async function ssaRoutes(app: FastifyInstance) {
|
||||
app.register(analysisRoutes, { prefix: '/sessions' });
|
||||
app.register(consultRoutes, { prefix: '/consult' });
|
||||
app.register(configRoutes, { prefix: '/config' });
|
||||
// Phase 2A: 多步骤工作流
|
||||
app.register(workflowRoutes, { prefix: '/workflow' });
|
||||
}
|
||||
|
||||
export default ssaRoutes;
|
||||
|
||||
430
backend/src/modules/ssa/routes/workflow.routes.ts
Normal file
430
backend/src/modules/ssa/routes/workflow.routes.ts
Normal file
@@ -0,0 +1,430 @@
|
||||
/**
|
||||
* SSA Workflow Routes (Phase 2A)
|
||||
*
|
||||
* 多步骤工作流 API:
|
||||
* - POST /plan - 生成工作流计划
|
||||
* - POST /:workflowId/execute - 执行工作流
|
||||
* - GET /:workflowId/status - 获取执行状态
|
||||
* - GET /:workflowId/stream - SSE 实时进度
|
||||
*/
|
||||
|
||||
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { workflowPlannerService } from '../services/WorkflowPlannerService.js';
|
||||
import { workflowExecutorService } from '../services/WorkflowExecutorService.js';
|
||||
import { dataProfileService } from '../services/DataProfileService.js';
|
||||
|
||||
// Request payload and URL-parameter shapes used by the workflow endpoints.

/** JSON body of `POST /workflow/plan`. */
type PlanWorkflowBody = {
  sessionId: string;
  userQuery: string;
};

/** URL parameters of `POST /workflow/:workflowId/execute`. */
type ExecuteWorkflowParams = {
  workflowId: string;
};

/** URL parameters of the status and stream GET routes (same shape as the execute route). */
type WorkflowStatusParams = {
  workflowId: string;
};

/** JSON body of `POST /workflow/profile`. */
type GenerateProfileBody = {
  sessionId: string;
};
|
||||
|
||||
/**
 * Workflow statuses for which the stream route must NOT start another execution.
 * `completed`, `partial` and `error` are the finished states documented on the
 * workflow status column; `failed` is kept because the previous implementation
 * checked for it.
 */
const FINISHED_WORKFLOW_STATUSES: ReadonlySet<string> = new Set([
  'completed',
  'partial',
  'error',
  'failed'
]);

/** Extracts a printable message from an arbitrary thrown value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Registers the SSA multi-step workflow endpoints on the given Fastify instance:
 *
 * - `POST /plan`                 generate a workflow plan
 * - `POST /:workflowId/execute`  execute a workflow and return its result
 * - `GET  /:workflowId/status`   read the current workflow status
 * - `GET  /:workflowId/stream`   SSE progress stream; connecting starts execution
 * - `POST /profile`              generate a data profile for a session
 *
 * @param app Fastify instance (or encapsulated plugin scope) to register on.
 */
export default async function workflowRoutes(app: FastifyInstance) {
  /**
   * POST /workflow/plan
   * Generates a multi-step workflow plan.
   */
  app.post<{ Body: PlanWorkflowBody }>('/plan', async (request, reply) => {
    // A request without a parsed body leaves `request.body` undefined; default to
    // an empty object so the caller gets the 400 below instead of a TypeError.
    const { sessionId, userQuery } = (request.body ?? {}) as Partial<PlanWorkflowBody>;

    if (!sessionId || !userQuery) {
      return reply.status(400).send({
        success: false,
        error: 'sessionId and userQuery are required'
      });
    }

    try {
      logger.info('[SSA:API] Planning workflow', { sessionId, userQuery });

      const plan = await workflowPlannerService.planWorkflow(sessionId, userQuery);

      return reply.send({ success: true, plan });
    } catch (error: unknown) {
      const message = describeError(error);
      logger.error('[SSA:API] Workflow planning failed', { sessionId, error: message });

      return reply.status(500).send({ success: false, error: message });
    }
  });

  /**
   * POST /workflow/:workflowId/execute
   * Executes a workflow and responds once it has finished.
   */
  app.post<{ Params: ExecuteWorkflowParams; Body: { sessionId: string } }>(
    '/:workflowId/execute',
    async (request, reply) => {
      const { workflowId } = request.params;
      const { sessionId } = (request.body ?? {}) as Partial<{ sessionId: string }>;

      if (!sessionId) {
        return reply.status(400).send({
          success: false,
          error: 'sessionId is required'
        });
      }

      try {
        logger.info('[SSA:API] Executing workflow', { workflowId, sessionId });

        const result = await workflowExecutorService.executeWorkflow(workflowId, sessionId);

        return reply.send({ success: true, result });
      } catch (error: unknown) {
        const message = describeError(error);
        logger.error('[SSA:API] Workflow execution failed', { workflowId, error: message });

        return reply.status(500).send({ success: false, error: message });
      }
    }
  );

  /**
   * GET /workflow/:workflowId/status
   * Returns the current workflow status, or 404 when the workflow is unknown.
   */
  app.get<{ Params: WorkflowStatusParams }>('/:workflowId/status', async (request, reply) => {
    const { workflowId } = request.params;

    try {
      const status = await workflowExecutorService.getWorkflowStatus(workflowId);

      if (!status) {
        return reply.status(404).send({
          success: false,
          error: 'Workflow not found'
        });
      }

      return reply.send({ success: true, workflow: status });
    } catch (error: unknown) {
      const message = describeError(error);
      logger.error('[SSA:API] Get workflow status failed', { workflowId, error: message });

      return reply.status(500).send({ success: false, error: message });
    }
  });

  /**
   * GET /workflow/:workflowId/stream
   * SSE progress stream. Execution starts automatically once the client connects,
   * unless the workflow is already in a finished state.
   */
  app.get<{ Params: WorkflowStatusParams }>('/:workflowId/stream', async (request, reply) => {
    const { workflowId } = request.params;
    const stream = reply.raw;

    logger.info('[SSA:SSE] Stream connected', { workflowId });

    // SSE response headers.
    // NOTE(review): a wildcard CORS origin on this route bypasses any stricter
    // CORS policy configured elsewhere - confirm this is intended.
    stream.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      'Connection': 'keep-alive',
      'Access-Control-Allow-Origin': '*'
    });

    let isCompleted = false;
    let isClosed = false;

    // Writing to an ended/destroyed response raises a write-after-end error, so
    // every write goes through this guard.
    const canWrite = (): boolean => !isClosed && !stream.writableEnded && !stream.destroyed;

    const sendEvent = (payload: unknown): void => {
      if (canWrite()) {
        stream.write(`data: ${JSON.stringify(payload)}\n\n`);
      }
    };

    // Initial connection acknowledgement.
    sendEvent({ type: 'connected', workflowId });

    // Comment-line heartbeat to keep the connection open.
    const heartbeat = setInterval(() => {
      if (canWrite()) {
        stream.write(':heartbeat\n\n');
      }
    }, 15000);

    // Forward executor progress events to this client.
    const onProgress = (message: Record<string, unknown>): void => {
      // The executor emits on one shared channel. When an event carries its own
      // workflowId, drop events that belong to another workflow; events without
      // one are forwarded as before.
      if (message.workflowId && message.workflowId !== workflowId) {
        return;
      }

      sendEvent({ ...message, workflowId });

      // A terminal event ends the stream.
      if (message.type === 'workflow_complete' || message.type === 'workflow_error') {
        isCompleted = true;
        cleanup();
      }
    };

    /**
     * Releases the heartbeat timer and the progress listener and ends the
     * response. Idempotent: it is reached both from terminal events and from the
     * request 'close' event, and must only act once.
     */
    function cleanup(): void {
      if (isClosed) {
        return;
      }
      clearInterval(heartbeat);
      workflowExecutorService.off('progress', onProgress);
      if (!isCompleted) {
        sendEvent({ type: 'disconnected' });
      }
      isClosed = true;
      if (!stream.writableEnded) {
        stream.end();
      }
    }

    workflowExecutorService.on('progress', onProgress);

    // Release resources when the client disconnects.
    request.raw.on('close', cleanup);

    // Look up the workflow's session and start execution.
    try {
      const { prisma } = await import('../../../config/database.js');
      const workflow = await prisma.ssaWorkflow.findUnique({
        where: { id: workflowId },
        select: { sessionId: true, status: true }
      });

      if (!workflow) {
        sendEvent({ type: 'workflow_error', error: 'Workflow not found', workflowId });
        cleanup();
        return;
      }

      // Already finished: report the stored status instead of running again.
      if (FINISHED_WORKFLOW_STATUSES.has(workflow.status)) {
        sendEvent({ type: 'workflow_complete', status: workflow.status, workflowId });
        cleanup();
        return;
      }

      // NOTE(review): a workflow whose status is already 'running' is started
      // again here (e.g. when an EventSource reconnects) - confirm whether
      // executeWorkflow guards against concurrent runs.
      logger.info('[SSA:SSE] Starting workflow execution', {
        workflowId,
        sessionId: workflow.sessionId
      });

      // Fire-and-forget so the SSE connection is not blocked by the execution.
      void workflowExecutorService
        .executeWorkflow(workflowId, workflow.sessionId)
        .catch((error: unknown) => {
          const message = describeError(error);
          logger.error('[SSA:SSE] Workflow execution failed', { workflowId, error: message });
          sendEvent({ type: 'workflow_error', error: message, workflowId });
          cleanup();
        });
    } catch (error: unknown) {
      const message = describeError(error);
      logger.error('[SSA:SSE] Failed to start workflow', { workflowId, error: message });
      sendEvent({ type: 'workflow_error', error: message, workflowId });
      cleanup();
    }
  });

  /**
   * POST /workflow/profile
   * Generates a data profile for a session. When generation fails and the
   * session has a stored schema, a simplified profile built from that schema is
   * returned instead.
   */
  app.post<{ Body: GenerateProfileBody }>('/profile', async (request, reply) => {
    const { sessionId } = (request.body ?? {}) as Partial<GenerateProfileBody>;

    if (!sessionId) {
      return reply.status(400).send({
        success: false,
        error: 'sessionId is required'
      });
    }

    try {
      logger.info('[SSA:API] Generating data profile', { sessionId });

      const result = await dataProfileService.generateProfileFromSession(sessionId);

      if (!result.success || !result.profile) {
        // Profile generation failed: fall back to the session's stored schema.
        const { prisma } = await import('../../../config/database.js');
        const session = await prisma.ssaSession.findUnique({ where: { id: sessionId } });

        if (session?.dataSchema) {
          const fallbackProfile = generateFallbackProfile(
            session.dataSchema as any,
            session.title || 'data.csv'
          );

          return reply.send({ success: true, profile: fallbackProfile });
        }

        return reply.send({
          success: false,
          error: result.error || 'Profile generation failed'
        });
      }

      // Convert to the structure the frontend expects.
      const frontendProfile = convertToFrontendFormat(result.profile, result.quality);

      return reply.send({ success: true, profile: frontendProfile });
    } catch (error: unknown) {
      const message = describeError(error);
      logger.error('[SSA:API] Profile generation failed', { sessionId, error: message });

      return reply.status(500).send({ success: false, error: message });
    }
  });
}
|
||||
|
||||
/**
 * Converts a backend DataProfile (camelCase keys) into the snake_case structure
 * returned to the frontend.
 *
 * Missing-rate fields (`overallMissingRate`, `missingRate`) and top-value
 * `percentage` are divided by 100; `outlierRate` is passed through unchanged.
 * Absent counts default to 0, and ratios whose denominator is missing or 0 are
 * reported as 0 rather than NaN/Infinity.
 *
 * @param profile Backend profile; `summary` and `columns` are optional.
 * @param quality Optional quality assessment (`score`, `grade`, `issues`,
 *                `recommendations`). A score of 0 is preserved; only an absent
 *                score falls back to 85.
 * @returns The frontend profile object; `generated_at` is the current time in ISO format.
 */
function convertToFrontendFormat(profile: any, quality?: any) {
  const summary = profile.summary || {};
  const columns = profile.columns || [];
  const rowCount = summary.totalRows || 0;
  const columnCount = summary.totalColumns || 0;

  return {
    file_name: 'data.csv',
    row_count: rowCount,
    column_count: columnCount,
    total_cells: rowCount * columnCount,
    missing_cells: summary.totalMissingCells || 0,
    missing_ratio: (summary.overallMissingRate || 0) / 100,
    duplicate_rows: 0,
    duplicate_ratio: 0,
    numeric_columns: summary.numericColumns || 0,
    categorical_columns: summary.categoricalColumns || 0,
    datetime_columns: summary.datetimeColumns || 0,
    // `??` instead of `||`: a legitimate score of 0 must not be replaced by the default.
    quality_score: quality?.score ?? 85,
    quality_grade: quality?.grade || 'B',
    columns: columns.map((col: any) => {
      const totalCount = col.totalCount || 0;
      const missingCount = col.missingCount || 0;
      const uniqueCount = col.uniqueCount || 0;

      return {
        name: col.name,
        dtype: col.type,
        inferred_type: col.type,
        // Clamp at 0 so an absent totalCount cannot produce a negative or NaN count.
        non_null_count: Math.max(0, totalCount - missingCount),
        null_count: missingCount,
        null_ratio: (col.missingRate || 0) / 100,
        unique_count: uniqueCount,
        // Guard the denominator: a missing or zero totalCount used to yield NaN/Infinity.
        unique_ratio: totalCount > 0 ? uniqueCount / totalCount : 0,
        sample_values: col.topValues?.slice(0, 5).map((v: any) => v.value) || [],
        mean: col.mean,
        std: col.std,
        min: col.min,
        max: col.max,
        median: col.median,
        q1: col.q1,
        q3: col.q3,
        skewness: col.skewness,
        kurtosis: col.kurtosis,
        outlier_count: col.outlierCount,
        // NOTE(review): unlike the other rate fields this one is not divided by
        // 100 - confirm which scale the backend uses for outlierRate.
        outlier_ratio: col.outlierRate,
        top_categories: col.topValues?.map((v: any) => ({
          value: v.value,
          count: v.count,
          ratio: v.percentage / 100
        }))
      };
    }),
    warnings: quality?.issues || [],
    recommendations: quality?.recommendations || [],
    generated_at: new Date().toISOString()
  };
}
|
||||
|
||||
/**
 * Builds a simplified profile from a stored session schema; used when the full
 * data-profile generation did not produce a result.
 *
 * Only information present in the schema is used (row count, column names and
 * types, per-column `nullCount` / `uniqueValues`); the quality score/grade,
 * duplicate counts and datetime-column count are fixed values.
 *
 * @param schema   Session data schema with optional `columns` and `rowCount`.
 * @param fileName File name to report in the profile.
 * @returns A profile object in the frontend's snake_case format.
 */
function generateFallbackProfile(schema: any, fileName: string) {
  const cols = schema.columns || [];
  const rows = schema.rowCount || 0;

  const countOfType = (kind: string) => cols.filter((c: any) => c.type === kind).length;
  // Ratio relative to the row count, 0 when there are no rows.
  const perRow = (value: number) => (rows > 0 ? value / rows : 0);

  const cellTotal = rows * cols.length;
  const missingTotal = cols.reduce((acc: number, c: any) => acc + (c.nullCount || 0), 0);

  const describeColumn = (c: any) => {
    const nulls = c.nullCount || 0;
    const uniques = c.uniqueValues || 0;
    return {
      name: c.name,
      dtype: c.type,
      inferred_type: c.type,
      non_null_count: rows - nulls,
      null_count: nulls,
      null_ratio: perRow(nulls),
      unique_count: uniques,
      unique_ratio: perRow(uniques),
      sample_values: []
    };
  };

  const warnings: string[] = [];
  if (missingTotal > 0) {
    warnings.push(`数据中存在 ${missingTotal} 个缺失值`);
  }

  return {
    file_name: fileName,
    row_count: rows,
    column_count: cols.length,
    total_cells: cellTotal,
    missing_cells: missingTotal,
    missing_ratio: cellTotal > 0 ? missingTotal / cellTotal : 0,
    duplicate_rows: 0,
    duplicate_ratio: 0,
    numeric_columns: countOfType('numeric'),
    categorical_columns: countOfType('categorical'),
    datetime_columns: 0,
    quality_score: 80,
    quality_grade: 'B',
    columns: cols.map(describeColumn),
    warnings,
    recommendations: ['建议检查数据完整性后再进行分析'],
    generated_at: new Date().toISOString()
  };
}
|
||||
369
backend/src/modules/ssa/services/ConclusionGeneratorService.ts
Normal file
369
backend/src/modules/ssa/services/ConclusionGeneratorService.ts
Normal file
@@ -0,0 +1,369 @@
|
||||
/**
|
||||
* SSA Conclusion Generator Service (Phase 2A)
|
||||
*
|
||||
* 结论生成器:整合多步骤分析结果,生成论文级结论
|
||||
*
|
||||
* 功能:
|
||||
* - 多步骤结果整合
|
||||
* - 论文级结论模板
|
||||
* - 方法学说明 + 局限性声明
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { StepResult } from './WorkflowExecutorService.js';
|
||||
|
||||
/** Structure of the conclusion report produced for a finished workflow. */
export interface ConclusionReport {
  /** Report title. */
  title: string;
  /** Overall summary text. */
  summary: string;
  /** One entry per step that produced a conclusion. */
  sections: Array<ConclusionSection>;
  /** Description of the statistical methods used. */
  methodology: string;
  /** Limitation statements. */
  limitations: Array<string>;
  /** Optional list of references. */
  references?: Array<string>;
}
|
||||
|
||||
/** Conclusion derived from a single workflow step. */
export interface ConclusionSection {
  /** Position of the step within the workflow. */
  stepOrder: number;
  /** Display name of the tool that ran in this step. */
  toolName: string;
  /** Formatted statement of the numeric result. */
  finding: string;
  /** Plain-language reading of the finding. */
  interpretation: string;
  /** Significance classification; `na` when no significance test applies. */
  significance: 'na' | 'significant' | 'marginal' | 'not_significant';
  /** Optional extra values attached to the section. */
  details?: { [key: string]: any };
}
|
||||
|
||||
export class ConclusionGeneratorService {
|
||||
|
||||
/**
 * Builds the overall conclusion report for a workflow.
 *
 * Only steps whose status is `success` or `warning` contribute sections; the
 * methodology and limitations are derived from all step results.
 *
 * @param results Execution results of the individual workflow steps.
 * @param goal    Analysis goal, used in the report title and summary.
 * @returns The assembled conclusion report.
 */
generateConclusion(results: StepResult[], goal: string): ConclusionReport {
  logger.info('[SSA:Conclusion] Generating conclusion', { stepCount: results.length, goal });

  // Steps without a usable result yield null and are dropped.
  const sections = results
    .filter(({ status }) => status === 'success' || status === 'warning')
    .map((step) => this.generateSectionConclusion(step))
    .filter((section): section is ConclusionSection => Boolean(section));

  const report: ConclusionReport = {
    title: `统计分析报告:${goal}`,
    summary: this.generateSummary(sections, goal),
    sections,
    methodology: this.generateMethodology(results),
    limitations: this.generateLimitations(results)
  };

  logger.info('[SSA:Conclusion] Conclusion generated', {
    sectionCount: sections.length,
    hasLimitations: report.limitations.length > 0
  });

  return report;
}
|
||||
|
||||
/**
 * Builds the conclusion section for a single step.
 *
 * The tool code selects which formatter/interpreter pair is used; unknown tool
 * codes get a generic "analysis completed" section. Each `case` body is wrapped
 * in its own block so that nothing is declared in the shared switch scope.
 *
 * @param result Result of one workflow step.
 * @returns The section, or `null` when the step carries no result payload.
 */
private generateSectionConclusion(result: StepResult): ConclusionSection | null {
  if (!result.result) {
    return null;
  }

  const { toolCode, toolName, stepOrder } = result;
  const data = result.result;

  let finding = '';
  let interpretation = '';
  let significance: ConclusionSection['significance'] = 'na';

  switch (toolCode) {
    case 'ST_DESCRIPTIVE': {
      finding = this.formatDescriptiveFindings(data);
      interpretation = '上述数据展示了研究样本的基本特征分布。';
      break;
    }

    case 'ST_T_TEST_IND':
    case 'ST_MANN_WHITNEY': {
      significance = this.interpretPValue(data.p_value);
      finding = this.formatComparisonFindings(data, toolCode);
      interpretation = this.interpretComparison(data, significance);
      break;
    }

    case 'ST_T_TEST_PAIRED': {
      significance = this.interpretPValue(data.p_value);
      finding = this.formatPairedFindings(data);
      interpretation = this.interpretPairedResult(data, significance);
      break;
    }

    case 'ST_CHI_SQUARE': {
      significance = this.interpretPValue(data.p_value);
      finding = this.formatChiSquareFindings(data);
      interpretation = this.interpretChiSquare(data, significance);
      break;
    }

    case 'ST_CORRELATION': {
      significance = this.interpretPValue(data.p_value);
      finding = this.formatCorrelationFindings(data);
      interpretation = this.interpretCorrelation(data, significance);
      break;
    }

    case 'ST_LOGISTIC_BINARY': {
      // No single p-value is read for the model, so significance stays 'na'.
      finding = this.formatLogisticFindings(data);
      interpretation = this.interpretLogistic(data);
      break;
    }

    default: {
      finding = `${toolName} 分析已完成。`;
      interpretation = '请参考详细结果解读。';
    }
  }

  return {
    stepOrder,
    toolName,
    finding,
    interpretation,
    significance,
    details: {
      pValue: data.p_value,
      pValueFmt: data.p_value_fmt
    }
  };
}
|
||||
|
||||
/**
 * Builds the summary paragraph of the report.
 *
 * Lists the significant findings, mentions marginal ones, and states that
 * nothing significant was found only when at least one section actually carries
 * a significance classification. Sections marked `na` are not evidence of
 * "no significant difference", so a run consisting only of such sections gets
 * no such statement.
 *
 * @param sections Sections generated for the successful steps.
 * @param goal     Analysis goal quoted in the opening sentence.
 * @returns The summary text.
 */
private generateSummary(sections: ConclusionSection[], goal: string): string {
  const significantFindings = sections.filter(s => s.significance === 'significant');
  const marginalFindings = sections.filter(s => s.significance === 'marginal');
  const hasTestedSections = sections.some(s => s.significance !== 'na');

  let summary = `针对「${goal}」进行了 ${sections.length} 项统计分析。`;

  if (significantFindings.length > 0) {
    summary += `\n\n主要发现:${significantFindings.length} 项分析达到统计学显著性(p < 0.05)。`;
    for (const finding of significantFindings) {
      summary += `\n- ${finding.toolName}:${finding.interpretation}`;
    }
  }

  if (marginalFindings.length > 0) {
    summary += `\n\n边缘性发现:${marginalFindings.length} 项分析接近显著水平(0.05 ≤ p < 0.10)。`;
  }

  // Only claim "nothing significant" when a significance test was evaluated.
  if (significantFindings.length === 0 && hasTestedSections) {
    summary += '\n\n本次分析未发现具有统计学显著性的差异或关联。';
  }

  return summary;
}
|
||||
|
||||
/**
 * Builds the methodology paragraph: a de-duplicated bullet list of the `method`
 * values reported by the steps, followed by two fixed closing sentences.
 *
 * @param results Results of all workflow steps.
 * @returns The methodology text.
 */
private generateMethodology(results: StepResult[]): string {
  // A Set keeps first-seen order while dropping repeated method names.
  const distinctMethods = new Set<string>();
  results.forEach((step) => {
    const methodName = step.result?.method;
    if (methodName) {
      distinctMethods.add(methodName);
    }
  });

  const bulletList = Array.from(distinctMethods, (methodName) => `- ${methodName}\n`).join('');

  return (
    '本研究采用以下统计方法进行分析:\n' +
    bulletList +
    '\n所有分析均在执行前进行了统计假设检验(正态性、方差齐性等),并根据检验结果自动选择适当的统计方法。' +
    '\n显著性水平设定为 α = 0.05(双侧检验)。'
  );
}
|
||||
|
||||
/**
 * Builds the list of limitation statements.
 *
 * Adds a small-sample note when any step reports a group with n < 30, a
 * warning note when any step result carries warnings, and always appends two
 * generic statements.
 *
 * @param results Results of all workflow steps.
 * @returns Limitation statements in display order.
 */
private generateLimitations(results: StepResult[]): string[] {
  const notes: string[] = [];

  // Sample size: stops at the first step containing a group smaller than 30.
  const hasSmallGroup = results.some((step) => {
    const groups = step.result?.group_stats;
    if (!groups) {
      return false;
    }
    const smallestGroup = Math.min(...groups.map((g: any) => g.n || 0));
    return smallestGroup < 30;
  });
  if (hasSmallGroup) {
    notes.push(`部分分析的样本量较小(n < 30),可能影响结果的稳健性。`);
  }

  // Warnings reported by any step.
  const collectedWarnings = results.flatMap((step) => step.result?.warnings || []);
  if (collectedWarnings.length > 0) {
    notes.push(`分析过程中存在统计警告,请谨慎解读结果。`);
  }

  // Generic limitations, always included.
  notes.push(
    '本分析基于横断面数据,无法推断因果关系。',
    '未考虑潜在的混杂因素,结果可能存在偏倚。'
  );

  return notes;
}
|
||||
|
||||
// ==================== 格式化辅助函数 ====================
|
||||
|
||||
/**
 * Formats the finding sentence for a descriptive-statistics step.
 *
 * @param data Step result; reads `summary.n_total`, `summary.n_numeric` and
 *             `summary.n_categorical`.
 * @returns A sentence with the observation and variable counts, or a generic
 *          completion message when no summary is present.
 */
private formatDescriptiveFindings(data: any): string {
  const { summary } = data;
  if (!summary) {
    return '描述性统计已完成。';
  }

  const observations = summary.n_total || '?';
  const numericVars = summary.n_numeric || 0;
  const categoricalVars = summary.n_categorical || 0;

  return [
    `样本包含 ${observations} 个观测值,`,
    `${numericVars} 个数值变量,`,
    `${categoricalVars} 个分类变量。`
  ].join('');
}
|
||||
|
||||
/**
 * Formats the finding sentence for a two-group comparison.
 *
 * For `ST_T_TEST_IND` the group means ± SD and the t statistic are reported;
 * for any other tool code the group medians and the U statistic.
 *
 * @param data     Step result; reads `group_stats`, `statistic` / `statistic_U`,
 *                 `p_value_fmt` and `p_value`.
 * @param toolCode Tool code of the step.
 * @returns The formatted finding.
 */
private formatComparisonFindings(data: any, toolCode: string): string {
  const stats = data.group_stats || [];
  // The text is rendered as `p ${pFmt}`, so the fallback carries its own
  // operator and never prints "undefined".
  // NOTE(review): assumes p_value_fmt already includes its comparison operator
  // (e.g. "< 0.001") - confirm against the R service output.
  const pFmt = data.p_value_fmt ||
    (typeof data.p_value === 'number' ? `= ${data.p_value.toFixed(4)}` : '= NA');

  if (stats.length >= 2) {
    const g1 = stats[0];
    const g2 = stats[1];

    if (toolCode === 'ST_T_TEST_IND') {
      return `${g1.group} 组均值为 ${g1.mean?.toFixed(2)} ± ${g1.sd?.toFixed(2)} (n=${g1.n}),` +
        `${g2.group} 组均值为 ${g2.mean?.toFixed(2)} ± ${g2.sd?.toFixed(2)} (n=${g2.n}),` +
        `t = ${data.statistic?.toFixed(2)},p ${pFmt}。`;
    }

    return `${g1.group} 组中位数为 ${g1.median?.toFixed(2)} (n=${g1.n}),` +
      `${g2.group} 组中位数为 ${g2.median?.toFixed(2)} (n=${g2.n}),` +
      `U = ${data.statistic_U?.toFixed(0)},p ${pFmt}。`;
  }

  // Fewer than two groups reported: only the p-value can be stated.
  return `两组比较:p ${pFmt}。`;
}
|
||||
|
||||
/**
 * Formats the finding sentence for a paired t-test.
 *
 * @param data Step result; reads `descriptive.before/after/difference`
 *             (`mean`, `sd`), `statistic`, `p_value_fmt` and `p_value`.
 * @returns The formatted finding; only the p-value when no descriptive block exists.
 */
private formatPairedFindings(data: any): string {
  const desc = data.descriptive;
  // The text is rendered as `p ${pFmt}`, so the fallback carries its own
  // operator and never prints "undefined".
  // NOTE(review): assumes p_value_fmt already includes its comparison operator
  // (e.g. "< 0.001") - confirm against the R service output.
  const pFmt = data.p_value_fmt ||
    (typeof data.p_value === 'number' ? `= ${data.p_value.toFixed(4)}` : '= NA');

  if (desc) {
    return `前测均值 ${desc.before?.mean?.toFixed(2)} ± ${desc.before?.sd?.toFixed(2)},` +
      `后测均值 ${desc.after?.mean?.toFixed(2)} ± ${desc.after?.sd?.toFixed(2)},` +
      `差值 ${desc.difference?.mean?.toFixed(2)} ± ${desc.difference?.sd?.toFixed(2)},` +
      `t = ${data.statistic?.toFixed(2)},p ${pFmt}。`;
  }

  return `配对比较:p ${pFmt}。`;
}
|
||||
|
||||
/**
 * Formats the finding sentence for a chi-square test.
 *
 * @param data Step result; reads `statistic`, `df`, `p_value_fmt` and `p_value`.
 * @returns The formatted finding.
 */
private formatChiSquareFindings(data: any): string {
  // The text is rendered as `p ${pFmt}`, so the fallback carries its own
  // operator and never prints "undefined".
  // NOTE(review): assumes p_value_fmt already includes its comparison operator
  // (e.g. "< 0.001") - confirm against the R service output.
  const pFmt = data.p_value_fmt ||
    (typeof data.p_value === 'number' ? `= ${data.p_value.toFixed(4)}` : '= NA');
  const chi = data.statistic?.toFixed(2);
  const df = data.df;

  return `χ² = ${chi},df = ${df},p ${pFmt}。`;
}
|
||||
|
||||
/**
 * Formats the finding sentence for a correlation analysis.
 *
 * @param data Step result; reads `statistic`, `method_code`, `interpretation`,
 *             `p_value_fmt` and `p_value`.
 * @returns The formatted finding.
 */
private formatCorrelationFindings(data: any): string {
  const r = data.statistic?.toFixed(3);
  // The text is rendered as `p ${pFmt}`, so the fallback carries its own
  // operator and never prints "undefined".
  // NOTE(review): assumes p_value_fmt already includes its comparison operator
  // (e.g. "< 0.001") - confirm against the R service output.
  const pFmt = data.p_value_fmt ||
    (typeof data.p_value === 'number' ? `= ${data.p_value.toFixed(4)}` : '= NA');
  // NOTE(review): every method code other than 'pearson' is labelled Spearman -
  // confirm the R tool cannot return another method.
  const method = data.method_code === 'pearson' ? 'Pearson' : 'Spearman';

  return `${method} 相关系数 r = ${r},p ${pFmt},` +
    `相关强度:${data.interpretation || '待解读'}。`;
}
|
||||
|
||||
/**
 * Formats the finding sentence for a binary logistic regression.
 *
 * Lists at most the first three coefficients flagged `significant`, excluding
 * the intercept, with their OR, 95% CI and formatted p-value.
 *
 * @param data Step result; reads the `coefficients` array.
 * @returns The formatted finding, or a fixed sentence when no predictor is significant.
 */
private formatLogisticFindings(data: any): string {
  const significantPredictors = (data.coefficients || []).filter(
    (coef: any) => coef.significant && coef.variable !== '(Intercept)'
  );

  if (!significantPredictors.length) {
    return 'Logistic 回归分析中未发现统计学显著的预测因子。';
  }

  const described: string[] = [];
  for (const coef of significantPredictors.slice(0, 3)) {
    described.push(
      `${coef.variable} (OR=${coef.OR}, 95%CI [${coef.ci_lower}, ${coef.ci_upper}], p ${coef.p_value_fmt})`
    );
  }

  return `多因素分析显示以下因素具有统计学显著性:${described.join(';')}。`;
}
|
||||
|
||||
// ==================== 解读辅助函数 ====================
|
||||
|
||||
/**
 * Classify a p-value into a significance level.
 *
 * @param p P-value to classify.
 * @returns 'significant' for p < 0.05, 'marginal' for p < 0.10, otherwise
 *          'not_significant' (this also covers NaN, which fails both comparisons).
 */
private interpretPValue(p: number): ConclusionSection['significance'] {
  return p < 0.05
    ? 'significant'
    : p < 0.10
      ? 'marginal'
      : 'not_significant';
}
|
||||
|
||||
/**
 * Turn a significance level into the interpretation sentence for a
 * two-group comparison.
 *
 * @param data Result payload (not read by this method).
 * @param sig Significance level of the comparison.
 * @returns User-facing interpretation sentence.
 */
private interpretComparison(data: any, sig: ConclusionSection['significance']): string {
  switch (sig) {
    case 'significant':
      return '两组之间存在统计学显著差异。';
    case 'marginal':
      return '两组之间存在边缘显著性差异,建议增加样本量进一步验证。';
    default:
      return '两组之间无统计学显著差异。';
  }
}
|
||||
|
||||
/**
 * Turn a paired-test result into an interpretation sentence.
 *
 * For a significant result the direction is taken from the sign of
 * `data.descriptive.difference.mean`. When that mean is missing, not numeric
 * or exactly zero, no direction can be stated, so a direction-neutral sentence
 * is returned instead of claiming a decrease.
 *
 * NOTE(review): a positive mean is read as "after is higher than before", as
 * in the original code — confirm the sign convention of the R tool output.
 *
 * @param data Result payload of the paired test.
 * @param sig Significance level derived from the p-value.
 * @returns User-facing interpretation sentence.
 */
private interpretPairedResult(data: any, sig: ConclusionSection['significance']): string {
  if (sig !== 'significant') {
    return '配对比较未发现统计学显著变化。';
  }

  // Number(undefined) is NaN and Number(null) is 0; both fall through to the neutral sentence.
  const meanDifference = Number(data.descriptive?.difference?.mean);

  if (meanDifference > 0) {
    return '配对比较结果表明,后测值较前测值显著升高。';
  }
  if (meanDifference < 0) {
    return '配对比较结果表明,后测值较前测值显著降低。';
  }
  return '配对比较结果表明,后测值与前测值之间存在统计学显著差异。';
}
|
||||
|
||||
/**
 * Turn a chi-square result into an interpretation sentence.
 *
 * A significant result is reported as an association, with Cramér's V
 * (three decimals) appended when `data.effect_size.cramers_v` is truthy.
 *
 * @param data Result payload of the chi-square test.
 * @param sig Significance level derived from the p-value.
 * @returns User-facing interpretation sentence.
 */
private interpretChiSquare(data: any, sig: ConclusionSection['significance']): string {
  if (sig !== 'significant') {
    return '两个分类变量之间无统计学显著关联。';
  }

  const cramersV = data.effect_size?.cramers_v;
  let effectNote = '';
  if (cramersV) {
    effectNote = `(效应量 Cramér's V = ${cramersV.toFixed(3)})`;
  }

  return `两个分类变量之间存在统计学显著关联${effectNote}。`;
}
|
||||
|
||||
/**
 * Turn a correlation result into an interpretation sentence.
 *
 * For a significant result the direction follows the sign of
 * `data.statistic`. When the coefficient is missing, not numeric or exactly
 * zero, a direction-neutral sentence is returned instead of reporting a
 * negative correlation.
 *
 * @param data Result payload of the correlation analysis.
 * @param sig Significance level derived from the p-value.
 * @returns User-facing interpretation sentence.
 */
private interpretCorrelation(data: any, sig: ConclusionSection['significance']): string {
  if (sig !== 'significant') {
    return '两变量之间不存在统计学显著的线性相关。';
  }

  // Number(undefined) is NaN and Number(null) is 0; both fall through to the neutral sentence.
  const coefficient = Number(data.statistic);

  if (coefficient > 0) {
    return '两变量之间存在统计学显著的正相关。';
  }
  if (coefficient < 0) {
    return '两变量之间存在统计学显著的负相关。';
  }
  return '两变量之间存在统计学显著的相关关系。';
}
|
||||
|
||||
/**
 * Summarise a logistic regression as "k of n predictors are significant".
 *
 * The intercept row is excluded from both counts.
 *
 * @param data Result payload; reads `coefficients`.
 * @returns User-facing interpretation sentence.
 */
private interpretLogistic(data: any): string {
  const predictors = (data.coefficients || []).filter(
    (coef: any) => coef.variable !== '(Intercept)'
  );
  const totalCount = predictors.length;
  const sigCount = predictors.filter((coef: any) => coef.significant).length;

  return `在纳入的 ${totalCount} 个自变量中,${sigCount} 个对结局变量具有独立的统计学显著效应。`;
}
|
||||
}
|
||||
|
||||
// Singleton export: one shared ConclusionGeneratorService instance for the module.
export const conclusionGeneratorService = new ConclusionGeneratorService();
|
||||
353
backend/src/modules/ssa/services/DataProfileService.ts
Normal file
353
backend/src/modules/ssa/services/DataProfileService.ts
Normal file
@@ -0,0 +1,353 @@
|
||||
/**
|
||||
* SSA DataProfile 服务 (Phase 2A)
|
||||
*
|
||||
* 调用 Python Tool C 生成数据画像,用于 LLM 生成分析计划
|
||||
*
|
||||
* 时机:用户上传数据时(时机 A)
|
||||
* 输出:DataProfile JSON,存入 SsaSession.dataProfile
|
||||
*/
|
||||
|
||||
import axios, { AxiosInstance } from 'axios';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { storage } from '../../../common/storage/index.js';
|
||||
|
||||
/** Data profile of an uploaded dataset: per-column details plus dataset-level totals. */
export interface DataProfile {
  columns: ColumnProfile[];
  summary: DataSummary;
}

/**
 * Profile of a single column. The optional fields are grouped by the column
 * type they describe.
 */
export interface ColumnProfile {
  name: string;
  type: 'numeric' | 'categorical' | 'datetime' | 'text';
  missingCount: number;
  /** NOTE(review): rendered with a "%" suffix in the LLM summary — presumably a percentage; confirm. */
  missingRate: number;
  uniqueCount: number;
  totalCount: number;

  // Numeric columns
  mean?: number;
  std?: number;
  median?: number;
  min?: number;
  max?: number;
  q1?: number;
  q3?: number;
  iqr?: number;
  outlierCount?: number;
  outlierRate?: number;
  skewness?: number;
  kurtosis?: number;

  // Categorical columns
  topValues?: { value: string; count: number; percentage: number }[];
  totalLevels?: number;
  modeValue?: string;
  modeCount?: number;

  // Datetime columns
  minDate?: string;
  maxDate?: string;
  dateRange?: string;
}

/** Dataset-level totals: row/column counts, columns per type, and missing-value figures. */
export interface DataSummary {
  totalRows: number;
  totalColumns: number;
  numericColumns: number;
  categoricalColumns: number;
  datetimeColumns: number;
  textColumns: number;
  overallMissingRate: number;
  totalMissingCells: number;
}

/** Data-quality assessment returned alongside the profile. */
export interface QualityScore {
  score: number;
  grade: 'A' | 'B' | 'C' | 'D';
  gradeDescription: string;
  issues: string[];
  recommendations: string[];
}

/**
 * Outcome of a profiling request. `profile` and `quality` are set on success,
 * `error` on failure.
 */
export interface DataProfileResult {
  success: boolean;
  profile?: DataProfile;
  quality?: QualityScore;
  executionTime?: number;
  error?: string;
}
|
||||
|
||||
/**
 * Client for the Python data-profiling endpoints.
 *
 * Requests a DataProfile for an SSA session's data, stores it on the session
 * record (`ssaSession.dataProfile`) and renders a compact text summary of a
 * profile for prompt injection.
 */
export class DataProfileService {
  private readonly client: AxiosInstance;

  /** Builds the HTTP client from EXTRACTION_SERVICE_URL (default http://localhost:8000). */
  constructor() {
    const baseURL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';

    this.client = axios.create({
      baseURL,
      timeout: 60000,
      headers: {
        'Content-Type': 'application/json'
      }
    });
  }

  /**
   * Generate a data profile for a session from in-memory rows.
   *
   * Never rejects: failures are logged and returned as `{ success: false }`.
   *
   * @param sessionId SSA session ID.
   * @param data Data rows (array of JSON objects).
   * @returns Profiling result.
   */
  async generateProfile(sessionId: string, data: Record<string, any>[]): Promise<DataProfileResult> {
    return this.requestProfile(sessionId, {
      endpoint: '/api/ssa/data-profile',
      body: () => ({ data }),
      startMeta: () => ({
        rowCount: data.length,
        columnCount: data.length > 0 ? Object.keys(data[0]).length : 0
      }),
      messages: {
        start: '[SSA:DataProfile] Generating profile',
        success: '[SSA:DataProfile] Profile generated successfully',
        failure: '[SSA:DataProfile] Profile generation failed'
      }
    });
  }

  /**
   * Generate a data profile from raw CSV text; the CSV is sent as-is and
   * parsed by the profiling service.
   *
   * Never rejects: failures are logged and returned as `{ success: false }`.
   *
   * @param sessionId SSA session ID.
   * @param csvContent CSV file content.
   * @returns Profiling result.
   */
  async generateProfileFromCSV(sessionId: string, csvContent: string): Promise<DataProfileResult> {
    return this.requestProfile(sessionId, {
      endpoint: '/api/ssa/data-profile-csv',
      body: () => ({ csv_content: csvContent }),
      startMeta: () => ({ contentLength: csvContent.length }),
      messages: {
        start: '[SSA:DataProfile] Generating profile from CSV',
        success: '[SSA:DataProfile] Profile generated from CSV successfully',
        failure: '[SSA:DataProfile] CSV profile generation failed'
      }
    });
  }

  /**
   * Load the session's data and generate its profile.
   *
   * Returns the profile already stored on the session when there is one.
   * Otherwise uses `dataPayload` if set, else downloads `dataOssKey` and picks
   * JSON or CSV handling from the first non-blank character.
   *
   * @param sessionId SSA session ID.
   * @returns Profiling result; failures are returned, not thrown.
   */
  async generateProfileFromSession(sessionId: string): Promise<DataProfileResult> {
    try {
      const session = await prisma.ssaSession.findUnique({
        where: { id: sessionId }
      });

      if (!session) {
        throw new Error(`Session not found: ${sessionId}`);
      }

      // Reuse the stored profile when available.
      if (session.dataProfile) {
        logger.info('[SSA:DataProfile] Using cached profile', { sessionId });
        return {
          success: true,
          profile: session.dataProfile as unknown as DataProfile
        };
      }

      if (session.dataPayload) {
        // Inline JSON rows.
        const data = session.dataPayload as unknown as Record<string, any>[];
        return await this.generateProfile(sessionId, data);
      }

      if (session.dataOssKey) {
        const buffer = await storage.download(session.dataOssKey);
        const content = buffer.toString('utf-8');

        // Format sniffing: content starting with "[" or "{" is treated as JSON.
        const trimmedContent = content.trim();
        if (trimmedContent.startsWith('[') || trimmedContent.startsWith('{')) {
          const data = JSON.parse(content);
          return await this.generateProfile(sessionId, data);
        }

        // Anything else is treated as CSV and parsed by the profiling service.
        return await this.generateProfileFromCSV(sessionId, content);
      }

      throw new Error('No data available for session');
    } catch (error: unknown) {
      const message = DataProfileService.describeError(error);
      logger.error('[SSA:DataProfile] Failed to generate profile from session', {
        sessionId,
        error: message
      });

      return {
        success: false,
        error: message
      };
    }
  }

  /**
   * Shared request pipeline for both profiling endpoints: log, POST, validate
   * the response, persist the profile, and map any failure to a result object.
   *
   * `body` and `startMeta` are callbacks so they are evaluated inside the
   * try block; a malformed input becomes a failure result, not a rejection.
   */
  private async requestProfile(
    sessionId: string,
    request: {
      endpoint: string;
      body: () => Record<string, unknown>;
      startMeta: () => Record<string, unknown>;
      messages: { start: string; success: string; failure: string };
    }
  ): Promise<DataProfileResult> {
    const startTime = Date.now();

    try {
      logger.info(request.messages.start, { sessionId, ...request.startMeta() });

      const response = await this.client.post(request.endpoint, {
        ...request.body(),
        max_unique_values: 20,
        include_quality_score: true
      });

      if (!response.data.success) {
        throw new Error(response.data.error || 'Profile generation failed');
      }

      const result: DataProfileResult = {
        success: true,
        profile: response.data.profile,
        quality: response.data.quality,
        executionTime: response.data.execution_time
      };

      await this.saveProfileToSession(sessionId, result);

      const executionMs = Date.now() - startTime;
      logger.info(request.messages.success, {
        sessionId,
        executionMs,
        summary: result.profile?.summary
      });

      return result;
    } catch (error: unknown) {
      const executionMs = Date.now() - startTime;
      const message = DataProfileService.describeError(error);

      logger.error(request.messages.failure, {
        sessionId,
        error: message,
        executionMs
      });

      return {
        success: false,
        error: message,
        executionTime: executionMs / 1000
      };
    }
  }

  /**
   * Store the profile on the session record.
   *
   * Best effort: a failed write is logged and swallowed, so the caller still
   * receives the generated profile.
   */
  private async saveProfileToSession(sessionId: string, result: DataProfileResult): Promise<void> {
    try {
      await prisma.ssaSession.update({
        where: { id: sessionId },
        data: {
          dataProfile: result.profile as any
        }
      });

      logger.info('[SSA:DataProfile] Profile saved to session', { sessionId });
    } catch (error: unknown) {
      logger.error('[SSA:DataProfile] Failed to save profile', {
        sessionId,
        error: DataProfileService.describeError(error)
      });
    }
  }

  /**
   * Read the profile stored on a session.
   *
   * @param sessionId SSA session ID.
   * @returns The stored profile, or `null` when the session does not exist or
   *          has no profile yet.
   */
  async getCachedProfile(sessionId: string): Promise<DataProfile | null> {
    const session = await prisma.ssaSession.findUnique({
      where: { id: sessionId },
      select: { dataProfile: true }
    });

    // `session?.dataProfile` is undefined for a missing session; normalise to null.
    return (session?.dataProfile ?? null) as unknown as DataProfile | null;
  }

  /**
   * Render a compact Markdown summary of a profile for prompt injection.
   *
   * Emits a dataset overview followed by one bullet per column. Numeric
   * columns list mean/SD/range (plus outlier count when positive); categorical
   * columns list the level count and up to five top values.
   *
   * @param profile Profile to summarise.
   * @returns Newline-joined summary text.
   */
  generateProfileSummaryForLLM(profile: DataProfile): string {
    const { summary, columns } = profile;

    const lines: string[] = [
      `## 数据概况`,
      `- 样本量: ${summary.totalRows} 行`,
      `- 变量数: ${summary.totalColumns} 列 (${summary.numericColumns} 数值, ${summary.categoricalColumns} 分类)`,
      `- 整体缺失率: ${summary.overallMissingRate}%`,
      '',
      `## 变量清单`
    ];

    for (const col of columns) {
      let desc = `- **${col.name}** [${col.type}]`;

      if (col.missingRate > 0) {
        desc += ` (缺失 ${col.missingRate}%)`;
      }

      if (col.type === 'numeric') {
        desc += `: 均值=${col.mean}, SD=${col.std}, 范围=[${col.min}, ${col.max}]`;
        if (col.outlierCount && col.outlierCount > 0) {
          desc += `, ${col.outlierCount}个异常值`;
        }
      } else if (col.type === 'categorical') {
        // Without topValues, list nothing rather than the literal text "undefined".
        const levels = (col.topValues ?? []).slice(0, 5).map(v => v.value).join(', ');
        desc += `: ${col.totalLevels}个水平 (${levels}${col.totalLevels && col.totalLevels > 5 ? '...' : ''})`;
      }

      lines.push(desc);
    }

    return lines.join('\n');
  }

  /** Extract a readable message from a caught value of unknown type. */
  private static describeError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }
}
|
||||
|
||||
// Singleton export: one shared DataProfileService instance for the module.
export const dataProfileService = new DataProfileService();
|
||||
521
backend/src/modules/ssa/services/WorkflowExecutorService.ts
Normal file
521
backend/src/modules/ssa/services/WorkflowExecutorService.ts
Normal file
@@ -0,0 +1,521 @@
|
||||
/**
|
||||
* SSA Workflow Executor Service (Phase 2A)
|
||||
*
|
||||
* 流程执行器:串联执行多个统计工具
|
||||
*
|
||||
* 功能:
|
||||
* - 按顺序执行工作流步骤
|
||||
* - JIT 护栏检查(执行前)
|
||||
* - 结果在步骤间传递
|
||||
* - 容错管道:支持部分成功
|
||||
* - SSE 实时进度推送
|
||||
*/
|
||||
|
||||
import { EventEmitter } from 'events';
|
||||
import axios, { AxiosInstance } from 'axios';
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { storage } from '../../../common/storage/index.js';
|
||||
import { WorkflowStep, ToolCode, AVAILABLE_TOOLS } from './WorkflowPlannerService.js';
|
||||
import { conclusionGeneratorService, ConclusionReport } from './ConclusionGeneratorService.js';
|
||||
|
||||
/** Outcome of one executed workflow step. */
export interface StepResult {
  stepOrder: number;
  toolCode: string;
  toolName: string;
  status: 'success' | 'warning' | 'error' | 'skipped';
  /** Tool output; absent when the step failed. */
  result?: any;
  guardrailChecks?: GuardrailCheck[];
  /** Failure details; set when `status` is 'error'. */
  error?: {
    code: string;
    message: string;
    userHint: string;
  };
  executionMs: number;
}

/** Result of a single guardrail check run before a step. */
export interface GuardrailCheck {
  checkName: string;
  passed: boolean;
  pValue?: number;
  recommendation: string;
}

/** Progress message emitted on the executor's `progress` event (SSE payload). */
export interface SSEMessage {
  type: 'step_start' | 'step_progress' | 'step_complete' | 'step_error' | 'workflow_complete';
  step: number;
  total_steps?: number;
  toolCode: string;
  toolName: string;
  status: 'running' | 'success' | 'error' | 'skipped' | 'warning';
  message: string;
  progress?: number;
  durationMs?: number;
  result?: any;
  error?: {
    code: string;
    message: string;
    userHint: string;
  };
  /** ISO-8601 timestamp of when the message was created. */
  timestamp: string;
}

/** Aggregate result of a workflow run. */
export interface WorkflowExecutionResult {
  workflowId: string;
  status: 'completed' | 'partial' | 'error';
  totalSteps: number;
  completedSteps: number;
  successSteps: number;
  results: StepResult[];
  conclusion?: ConclusionReport;
  executionMs: number;
}
|
||||
|
||||
/**
 * Executes a persisted SSA workflow step by step against the R statistics
 * service and broadcasts progress as `progress` events (payload: SSEMessage).
 *
 * NOTE(review): progress messages carry no workflow ID, so listeners on a
 * shared instance cannot tell concurrent workflows apart — confirm that the
 * consumer handles this.
 */
export class WorkflowExecutorService extends EventEmitter {
  private readonly rClient: AxiosInstance;

  /** Builds the R-service HTTP client from R_SERVICE_URL (default http://localhost:8082). */
  constructor() {
    super();

    const rServiceUrl = process.env.R_SERVICE_URL || 'http://localhost:8082';

    this.rClient = axios.create({
      baseURL: rServiceUrl,
      timeout: 120000,
      headers: {
        'Content-Type': 'application/json'
      }
    });
  }

  /**
   * Execute all steps of a workflow in `stepOrder` order.
   *
   * Each step's outcome is persisted and emitted as a progress event. A failed
   * step does not stop the run unless it is a critical step (see
   * `isCriticalStep`). The final status is 'completed' when every step
   * succeeded, 'partial' when some did, and 'error' when none did.
   *
   * Never rejects: setup failures (workflow or session missing, no data
   * source, database errors) are logged and returned as an 'error' result.
   *
   * @param workflowId Workflow ID.
   * @param sessionId Session ID whose data the steps run on.
   * @returns Execution result including per-step results.
   */
  async executeWorkflow(workflowId: string, sessionId: string): Promise<WorkflowExecutionResult> {
    const startTime = Date.now();
    const results: StepResult[] = [];

    logger.info('[SSA:Executor] Starting workflow execution', { workflowId, sessionId });

    try {
      // Load the workflow with its ordered steps.
      const workflow = await prisma.ssaWorkflow.findUnique({
        where: { id: workflowId },
        include: { steps: { orderBy: { stepOrder: 'asc' } } }
      });

      if (!workflow) {
        throw new Error(`Workflow not found: ${workflowId}`);
      }

      const session = await prisma.ssaSession.findUnique({
        where: { id: sessionId }
      });

      if (!session) {
        throw new Error(`Session not found: ${sessionId}`);
      }

      await prisma.ssaWorkflow.update({
        where: { id: workflowId },
        data: {
          status: 'running',
          startedAt: new Date()
        }
      });

      const dataSource = await this.prepareDataSource(session);

      let successCount = 0;
      let previousResults: any = null;

      for (const step of workflow.steps) {
        const stepResult = await this.executeStep(
          step,
          session,
          dataSource,
          previousResults
        );

        results.push(stepResult);

        // Persist the step outcome.
        await prisma.ssaWorkflowStep.update({
          where: { id: step.id },
          data: {
            status: stepResult.status,
            outputResult: stepResult.result,
            guardrailChecks: stepResult.guardrailChecks as any,
            errorInfo: stepResult.error as any,
            executionMs: stepResult.executionMs,
            completedAt: new Date()
          }
        });

        // Advance the workflow progress counter.
        await prisma.ssaWorkflow.update({
          where: { id: workflowId },
          data: { completedSteps: { increment: 1 } }
        });

        if (stepResult.status === 'success' || stepResult.status === 'warning') {
          successCount++;
          previousResults = stepResult.result;
        }

        this.emitProgress({
          type: stepResult.status === 'error' ? 'step_error' : 'step_complete',
          step: step.stepOrder,
          total_steps: workflow.steps.length,
          toolCode: step.toolCode,
          toolName: step.toolName,
          status: stepResult.status,
          message: stepResult.status === 'error'
            ? `${step.toolName} 执行失败: ${stepResult.error?.message}`
            : `${step.toolName} 执行完成`,
          result: stepResult.result,
          durationMs: stepResult.executionMs,
          error: stepResult.error,
          timestamp: new Date().toISOString()
        });

        // A failed critical step aborts the remaining steps.
        if (stepResult.status === 'error' && this.isCriticalStep(step.stepOrder, workflow.steps.length)) {
          logger.warn('[SSA:Executor] Critical step failed, stopping workflow', {
            workflowId,
            step: step.stepOrder
          });
          break;
        }
      }

      // Derive the final status from the number of successful steps.
      const executionMs = Date.now() - startTime;
      let finalStatus: 'completed' | 'partial' | 'error' = 'completed';

      if (successCount === 0) {
        finalStatus = 'error';
      } else if (successCount < workflow.steps.length) {
        finalStatus = 'partial';
      }

      await prisma.ssaWorkflow.update({
        where: { id: workflowId },
        data: {
          status: finalStatus,
          completedAt: new Date()
        }
      });

      // Each final status gets its own wording; a run with no successful step
      // must not be announced as "partially completed".
      let finalMessage: string;
      if (finalStatus === 'completed') {
        finalMessage = `分析流程执行完成,共 ${successCount} 个步骤`;
      } else if (finalStatus === 'partial') {
        finalMessage = `分析流程部分完成,${successCount}/${workflow.steps.length} 个步骤成功`;
      } else {
        finalMessage = '分析流程执行失败,没有步骤成功完成';
      }

      this.emitProgress({
        type: 'workflow_complete',
        step: workflow.steps.length,
        toolCode: '',
        toolName: '',
        status: finalStatus === 'completed' ? 'success' : finalStatus === 'partial' ? 'warning' : 'error',
        message: finalMessage,
        timestamp: new Date().toISOString()
      });

      // Build the combined conclusion when at least one step produced a result.
      let conclusion: ConclusionReport | undefined;
      if (successCount > 0) {
        const workflowPlan = workflow.workflowPlan as any;
        conclusion = conclusionGeneratorService.generateConclusion(
          results,
          workflowPlan?.goal || '统计分析'
        );
      }

      logger.info('[SSA:Executor] Workflow execution finished', {
        workflowId,
        status: finalStatus,
        successCount,
        totalSteps: workflow.steps.length,
        executionMs,
        hasConclusion: !!conclusion
      });

      return {
        workflowId,
        status: finalStatus,
        totalSteps: workflow.steps.length,
        completedSteps: results.length,
        successSteps: successCount,
        results,
        conclusion,
        executionMs
      };
    } catch (error: unknown) {
      logger.error('[SSA:Executor] Workflow execution failed', {
        workflowId,
        error: WorkflowExecutorService.describeError(error)
      });

      // Best effort: marking the workflow as failed can itself fail (for
      // example when the workflow row does not exist). That must not turn
      // the error result into a rejection.
      try {
        await prisma.ssaWorkflow.update({
          where: { id: workflowId },
          data: { status: 'error' }
        });
      } catch (updateError: unknown) {
        logger.warn('[SSA:Executor] Failed to mark workflow as error', {
          workflowId,
          error: WorkflowExecutorService.describeError(updateError)
        });
      }

      return {
        workflowId,
        status: 'error',
        totalSteps: 0,
        completedSteps: 0,
        successSteps: 0,
        results,
        executionMs: Date.now() - startTime
      };
    }
  }

  /**
   * Execute a single step: emit the start event, mark the step as running,
   * run the JIT guardrails when the tool needs them, then call the R skill
   * endpoint for the tool.
   *
   * Errors from the guardrail and R calls are converted into an 'error'
   * StepResult. A failure of the initial status update is not caught here and
   * propagates to the caller.
   *
   * `previousResults` is accepted but not read by this method.
   */
  private async executeStep(
    step: any,
    session: any,
    dataSource: any,
    previousResults: any
  ): Promise<StepResult> {
    const startTime = Date.now();

    this.emitProgress({
      type: 'step_start',
      step: step.stepOrder,
      toolCode: step.toolCode,
      toolName: step.toolName,
      status: 'running',
      message: `正在执行 ${step.toolName}...`,
      timestamp: new Date().toISOString()
    });

    await prisma.ssaWorkflowStep.update({
      where: { id: step.id },
      data: {
        status: 'running',
        startedAt: new Date()
      }
    });

    try {
      // JIT guardrail checks, only for tools that need them.
      let guardrailChecks: GuardrailCheck[] | undefined;

      if (this.needsGuardrailCheck(step.toolCode)) {
        this.emitProgress({
          type: 'step_progress',
          step: step.stepOrder,
          toolCode: step.toolCode,
          toolName: step.toolName,
          status: 'running',
          message: '正在执行统计假设检验(JIT护栏)...',
          progress: 30,
          timestamp: new Date().toISOString()
        });

        guardrailChecks = await this.runJITGuardrails(dataSource, step.toolCode, step.inputParams);
      }

      this.emitProgress({
        type: 'step_progress',
        step: step.stepOrder,
        toolCode: step.toolCode,
        toolName: step.toolName,
        status: 'running',
        message: `正在执行 ${step.toolName}...`,
        progress: 60,
        timestamp: new Date().toISOString()
      });

      // Call the R service skill endpoint for this tool.
      const response = await this.rClient.post(`/api/v1/skills/${step.toolCode}`, {
        data_source: dataSource,
        params: step.inputParams,
        original_filename: session.title || 'data.csv',
        guardrails: {
          check_normality: true,
          auto_fix: true
        }
      });

      const executionMs = Date.now() - startTime;

      if (response.data.status === 'error' || response.data.status === 'blocked') {
        return {
          stepOrder: step.stepOrder,
          toolCode: step.toolCode,
          toolName: step.toolName,
          status: 'error',
          guardrailChecks,
          error: {
            code: response.data.error_code || 'E100',
            message: response.data.message || '执行失败',
            userHint: response.data.user_hint || '请检查数据和参数'
          },
          executionMs
        };
      }

      return {
        stepOrder: step.stepOrder,
        toolCode: step.toolCode,
        toolName: step.toolName,
        // Any warning reported by the service downgrades the step to 'warning'.
        status: response.data.warnings?.length > 0 ? 'warning' : 'success',
        result: {
          ...response.data.results,
          plots: response.data.plots,
          result_table: response.data.result_table,
          reproducible_code: response.data.reproducible_code,
          trace_log: response.data.trace_log,
          warnings: response.data.warnings,
        },
        guardrailChecks,
        executionMs
      };
    } catch (error: unknown) {
      const executionMs = Date.now() - startTime;
      const message = WorkflowExecutorService.describeError(error);

      logger.error('[SSA:Executor] Step execution failed', {
        step: step.stepOrder,
        toolCode: step.toolCode,
        error: message
      });

      return {
        stepOrder: step.stepOrder,
        toolCode: step.toolCode,
        toolName: step.toolName,
        status: 'error',
        error: {
          code: 'E100',
          message,
          userHint: '执行过程中发生错误,请重试'
        },
        executionMs
      };
    }
  }

  /**
   * Run the JIT guardrail checks on the R service.
   *
   * Best effort: a failed request is logged, and both a failed request and a
   * non-success response yield an empty list.
   */
  private async runJITGuardrails(
    dataSource: any,
    toolCode: string,
    params: any
  ): Promise<GuardrailCheck[]> {
    try {
      const response = await this.rClient.post('/api/v1/guardrails/jit', {
        data_source: dataSource,
        tool_code: toolCode,
        params
      });

      if (response.data.status === 'success') {
        return response.data.checks || [];
      }
    } catch (error: unknown) {
      logger.warn('[SSA:Executor] JIT guardrail check failed', {
        toolCode,
        error: WorkflowExecutorService.describeError(error)
      });
    }

    return [];
  }

  /** Whether a tool gets JIT guardrail checks before execution. */
  private needsGuardrailCheck(toolCode: string): boolean {
    const toolsNeedingGuardrails = [
      'ST_T_TEST_IND',
      'ST_T_TEST_PAIRED',
      'ST_CORRELATION'
    ];
    return toolsNeedingGuardrails.includes(toolCode);
  }

  /**
   * Whether a failed step aborts the workflow. Only the first step counts as
   * critical; `totalSteps` is not used.
   */
  private isCriticalStep(stepOrder: number, totalSteps: number): boolean {
    return stepOrder === 1;
  }

  /**
   * Build the `data_source` payload for the R service: inline data when the
   * session has `dataPayload`, otherwise a signed URL for `dataOssKey`
   * (requested with an expiry argument of 3600).
   *
   * @throws Error when the session has neither.
   */
  private async prepareDataSource(session: any): Promise<any> {
    if (session.dataPayload) {
      return {
        type: 'inline',
        data: session.dataPayload
      };
    } else if (session.dataOssKey) {
      const signedUrl = await storage.getUrl(session.dataOssKey, 3600);
      return {
        type: 'oss',
        oss_url: signedUrl
      };
    }

    throw new Error('No data source available');
  }

  /** Emit a progress message on the `progress` event and log it at debug level. */
  private emitProgress(message: SSEMessage): void {
    this.emit('progress', message);

    logger.debug('[SSA:Executor] Progress emitted', {
      type: message.type,
      step: message.step,
      status: message.status
    });
  }

  /**
   * Load a workflow with its steps ordered by `stepOrder`.
   *
   * @param workflowId Workflow ID.
   * @returns The workflow record, or whatever `findUnique` yields when none matches.
   */
  async getWorkflowStatus(workflowId: string): Promise<any> {
    const workflow = await prisma.ssaWorkflow.findUnique({
      where: { id: workflowId },
      include: {
        steps: {
          orderBy: { stepOrder: 'asc' }
        }
      }
    });

    return workflow;
  }

  /** Extract a readable message from a caught value of unknown type. */
  private static describeError(error: unknown): string {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }
}
|
||||
|
||||
// Singleton export: one shared WorkflowExecutorService instance for the module.
export const workflowExecutorService = new WorkflowExecutorService();
|
||||
603
backend/src/modules/ssa/services/WorkflowPlannerService.ts
Normal file
603
backend/src/modules/ssa/services/WorkflowPlannerService.ts
Normal file
@@ -0,0 +1,603 @@
|
||||
/**
|
||||
* SSA Workflow Planner Service (Phase 2A)
|
||||
*
|
||||
* 路径规划器:LLM 驱动的多工具流程规划
|
||||
*
|
||||
* 功能:
|
||||
* - 理解用户意图 + 数据特征
|
||||
* - 规划 2-7 步分析流程
|
||||
* - 选择合适的统计工具组合
|
||||
*/
|
||||
|
||||
import { logger } from '../../../common/logging/index.js';
|
||||
import { prisma } from '../../../config/database.js';
|
||||
import { DataProfile, dataProfileService } from './DataProfileService.js';
|
||||
|
||||
/**
 * Catalogue of the statistical tools the planner can schedule, keyed by tool
 * code. In `inputParams`, names ending in "?" mark optional parameters.
 */
export const AVAILABLE_TOOLS = {
  // Descriptive statistics
  ST_DESCRIPTIVE: {
    code: 'ST_DESCRIPTIVE',
    name: '描述性统计',
    category: 'basic',
    description: '数据概况、基线特征表',
    inputParams: ['variables', 'group_var?'],
    outputType: 'summary'
  },

  // Independent-samples t-test; names a prerequisite and a fallback tool
  ST_T_TEST_IND: {
    code: 'ST_T_TEST_IND',
    name: '独立样本T检验',
    category: 'parametric',
    description: '两组连续变量比较(参数方法)',
    inputParams: ['group_var', 'value_var'],
    outputType: 'comparison',
    prerequisite: '正态分布',
    fallback: 'ST_MANN_WHITNEY'
  },

  // Mann-Whitney U test
  ST_MANN_WHITNEY: {
    code: 'ST_MANN_WHITNEY',
    name: 'Mann-Whitney U检验',
    category: 'nonparametric',
    description: '两组连续/等级变量比较(非参数方法)',
    inputParams: ['group_var', 'value_var'],
    outputType: 'comparison'
  },

  // Paired t-test
  ST_T_TEST_PAIRED: {
    code: 'ST_T_TEST_PAIRED',
    name: '配对T检验',
    category: 'parametric',
    description: '配对设计的前后对比',
    inputParams: ['before_var', 'after_var'],
    outputType: 'comparison'
  },

  // Chi-square test
  ST_CHI_SQUARE: {
    code: 'ST_CHI_SQUARE',
    name: '卡方检验',
    category: 'categorical',
    description: '两个分类变量的独立性检验',
    inputParams: ['var1', 'var2'],
    outputType: 'association'
  },

  // Pearson / Spearman correlation
  ST_CORRELATION: {
    code: 'ST_CORRELATION',
    name: '相关分析',
    category: 'correlation',
    description: 'Pearson/Spearman相关系数',
    inputParams: ['var_x', 'var_y', 'method?'],
    outputType: 'correlation'
  },

  // Binary logistic regression
  ST_LOGISTIC_BINARY: {
    code: 'ST_LOGISTIC_BINARY',
    name: '二元Logistic回归',
    category: 'regression',
    description: '二分类结局的多因素分析',
    inputParams: ['outcome_var', 'predictors', 'confounders?'],
    outputType: 'regression'
  }
} as const;

/** Union of the tool codes defined in AVAILABLE_TOOLS. */
export type ToolCode = keyof typeof AVAILABLE_TOOLS;
|
||||
|
||||
/** One planned step of a workflow. */
export interface WorkflowStep {
  stepOrder: number;
  toolCode: ToolCode;
  toolName: string;
  inputParams: Record<string, any>;
  purpose: string;
  dependsOn?: number[];
}

/** Workflow plan in its internal form. */
export interface WorkflowPlanInternal {
  goal: string;
  reasoning: string;
  steps: WorkflowStep[];
  estimatedDuration: string;
}

/** Workflow plan in the API response form (snake_case, matching the frontend types). */
export interface WorkflowPlan {
  workflow_id: string;
  session_id: string;
  title: string;
  description: string;
  total_steps: number;
  steps: {
    step_number: number;
    tool_code: string;
    tool_name: string;
    description: string;
    params: Record<string, unknown>;
    depends_on?: number[];
  }[];
  estimated_time_seconds?: number;
  created_at: string;
}

/** Result of parsing the user's analysis request. */
export interface ParsedIntent {
  goal: string;
  analysisType: 'comparison' | 'correlation' | 'regression' | 'descriptive' | 'mixed';
  variables: {
    /** Variables the user mentioned in the query. */
    mentioned?: string[];
    /** Outcome variable. */
    outcome?: string;
    /** Predictor / independent variables. */
    predictors?: string[];
    /** Grouping variable. */
    grouping?: string;
    /** All continuous variables. */
    continuous?: string[];
    /** All categorical variables. */
    categorical?: string[];
  };
  design?: 'independent' | 'paired' | 'longitudinal';
}
|
||||
|
||||
export class WorkflowPlannerService {
|
||||
|
||||
/**
 * Plan a multi-step analysis workflow for a session and persist it.
 *
 * Parses the user's intent, generates the steps, saves the internal plan and
 * returns it in the API (snake_case) form.
 *
 * @param sessionId Session ID.
 * @param userQuery The user's analysis request.
 * @param profile Data profile; when omitted, the profile cached on the
 *                session is used if there is one.
 * @returns The saved plan in API form.
 */
async planWorkflow(
  sessionId: string,
  userQuery: string,
  profile?: DataProfile
): Promise<WorkflowPlan> {
  logger.info('[SSA:Planner] Planning workflow', { sessionId, userQuery });

  // Fall back to the cached profile only when the caller supplied none.
  const effectiveProfile = profile
    ? profile
    : (await dataProfileService.getCachedProfile(sessionId)) || undefined;

  const intent = this.parseUserIntent(userQuery, effectiveProfile);
  const steps = this.generateSteps(intent, effectiveProfile);

  const reasoning = this.generateReasoning(intent, steps);
  const estimatedDuration = this.estimateDuration(steps);
  const internalPlan: WorkflowPlanInternal = {
    goal: intent.goal,
    reasoning,
    steps,
    estimatedDuration
  };

  const workflowId = await this.saveWorkflow(sessionId, internalPlan);

  logger.info('[SSA:Planner] Workflow planned', {
    sessionId,
    stepCount: steps.length,
    tools: steps.map(s => s.toolCode)
  });

  // Convert the camelCase internal steps to the snake_case API form.
  const apiSteps: WorkflowPlan['steps'] = [];
  for (const step of steps) {
    apiSteps.push({
      step_number: step.stepOrder,
      tool_code: step.toolCode,
      tool_name: step.toolName,
      description: step.purpose,
      params: step.inputParams,
      depends_on: step.dependsOn
    });
  }

  return {
    workflow_id: workflowId,
    session_id: sessionId,
    title: intent.goal,
    description: internalPlan.reasoning,
    total_steps: steps.length,
    steps: apiSteps,
    // Flat estimate: 5 per step.
    estimated_time_seconds: steps.length * 5,
    created_at: new Date().toISOString()
  };
}
|
||||
|
||||
/**
|
||||
* 解析用户意图(改进版:识别用户提到的变量并选择合适方法)
|
||||
*/
|
||||
private parseUserIntent(userQuery: string, profile?: DataProfile): ParsedIntent {
|
||||
const query = userQuery.toLowerCase();
|
||||
|
||||
// 基于关键词的意图识别
|
||||
let analysisType: ParsedIntent['analysisType'] = 'descriptive';
|
||||
let design: ParsedIntent['design'] = 'independent';
|
||||
|
||||
if (query.includes('比较') || query.includes('差异') || query.includes('不同')) {
|
||||
analysisType = 'comparison';
|
||||
} else if (query.includes('相关') || query.includes('关系') || query.includes('关联')) {
|
||||
analysisType = 'correlation';
|
||||
} else if (query.includes('影响') || query.includes('因素') || query.includes('预测') || query.includes('回归')) {
|
||||
analysisType = 'regression';
|
||||
}
|
||||
|
||||
if (query.includes('前后') || query.includes('配对') || query.includes('变化')) {
|
||||
design = 'paired';
|
||||
}
|
||||
|
||||
// 从用户查询中提取变量名
|
||||
const variables: ParsedIntent['variables'] = {
|
||||
mentioned: [], // 用户提到的变量
|
||||
outcome: undefined, // 结局变量
|
||||
predictors: [], // 预测变量/自变量
|
||||
continuous: [],
|
||||
categorical: []
|
||||
};
|
||||
|
||||
if (profile) {
|
||||
const allColumns = profile.columns.map(c => c.name);
|
||||
const numericCols = profile.columns.filter(c => c.type === 'numeric').map(c => c.name);
|
||||
const categoricalCols = profile.columns.filter(c => c.type === 'categorical').map(c => c.name);
|
||||
|
||||
variables.continuous = numericCols;
|
||||
variables.categorical = categoricalCols;
|
||||
|
||||
// 从查询中识别用户提到的变量名(不区分大小写)
|
||||
for (const col of allColumns) {
|
||||
if (query.includes(col.toLowerCase())) {
|
||||
variables.mentioned!.push(col);
|
||||
}
|
||||
}
|
||||
|
||||
// 尝试识别结局变量和预测变量
|
||||
// 规则:A对B的影响 / A与B的相关性 → B 是结局,A 是预测因素
|
||||
const influenceMatch = userQuery.match(/(.+?)(?:对|影响|预测)(.+?)(?:的|$)/);
|
||||
const correlationMatch = userQuery.match(/(.+?)(?:与|和|跟)(.+?)(?:的相关|的关系|的关联)/);
|
||||
|
||||
if (influenceMatch) {
|
||||
const predictorPart = influenceMatch[1];
|
||||
const outcomePart = influenceMatch[2];
|
||||
|
||||
// 找出结局变量
|
||||
for (const col of allColumns) {
|
||||
if (outcomePart.toLowerCase().includes(col.toLowerCase())) {
|
||||
variables.outcome = col;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// 找出预测变量
|
||||
for (const col of allColumns) {
|
||||
if (predictorPart.toLowerCase().includes(col.toLowerCase())) {
|
||||
variables.predictors!.push(col);
|
||||
}
|
||||
}
|
||||
} else if (correlationMatch) {
|
||||
const var1Part = correlationMatch[1];
|
||||
const var2Part = correlationMatch[2];
|
||||
|
||||
for (const col of allColumns) {
|
||||
if (var1Part.toLowerCase().includes(col.toLowerCase()) ||
|
||||
var2Part.toLowerCase().includes(col.toLowerCase())) {
|
||||
variables.mentioned!.push(col);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 如果有明确提到的变量但没有解析出结局/预测,使用提到的变量
|
||||
if (variables.mentioned!.length >= 2 && !variables.outcome) {
|
||||
// 最后一个通常是结局变量
|
||||
variables.outcome = variables.mentioned![variables.mentioned!.length - 1];
|
||||
variables.predictors = variables.mentioned!.slice(0, -1);
|
||||
}
|
||||
|
||||
// 尝试识别分组变量(二分类)
|
||||
const binaryCol = profile.columns.find(c => c.type === 'categorical' && c.totalLevels === 2);
|
||||
if (binaryCol) {
|
||||
variables.grouping = binaryCol.name;
|
||||
}
|
||||
|
||||
logger.info('[WorkflowPlanner] Parsed variables from query', {
|
||||
mentioned: variables.mentioned,
|
||||
outcome: variables.outcome,
|
||||
predictors: variables.predictors
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
goal: userQuery,
|
||||
analysisType,
|
||||
design,
|
||||
variables
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断变量是否为分类型
|
||||
*/
|
||||
private isVariableCategorical(varName: string, profile?: DataProfile): boolean {
|
||||
if (!profile) return false;
|
||||
const col = profile.columns.find(c => c.name.toLowerCase() === varName.toLowerCase());
|
||||
return col?.type === 'categorical';
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断变量是否为二分类
|
||||
*/
|
||||
private isVariableBinary(varName: string, profile?: DataProfile): boolean {
|
||||
if (!profile) return false;
|
||||
const col = profile.columns.find(c => c.name.toLowerCase() === varName.toLowerCase());
|
||||
return col?.type === 'categorical' && col.totalLevels === 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据意图生成工作流步骤(改进版:根据变量类型智能选择方法)
|
||||
*/
|
||||
private generateSteps(intent: ParsedIntent, profile?: DataProfile): WorkflowStep[] {
|
||||
const steps: WorkflowStep[] = [];
|
||||
let stepOrder = 1;
|
||||
|
||||
// 获取用户提到的变量
|
||||
const mentionedVars = intent.variables?.mentioned || [];
|
||||
const outcomeVar = intent.variables?.outcome;
|
||||
const predictorVars = intent.variables?.predictors || [];
|
||||
|
||||
// 第一步:总是先做描述性统计
|
||||
const descVars = mentionedVars.length > 0
|
||||
? mentionedVars
|
||||
: (intent.variables?.continuous || []).slice(0, 5);
|
||||
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_DESCRIPTIVE',
|
||||
toolName: AVAILABLE_TOOLS.ST_DESCRIPTIVE.name,
|
||||
inputParams: {
|
||||
variables: descVars,
|
||||
group_var: intent.variables?.grouping
|
||||
},
|
||||
purpose: '了解数据的基本特征和分布'
|
||||
});
|
||||
|
||||
// 根据分析类型和变量类型添加核心分析步骤
|
||||
switch (intent.analysisType) {
|
||||
case 'comparison':
|
||||
if (intent.design === 'paired') {
|
||||
// 配对设计
|
||||
if (intent.variables?.continuous && intent.variables.continuous.length >= 2) {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_T_TEST_PAIRED',
|
||||
toolName: AVAILABLE_TOOLS.ST_T_TEST_PAIRED.name,
|
||||
inputParams: {
|
||||
before_var: intent.variables.continuous[0],
|
||||
after_var: intent.variables.continuous[1]
|
||||
},
|
||||
purpose: '检验配对样本的均值差异',
|
||||
dependsOn: [1]
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// 独立样本设计
|
||||
if (intent.variables?.grouping && intent.variables?.continuous?.length) {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_T_TEST_IND',
|
||||
toolName: AVAILABLE_TOOLS.ST_T_TEST_IND.name,
|
||||
inputParams: {
|
||||
group_var: intent.variables.grouping,
|
||||
value_var: intent.variables.continuous[0]
|
||||
},
|
||||
purpose: '检验两组均值是否存在显著差异(正态时用T检验,否则自动降级为Mann-Whitney)',
|
||||
dependsOn: [1]
|
||||
});
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 'correlation':
|
||||
// 根据变量类型选择相关性分析方法
|
||||
if (mentionedVars.length >= 2) {
|
||||
const var1 = mentionedVars[0];
|
||||
const var2 = mentionedVars[1];
|
||||
const var1IsCat = this.isVariableCategorical(var1, profile);
|
||||
const var2IsCat = this.isVariableCategorical(var2, profile);
|
||||
|
||||
if (var1IsCat && var2IsCat) {
|
||||
// 两个分类变量 → 卡方检验
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CHI_SQUARE',
|
||||
toolName: AVAILABLE_TOOLS.ST_CHI_SQUARE.name,
|
||||
inputParams: {
|
||||
var1: var1,
|
||||
var2: var2
|
||||
},
|
||||
purpose: `分析 ${var1} 与 ${var2} 两个分类变量的关联性`,
|
||||
dependsOn: [1]
|
||||
});
|
||||
} else if (!var1IsCat && !var2IsCat) {
|
||||
// 两个连续变量 → Pearson/Spearman 相关
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CORRELATION',
|
||||
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
|
||||
inputParams: {
|
||||
var_x: var1,
|
||||
var_y: var2,
|
||||
method: 'auto'
|
||||
},
|
||||
purpose: `分析 ${var1} 与 ${var2} 的相关性`,
|
||||
dependsOn: [1]
|
||||
});
|
||||
} else {
|
||||
// 一个分类一个连续 → T检验或点双列相关
|
||||
const catVar = var1IsCat ? var1 : var2;
|
||||
const contVar = var1IsCat ? var2 : var1;
|
||||
|
||||
if (this.isVariableBinary(catVar, profile)) {
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_T_TEST_IND',
|
||||
toolName: AVAILABLE_TOOLS.ST_T_TEST_IND.name,
|
||||
inputParams: {
|
||||
group_var: catVar,
|
||||
value_var: contVar
|
||||
},
|
||||
purpose: `比较 ${catVar} 不同组别下 ${contVar} 的差异(点双列相关的等价检验)`,
|
||||
dependsOn: [1]
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (intent.variables?.continuous && intent.variables.continuous.length >= 2) {
|
||||
// 没有明确提到变量,使用默认的连续变量
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CORRELATION',
|
||||
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
|
||||
inputParams: {
|
||||
var_x: intent.variables.continuous[0],
|
||||
var_y: intent.variables.continuous[1],
|
||||
method: 'auto'
|
||||
},
|
||||
purpose: '分析两个连续变量的相关性',
|
||||
dependsOn: [1]
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case 'regression':
|
||||
// 多因素分析 - 使用用户指定的结局变量和预测因素
|
||||
const regressionOutcome = outcomeVar || intent.variables?.grouping;
|
||||
const regressionPredictors = predictorVars.length > 0
|
||||
? predictorVars
|
||||
: intent.variables?.continuous?.slice(0, 5) || [];
|
||||
|
||||
if (regressionOutcome && regressionPredictors.length > 0) {
|
||||
// 判断结局变量类型
|
||||
const outcomeBinary = this.isVariableBinary(regressionOutcome, profile);
|
||||
const outcomeCat = this.isVariableCategorical(regressionOutcome, profile);
|
||||
|
||||
logger.info('[WorkflowPlanner] Regression analysis', {
|
||||
outcome: regressionOutcome,
|
||||
predictors: regressionPredictors,
|
||||
outcomeBinary,
|
||||
outcomeCat
|
||||
});
|
||||
|
||||
if (outcomeBinary || outcomeCat) {
|
||||
// 二分类/分类结局 → Logistic 回归
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_LOGISTIC_BINARY',
|
||||
toolName: AVAILABLE_TOOLS.ST_LOGISTIC_BINARY.name,
|
||||
inputParams: {
|
||||
outcome_var: regressionOutcome,
|
||||
predictors: regressionPredictors
|
||||
},
|
||||
purpose: `分析 ${regressionPredictors.join('、')} 对 ${regressionOutcome} 的影响(二元 Logistic 回归)`,
|
||||
dependsOn: [1]
|
||||
});
|
||||
} else {
|
||||
// 连续结局 → 暂时也使用 Logistic 回归(TODO: 添加线性回归工具)
|
||||
// 实际应该使用线性回归,但当前工具库暂未支持
|
||||
logger.warn('[WorkflowPlanner] Linear regression not yet implemented, falling back to descriptive stats');
|
||||
// 添加一个额外的描述性统计步骤作为替代
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_CORRELATION',
|
||||
toolName: AVAILABLE_TOOLS.ST_CORRELATION.name,
|
||||
inputParams: {
|
||||
var_x: regressionPredictors[0],
|
||||
var_y: regressionOutcome,
|
||||
method: 'auto'
|
||||
},
|
||||
purpose: `分析 ${regressionPredictors[0]} 与 ${regressionOutcome} 的相关性(线性回归待开发)`,
|
||||
dependsOn: [1]
|
||||
});
|
||||
}
|
||||
} else if (intent.variables?.grouping && intent.variables?.continuous?.length) {
|
||||
// 降级:使用默认的分组变量作为结局
|
||||
steps.push({
|
||||
stepOrder: stepOrder++,
|
||||
toolCode: 'ST_LOGISTIC_BINARY',
|
||||
toolName: AVAILABLE_TOOLS.ST_LOGISTIC_BINARY.name,
|
||||
inputParams: {
|
||||
outcome_var: intent.variables.grouping,
|
||||
predictors: intent.variables.continuous?.slice(0, 5) || []
|
||||
},
|
||||
purpose: '多因素分析:控制混杂后分析各因素的独立效应',
|
||||
dependsOn: [1]
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return steps;
|
||||
}
|
||||
|
||||
/**
|
||||
* 生成规划理由说明
|
||||
*/
|
||||
private generateReasoning(intent: ParsedIntent, steps: WorkflowStep[]): string {
|
||||
const reasons: string[] = [];
|
||||
|
||||
reasons.push(`根据您的分析目标「${intent.goal}」,我为您规划了 ${steps.length} 步分析流程:`);
|
||||
|
||||
for (const step of steps) {
|
||||
reasons.push(`${step.stepOrder}. ${step.toolName}:${step.purpose}`);
|
||||
}
|
||||
|
||||
if (intent.analysisType === 'comparison') {
|
||||
reasons.push('\n说明:系统会自动进行正态性检验,如不满足正态性假设,将自动切换为非参数方法。');
|
||||
}
|
||||
|
||||
return reasons.join('\n');
|
||||
}
|
||||
|
||||
/**
|
||||
* 估算执行时长
|
||||
*/
|
||||
private estimateDuration(steps: WorkflowStep[]): string {
|
||||
const secondsPerStep = 5;
|
||||
const totalSeconds = steps.length * secondsPerStep;
|
||||
|
||||
if (totalSeconds < 60) {
|
||||
return `约 ${totalSeconds} 秒`;
|
||||
} else {
|
||||
return `约 ${Math.ceil(totalSeconds / 60)} 分钟`;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 保存工作流到数据库
|
||||
*/
|
||||
private async saveWorkflow(sessionId: string, plan: WorkflowPlanInternal): Promise<string> {
|
||||
const workflow = await prisma.ssaWorkflow.create({
|
||||
data: {
|
||||
sessionId,
|
||||
status: 'pending',
|
||||
totalSteps: plan.steps.length,
|
||||
completedSteps: 0,
|
||||
workflowPlan: plan as any,
|
||||
reasoning: plan.reasoning
|
||||
}
|
||||
});
|
||||
|
||||
// 创建步骤记录
|
||||
for (const step of plan.steps) {
|
||||
await prisma.ssaWorkflowStep.create({
|
||||
data: {
|
||||
workflowId: workflow.id,
|
||||
stepOrder: step.stepOrder,
|
||||
toolCode: step.toolCode,
|
||||
toolName: step.toolName,
|
||||
status: 'pending',
|
||||
inputParams: step.inputParams
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
logger.info('[SSA:Planner] Workflow saved', {
|
||||
sessionId,
|
||||
workflowId: workflow.id,
|
||||
stepCount: plan.steps.length
|
||||
});
|
||||
|
||||
return workflow.id;
|
||||
}
|
||||
}
|
||||
|
||||
// Singleton export
|
||||
/** Shared instance of WorkflowPlannerService, created when this module is loaded. */
export const workflowPlannerService = new WorkflowPlannerService();
|
||||
Reference in New Issue
Block a user