feat(ssa): Implement dual-channel architecture Phase 1-3 (QPER + LLM Agent pipeline)
Completed: - Phase 1: DB schema (execution_mode + ssa_agent_executions), ModeToggle component, Session PATCH API - Phase 2: AgentPlannerService + AgentCoderService (streaming) + CodeRunnerService + R Docker /execute-code endpoint - Phase 3: AgentCodePanel (3-step confirmation UI), SSE event handling (7 agent events), streaming code display - Three-step confirmation pipeline: plan -> user confirm -> stream code -> user confirm -> execute R code -> results - R Docker sandbox /execute-code endpoint with 120s timeout + block_helpers preloaded - E2E dual-channel test script (8 tests) - Updated R engine architecture doc (v1.5) and SSA module status doc (v4.0) Technical details: - AgentCoderService uses LLM streaming (chatStream) for real-time code generation feedback - ReviewerAgent temporarily disabled, prioritizing Plan -> Code -> Execute flow - CodeRunnerService wraps user code with auto data loading (df variable injection) - Frontend handles agent_planning, agent_plan_ready, code_generating, code_generated, code_executing, code_result events - ask_user mechanism used for plan and code confirmation steps Files: 24 files (4 new services, 2 new components, 1 migration, 1 E2E test, 16 modified) Made-with: Cursor
This commit is contained in:
444
backend/tests/e2e-dual-channel-test.ts
Normal file
444
backend/tests/e2e-dual-channel-test.ts
Normal file
@@ -0,0 +1,444 @@
|
||||
/**
|
||||
* SSA 双通道架构 E2E 测试
|
||||
*
|
||||
* 测试 Phase 1~3:
|
||||
* T1. 数据库迁移验证 — execution_mode 字段 + ssa_agent_executions 表
|
||||
* T2. Session execution mode 切换 API
|
||||
* T3. R Docker /execute-code 端点
|
||||
* T4. R Docker /execute-code 错误处理(R 代码执行出错时返回 error 状态与原始错误信息)
|
||||
* T5. AgentPlannerService 规划能力
|
||||
* T6. AgentCoderService 代码生成能力
|
||||
* T7. AgentReviewerService 审核能力
|
||||
* T8. Agent Execution 记录 CRUD(ssa_agent_executions 表的创建 / 更新 / 查询 / 删除)
|
||||
*
|
||||
* 前置条件:
|
||||
* - PostgreSQL 运行中(Docker Desktop)
|
||||
* - R Docker 运行中(可选,T3 跳过如不可用)
|
||||
* - DeepSeek API key 配置在 .env
|
||||
* - 至少有一个 SSA session(有上传数据)
|
||||
*
|
||||
* 运行: npx tsx tests/e2e-dual-channel-test.ts
|
||||
*/
|
||||
|
||||
import { prisma } from '../src/config/database.js';
|
||||
import { logger } from '../src/common/logging/index.js';
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Base URL of the R execution service.
 * `||` (rather than `??`) is used so an empty-string environment variable also falls back to the default.
 */
const R_SERVICE_URL = process.env.R_SERVICE_URL || 'http://localhost:8082';

/**
 * Base URL of the backend HTTP API, overridable through the BACKEND_URL environment variable.
 */
const BACKEND_URL = process.env.BACKEND_URL || 'http://localhost:3000';
|
||||
|
||||
/** Outcome of one E2E test case, collected for the final summary. */
interface TestResult {
  /** Short test identifier, e.g. "T1". */
  id: string;
  /** Human-readable test name. */
  name: string;
  /** Final status of the test case. */
  status: 'pass' | 'fail' | 'skip';
  /** Elapsed wall-clock time in milliseconds. */
  duration: number;
  /** Error text for failed or skipped tests; absent for passing tests. */
  message?: string;
}

/** Results of every test executed so far, in execution order. */
const results: TestResult[] = [];

/**
 * Runs a single test case, records its outcome in `results` and prints a one-line report.
 *
 * A test signals "skip" by throwing an error whose message starts with `SKIP:`;
 * any other thrown value marks the test as failed. This function never rejects:
 * every error raised by `fn` is captured in the recorded result.
 *
 * @param id   Short identifier shown in the report (e.g. "T1").
 * @param name Human-readable test name.
 * @param fn   Async test body; resolving means the test passed.
 */
async function runTest(
  id: string,
  name: string,
  fn: () => Promise<void>,
): Promise<void> {
  const start = Date.now();
  try {
    await fn();
    const dur = Date.now() - start;
    results.push({ id, name, status: 'pass', duration: dur });
    console.log(` ✅ ${id} ${name} (${dur}ms)`);
  } catch (error: unknown) {
    const dur = Date.now() - start;
    // Thrown values are not guaranteed to be Error instances; always derive a printable message.
    const message = error instanceof Error ? error.message : String(error);
    if (message.startsWith('SKIP:')) {
      results.push({ id, name, status: 'skip', duration: dur, message });
      console.log(` ⏭️ ${id} ${name} — ${message}`);
    } else {
      results.push({ id, name, status: 'fail', duration: dur, message });
      console.log(` ❌ ${id} ${name} — ${message}`);
    }
  }
}
|
||||
|
||||
/**
 * Minimal assertion helper for the test bodies in this script.
 *
 * Declared as a TypeScript assertion function so the compiler narrows the
 * checked expression after a successful call.
 *
 * @param condition Value that must be true.
 * @param message   Message of the Error thrown when `condition` is false.
 * @throws Error carrying `message` when `condition` is false.
 */
function assert(condition: boolean, message: string): asserts condition {
  if (!condition) {
    throw new Error(message);
  }
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T1: 数据库迁移验证
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T1 — verifies the database migration by querying the PostgreSQL catalogs:
 *   1. `ssa_schema.ssa_sessions` has an `execution_mode` column whose default contains "qper";
 *   2. the table `ssa_schema.ssa_agent_executions` exists;
 *   3. the index `idx_ssa_agent_exec_session` exists on that table.
 *
 * @throws Error describing the first check that does not hold.
 */
async function t1_dbMigration(): Promise<void> {
  // 1. The execution_mode column on ssa_sessions.
  const colCheck = await prisma.$queryRaw<
    Array<{ column_name: string; column_default: string | null }>
  >`
    SELECT column_name, column_default
    FROM information_schema.columns
    WHERE table_schema = 'ssa_schema'
      AND table_name = 'ssa_sessions'
      AND column_name = 'execution_mode'
  `;
  assert(colCheck.length === 1, 'execution_mode 列不存在于 ssa_sessions 表');
  // column_default is an SQL expression (or NULL), hence a substring check instead of equality.
  const columnDefault = colCheck[0]?.column_default;
  assert(
    columnDefault?.includes('qper') === true,
    `execution_mode 默认值应为 qper,实际为 ${columnDefault}`,
  );

  // 2. The ssa_agent_executions table.
  const tableCheck = await prisma.$queryRaw<Array<{ table_name: string }>>`
    SELECT table_name
    FROM information_schema.tables
    WHERE table_schema = 'ssa_schema'
      AND table_name = 'ssa_agent_executions'
  `;
  assert(tableCheck.length === 1, 'ssa_agent_executions 表不存在');

  // 3. The session index on ssa_agent_executions.
  const idxCheck = await prisma.$queryRaw<Array<{ indexname: string }>>`
    SELECT indexname
    FROM pg_indexes
    WHERE schemaname = 'ssa_schema'
      AND tablename = 'ssa_agent_executions'
      AND indexname = 'idx_ssa_agent_exec_session'
  `;
  assert(idxCheck.length === 1, 'idx_ssa_agent_exec_session 索引不存在');
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T2: Session execution mode CRUD
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T2 — switches the newest active session's `execution_mode` to "agent" and back
 * to "qper" with raw SQL, verifying the stored value after each update.
 *
 * The mode found before the test is restored in a `finally` block, so a session
 * that was already in "agent" mode is not left in "qper", and a failed assertion
 * does not leave the session stuck in "agent".
 *
 * @throws Error starting with `SKIP:` when no active session exists.
 * @throws Error when the session row is missing or a switch is not persisted.
 */
async function t2_executionModeSwitch(): Promise<void> {
  // Pick the most recently created active session.
  const session = await prisma.ssaSession.findFirst({
    where: { status: 'active' },
    orderBy: { createdAt: 'desc' },
  });

  if (!session) throw new Error('SKIP: 无可用 session,请先创建一个 SSA 会话');

  /** Reads the execution_mode rows stored for the session under test. */
  const readModeRows = () => prisma.$queryRaw<Array<{ execution_mode: string }>>`
    SELECT execution_mode FROM ssa_schema.ssa_sessions WHERE id = ${session.id}
  `;

  // Remember the current mode so it can be restored afterwards.
  const current = await readModeRows();
  assert(current.length === 1, 'Session 不存在');
  const originalMode = current[0]?.execution_mode;
  console.log(` 当前 mode: ${originalMode}`);

  try {
    // Switch to agent and verify.
    await prisma.$executeRaw`
      UPDATE ssa_schema.ssa_sessions SET execution_mode = 'agent' WHERE id = ${session.id}
    `;
    const after = await readModeRows();
    assert(after[0]?.execution_mode === 'agent', '切换到 agent 失败');

    // Switch back to qper and verify.
    await prisma.$executeRaw`
      UPDATE ssa_schema.ssa_sessions SET execution_mode = 'qper' WHERE id = ${session.id}
    `;
    const restored = await readModeRows();
    assert(restored[0]?.execution_mode === 'qper', '切回 qper 失败');
  } finally {
    // Restore the mode found before the test. SQL literals are used on purpose:
    // the column type is not visible from this file, and a literal works whether
    // it is text or an enum. Any mode other than "agent" falls back to "qper".
    if (originalMode === 'agent') {
      await prisma.$executeRaw`
        UPDATE ssa_schema.ssa_sessions SET execution_mode = 'agent' WHERE id = ${session.id}
      `;
    } else {
      await prisma.$executeRaw`
        UPDATE ssa_schema.ssa_sessions SET execution_mode = 'qper' WHERE id = ${session.id}
      `;
    }
  }
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T3: R Docker /execute-code 端点
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T3 — posts a small R snippet to the R service's `/api/v1/execute-code` endpoint
 * and checks that the response reports success with exactly two report blocks,
 * the first of which is a markdown block.
 *
 * @throws Error starting with `SKIP:` when the R service health check fails.
 * @throws Error when the response does not have the expected shape.
 */
async function t3_rExecuteCode(): Promise<void> {
  // Probe the R service first; an unreachable service skips the test instead of failing it.
  try {
    await axios.get(`${R_SERVICE_URL}/health`, { timeout: 5000 });
  } catch {
    throw new Error('SKIP: R Docker 服务不可用');
  }

  // Run a trivial snippet that builds two report blocks with the helper functions
  // make_markdown_block / make_kv_block.
  const response = await axios.post(
    `${R_SERVICE_URL}/api/v1/execute-code`,
    {
      code: `
        blocks <- list()
        blocks[[1]] <- make_markdown_block("## 测试结果\\n双通道 E2E 测试通过", title = "测试")
        blocks[[2]] <- make_kv_block(items = list("状态" = "成功", "时间" = as.character(Sys.time())), title = "概况")
        list(status = "success", report_blocks = blocks)
      `,
      session_id: 'e2e-test',
      timeout: 30,
    },
    // HTTP timeout is larger than the timeout value sent in the request body.
    { timeout: 35000 },
  );

  assert(response.data?.status === 'success', `R 执行状态不是 success: ${response.data?.status}`);
  assert(
    Array.isArray(response.data?.result?.report_blocks),
    'report_blocks 不是数组',
  );
  assert(
    response.data.result.report_blocks.length === 2,
    `预期 2 个 block,实际 ${response.data.result.report_blocks.length}`,
  );

  const markdownBlock = response.data.result.report_blocks[0];
  assert(markdownBlock.type === 'markdown', `Block 0 类型应为 markdown,实际 ${markdownBlock.type}`);

  console.log(` R 执行耗时: ${response.data.duration_ms}ms, blocks: ${response.data.result.report_blocks.length}`);
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T4: R Docker 错误处理
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T4 — posts R code that deliberately calls `stop()` to `/api/v1/execute-code`
 * and checks that the response body reports status "error" and that its
 * message contains the original error text.
 *
 * @throws Error starting with `SKIP:` when the R service health check fails.
 * @throws Error when the error is not reported as expected.
 */
async function t4_rExecuteCodeError(): Promise<void> {
  // An unreachable R service skips the test instead of failing it.
  try {
    await axios.get(`${R_SERVICE_URL}/health`, { timeout: 5000 });
  } catch {
    throw new Error('SKIP: R Docker 服务不可用');
  }

  const response = await axios.post(
    `${R_SERVICE_URL}/api/v1/execute-code`,
    {
      code: 'stop("这是一个故意的错误")',
      session_id: 'e2e-test-error',
      timeout: 10,
    },
    { timeout: 15000 },
  );

  assert(response.data?.status === 'error', '错误代码应返回 error 状态');
  assert(
    response.data?.message?.includes('故意的错误'),
    `错误消息应包含原始错误: ${response.data?.message}`,
  );

  console.log(` 错误捕获正确: "${response.data.message}"`);
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T5: AgentPlannerService 单元测试
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T5 — asks AgentPlannerService for an analysis plan on the newest active
 * session that has uploaded data, and checks that the plan has a title,
 * at least one step and a non-empty raw text.
 *
 * @throws Error starting with `SKIP:` when no active session with data exists.
 * @throws Error when the generated plan is missing a required part.
 */
async function t5_agentPlanner(): Promise<void> {
  // Imported lazily, so the service module is only loaded when this test runs.
  const { agentPlannerService } = await import('../src/modules/ssa/services/AgentPlannerService.js');

  // Newest active session that has a data file attached.
  const session = await prisma.ssaSession.findFirst({
    where: { status: 'active', dataOssKey: { not: null } },
    orderBy: { createdAt: 'desc' },
  });

  if (!session) throw new Error('SKIP: 无有数据的 session');

  const plan = await agentPlannerService.generatePlan(
    session.id,
    '帮我做一个基线特征表,比较两组的差异',
    [],
  );

  assert(!!plan.title, '计划标题为空');
  assert(plan.steps.length > 0, '计划步骤为空');
  assert(!!plan.rawText, 'rawText 为空');

  console.log(` 计划标题: ${plan.title}`);
  console.log(` 设计类型: ${plan.designType}`);
  console.log(` 步骤数: ${plan.steps.length}`);
  for (const step of plan.steps) {
    console.log(` ${step.order}. ${step.method}: ${step.description}`);
  }
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T6: AgentCoderService 单元测试
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T6 — asks AgentCoderService to generate R code for a fixed mock plan on the
 * newest active session that has uploaded data, and checks that the code is
 * longer than 50 characters and references the input data.
 *
 * @throws Error starting with `SKIP:` when no active session with data exists.
 * @throws Error when the generated code is too short or never touches the data.
 */
async function t6_agentCoder(): Promise<void> {
  // Imported lazily, so the service module is only loaded when this test runs.
  const { agentCoderService } = await import('../src/modules/ssa/services/AgentCoderService.js');

  const session = await prisma.ssaSession.findFirst({
    where: { status: 'active', dataOssKey: { not: null } },
    orderBy: { createdAt: 'desc' },
  });

  if (!session) throw new Error('SKIP: 无有数据的 session');

  // Minimal hand-written plan with a single descriptive-statistics step.
  const mockPlan = {
    title: '基线特征表分析',
    designType: '横断面研究',
    variables: {
      outcome: [],
      predictors: [],
      grouping: 'group',
      confounders: [],
    },
    steps: [
      { order: 1, method: '描述性统计', description: '生成基线特征表', rationale: '了解数据分布' },
    ],
    assumptions: [],
    rawText: '基线特征表分析计划',
  };

  const generated = await agentCoderService.generateCode(session.id, mockPlan);

  assert(generated.code.length > 50, `生成代码太短: ${generated.code.length} chars`);
  // Match `df` as a whole identifier: a plain substring test would also accept
  // unrelated text such as `pdf(`.
  const referencesData =
    generated.code.includes('load_input_data') || /\bdf\b/.test(generated.code);
  assert(referencesData, '代码中未包含数据加载');

  console.log(` 代码长度: ${generated.code.length} chars`);
  console.log(` 依赖包: ${generated.requiredPackages.join(', ') || '(无额外依赖)'}`);
  console.log(` 代码前 100 字符: ${generated.code.slice(0, 100).replace(/\n/g, ' ')}...`);
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T7: AgentReviewerService 单元测试
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T7 — runs AgentReviewerService on two R snippets for the same plan:
 *   - a harmless snippet, for which only the shape of the review is asserted
 *     (`passed` boolean, `score` number, `comments` array);
 *   - a snippet containing `install.packages` and `system("rm -rf /")`, for which
 *     the outcome is only logged: a review that passes prints a warning but does
 *     not fail the test.
 *
 * @throws Error when the review result of the harmless snippet has the wrong shape.
 */
async function t7_agentReviewer(): Promise<void> {
  // Imported lazily, so the service module is only loaded when this test runs.
  const { agentReviewerService } = await import('../src/modules/ssa/services/AgentReviewerService.js');

  const safePlan = {
    title: '基线分析',
    designType: '队列研究',
    variables: {
      outcome: ['death'],
      predictors: ['age', 'sex'],
      grouping: 'treatment',
      confounders: [],
    },
    steps: [{ order: 1, method: '基线特征表', description: '描述统计', rationale: '基线比较' }],
    assumptions: [],
    rawText: '',
  };

  const safeCode = `
    df <- load_input_data(input)
    library(gtsummary)
    tbl <- df %>%
      tbl_summary(by = treatment, include = c(age, sex, death)) %>%
      add_p()
    blocks <- list()
    blocks[[1]] <- make_markdown_block("## 基线特征表")
    list(status = "success", report_blocks = blocks)
  `;

  const review = await agentReviewerService.review(safePlan, safeCode);

  assert(typeof review.passed === 'boolean', 'passed 应为 boolean');
  assert(typeof review.score === 'number', 'score 应为 number');
  assert(Array.isArray(review.comments), 'comments 应为 array');

  console.log(` 审核通过: ${review.passed}`);
  console.log(` 评分: ${review.score}/100`);
  console.log(` 问题数: ${review.issues.length}`);
  for (const comment of review.comments) {
    console.log(` - ${comment}`);
  }

  // Review of deliberately dangerous code (package installation + shell command).
  const dangerCode = `
    install.packages("hacker_pkg")
    system("rm -rf /")
    df <- load_input_data(input)
    list(status = "success", report_blocks = list())
  `;

  const dangerReview = await agentReviewerService.review(safePlan, dangerCode);
  console.log(` 危险代码审核通过: ${dangerReview.passed} (预期 false)`);
  console.log(` 危险代码问题数: ${dangerReview.issues.length}`);

  // Reported as a warning only; the verdict is not asserted.
  if (dangerReview.passed) {
    console.log(' ⚠️ 警告: 危险代码未被拦截,Prompt 需要加强');
  }
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// T8: Agent Execution 记录 CRUD
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * T8 — create / update / read / delete round trip on the agent execution model
 * (`ssaAgentExecution`) for the newest active session.
 *
 * The created row is deleted in a `finally` block, so a failed update or
 * assertion does not leave test data behind.
 *
 * @throws Error starting with `SKIP:` when no active session exists.
 * @throws Error when the record cannot be created or the update is not persisted.
 */
async function t8_agentExecutionCrud(): Promise<void> {
  const session = await prisma.ssaSession.findFirst({
    where: { status: 'active' },
    orderBy: { createdAt: 'desc' },
  });

  if (!session) throw new Error('SKIP: 无可用 session');

  // NOTE(review): the model is reached through an `any` cast; presumably the
  // generated client typings may not include it yet — confirm and drop the cast.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const agentExecutions = (prisma as any).ssaAgentExecution;

  // Create
  const exec = await agentExecutions.create({
    data: {
      sessionId: session.id,
      query: 'E2E 测试查询',
      status: 'pending',
    },
  });
  assert(!!exec.id, '创建执行记录失败');
  console.log(` 创建记录: ${exec.id}`);

  try {
    // Update
    await agentExecutions.update({
      where: { id: exec.id },
      data: {
        status: 'completed',
        planText: '测试计划',
        generatedCode: 'print("hello")',
        durationMs: 1234,
      },
    });

    // Read back and verify; optional chaining turns a missing row into a
    // readable assertion failure instead of a TypeError.
    const updated = await agentExecutions.findUnique({
      where: { id: exec.id },
    });
    assert(updated?.status === 'completed', `状态应为 completed,实际 ${updated?.status}`);
    assert(updated?.durationMs === 1234, `耗时应为 1234,实际 ${updated?.durationMs}`);
  } finally {
    // Delete (cleanup) — runs even when a step above throws.
    await agentExecutions.delete({ where: { id: exec.id } });
  }

  console.log(' CRUD 全流程通过');
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// Main
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * Entry point: runs T1–T8 sequentially through `runTest`, prints a summary with
 * the details of failed and skipped tests, disconnects Prisma and exits the
 * process with code 1 when any test failed, 0 otherwise.
 */
async function main(): Promise<void> {
  console.log('\n╔══════════════════════════════════════════════╗');
  console.log('║ SSA 双通道架构 E2E 测试 (Phase 1~3) ║');
  console.log('╚══════════════════════════════════════════════╝\n');

  console.log('📦 Phase 1: 基础设施');
  await runTest('T1', '数据库迁移验证(execution_mode + agent_executions)', t1_dbMigration);
  await runTest('T2', 'Session execution mode 切换', t2_executionModeSwitch);
  await runTest('T3', 'R Docker /execute-code 正常执行', t3_rExecuteCode);
  await runTest('T4', 'R Docker /execute-code 错误处理', t4_rExecuteCodeError);

  console.log('\n🤖 Phase 2: Agent 服务');
  await runTest('T5', 'AgentPlannerService 规划能力', t5_agentPlanner);
  await runTest('T6', 'AgentCoderService R 代码生成', t6_agentCoder);
  await runTest('T7', 'AgentReviewerService 审核能力', t7_agentReviewer);
  await runTest('T8', 'Agent Execution 记录 CRUD', t8_agentExecutionCrud);

  // Summary
  console.log('\n' + '═'.repeat(50));
  const failedResults = results.filter((r) => r.status === 'fail');
  const skippedResults = results.filter((r) => r.status === 'skip');
  const passed = results.filter((r) => r.status === 'pass').length;
  const failed = failedResults.length;
  const skipped = skippedResults.length;
  const totalMs = results.reduce((sum, r) => sum + r.duration, 0);

  console.log(`\n📊 测试结果: ${passed} 通过 / ${failed} 失败 / ${skipped} 跳过 (总耗时 ${totalMs}ms)`);

  if (failed > 0) {
    console.log('\n❌ 失败详情:');
    for (const r of failedResults) {
      console.log(` ${r.id} ${r.name}: ${r.message}`);
    }
  }

  if (skipped > 0) {
    console.log('\n⏭️ 跳过详情:');
    for (const r of skippedResults) {
      console.log(` ${r.id} ${r.name}: ${r.message}`);
    }
  }

  console.log('\n' + (failed === 0 ? '🎉 所有测试通过!' : '⚠️ 有测试失败,请检查。'));

  // Disconnect before exiting; the exit code reflects whether any test failed.
  await prisma.$disconnect();
  process.exit(failed > 0 ? 1 : 0);
}
|
||||
|
||||
// Start the suite. Errors that escape main() (individual test errors are already
// captured by runTest) are logged and the process exits with code 1.
main().catch(async (err: unknown) => {
  console.error('测试执行异常:', err);
  try {
    await prisma.$disconnect();
  } finally {
    // Exit with a failure code even if disconnecting itself rejects.
    process.exit(1);
  }
});
|
||||
Reference in New Issue
Block a user