Files
AIclinicalresearch/backend/scripts/test-ssa-phase-q-e2e.ts
HaHafeng 371e1c069c feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers
Implement the full QPER intelligent analysis pipeline:

- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement

- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging

- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output

- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 18:15:53 +08:00

495 lines
18 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* SSA Phase Q — 端到端集成测试
*
* 完整链路:登录 → 创建会话+上传文件 → 数据画像 → LLM 意图解析 → 追问 → Q→P 规划
*
* 依赖:Node.js 后端 + PostgreSQL + Python extraction_service + LLM 服务
* 运行方式:npx tsx scripts/test-ssa-phase-q-e2e.ts
*
* 测试数据:docs/03-业务模块/SSA-智能统计分析/05-测试文档/test.csv
* 测试用户:13800000001 / 123456
*/
import { readFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
// Recreate __filename / __dirname from import.meta.url so paths can be resolved relative to this script.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Origin that every request path in this script is appended to.
const BASE_URL = 'http://localhost:3000';
// Credentials posted to the password-login endpoint by testLogin().
const TEST_PHONE = '13800000001';
const TEST_PASSWORD = '123456';
// CSV fixture uploaded when the session is created; resolved relative to this script's directory.
const TEST_CSV_PATH = join(__dirname, '../../docs/03-业务模块/SSA-智能统计分析/05-测试文档/test.csv');
// ────────────────────────────────────────────
// 工具函数
// ────────────────────────────────────────────
// Running tallies: assert() bumps passed/failed, skip() bumps skipped; main() prints them in the summary.
let passed = 0;
let failed = 0;
let skipped = 0;
// Bearer token stored by testLogin() and read by authHeaders().
let token = '';
// Session id stored by testCreateSessionWithUpload() and sent with the later workflow requests.
let sessionId = '';
/**
 * Records the outcome of a single check and prints one result line.
 *
 * Passing lines are prefixed with ✅ and failing lines with ❌ so the two can
 * be told apart when scanning the log. The optional detail is printed only
 * for failures and is separated from the test name by " — ", the same
 * separator `skip` uses for its reason.
 *
 * Side effects: increments the module-level `passed` or `failed` counter.
 *
 * @param condition - Result of the check; a truthy value counts as a pass.
 * @param testName - Label printed for the check.
 * @param detail - Optional diagnostic text appended to a failing line.
 */
function assert(condition: boolean, testName: string, detail?: string) {
  if (condition) {
    console.log(`  ✅ ${testName}`);
    passed++;
    return;
  }
  // Only add the separator when there is something to show after it.
  const suffix = detail ? ` — ${detail}` : '';
  console.log(`  ❌ ${testName}${suffix}`);
  failed++;
}
/**
 * Reports a check that was intentionally not run.
 *
 * Side effects: prints one line and increments the module-level `skipped` counter.
 *
 * @param testName - Label of the check that was skipped.
 * @param reason - Why it was skipped.
 */
function skip(testName: string, reason: string) {
  const line = ` ⏭️ ${testName} — 跳过:${reason}`;
  console.log(line);
  skipped += 1;
}
/**
 * Prints a titled banner: a blank line, a 60-character rule, the title, and a closing rule.
 *
 * @param title - Heading text shown between the two rules.
 */
function section(title: string) {
  const rule = '─'.repeat(60);
  console.log(`\n${rule}`);
  console.log(`📋 ${title}`);
  console.log(rule);
}
/**
 * Builds request headers carrying the current bearer token.
 *
 * @param contentType - When non-empty, added as the `Content-Type` header.
 * @returns A fresh header map containing `Authorization` and, optionally, `Content-Type`.
 */
function authHeaders(contentType?: string): Record<string, string> {
  const base: Record<string, string> = { 'Authorization': `Bearer ${token}` };
  return contentType ? { ...base, 'Content-Type': contentType } : base;
}
/**
 * Sends a POST request to `BASE_URL + path` and returns the status with the decoded body.
 *
 * @param path - Request path appended to `BASE_URL`.
 * @param body - Sent verbatim when it is a string, otherwise JSON-serialized.
 * @param headers - Headers to send; when omitted (or falsy) the authenticated JSON headers are used.
 * @returns `{ status, data }` where `data` is the parsed JSON body, or the raw text when the body is not valid JSON.
 */
async function apiPost(path: string, body: any, headers?: Record<string, string>): Promise<any> {
  const requestHeaders = headers || authHeaders('application/json');
  const payload = typeof body === 'string' ? body : JSON.stringify(body);
  const response = await fetch(`${BASE_URL}${path}`, {
    method: 'POST',
    headers: requestHeaders,
    body: payload,
  });
  const raw = await response.text();
  let data: any;
  try {
    data = JSON.parse(raw);
  } catch {
    // Non-JSON responses (e.g. plain-text errors) are handed back untouched.
    data = raw;
  }
  return { status: response.status, data };
}
// ────────────────────────────────────────────
// 测试 1: 登录获取 Token
// ────────────────────────────────────────────
/**
 * Test 1: logs in with the test credentials and stores the access token in `token`.
 *
 * The token is looked up in three places in the response body, in order:
 * `data.tokens.accessToken`, `accessToken`, then `token`.
 *
 * @returns `true` when a non-empty token was obtained, otherwise `false`.
 */
async function testLogin() {
  section('测试 1: 登录认证');
  try {
    const res = await apiPost(
      '/api/v1/auth/login/password',
      { phone: TEST_PHONE, password: TEST_PASSWORD },
      // Explicit headers: no bearer token exists yet.
      { 'Content-Type': 'application/json' },
    );
    const statusOk = res.status === 200;
    assert(statusOk, `登录返回 200实际 ${res.status}`, JSON.stringify(res.data).substring(0, 200));
    if (statusOk && res.data) {
      const payload = res.data;
      token = payload?.data?.tokens?.accessToken || payload?.accessToken || payload?.token || '';
      assert(token.length > 0, '获取到 JWT Token', `token 长度: ${token.length}`);
      const user = payload?.data?.user;
      if (user) {
        console.log(` 用户信息: ${user.name || user.phone || 'N/A'}, 角色: ${user.role}`);
      }
    }
  } catch (e: any) {
    assert(false, '登录请求失败', e.message);
  }
  if (token) {
    return true;
  }
  console.log('\n ⚠️ Token 获取失败,后续测试无法继续');
  return false;
}
// ────────────────────────────────────────────
// 测试 2: 创建会话 + 上传 test.csv
// ────────────────────────────────────────────
/**
 * Test 2: uploads the CSV fixture as multipart form data to create a session,
 * then stores the returned id in `sessionId`.
 *
 * When the response carries a `schema`, its column count and row count are checked too.
 *
 * @returns `true` when a session id was obtained, otherwise `false`.
 */
async function testCreateSessionWithUpload() {
  section('测试 2: 创建会话 + 上传 test.csv');
  try {
    const csvBuffer = readFileSync(TEST_CSV_PATH);
    assert(csvBuffer.length > 0, `test.csv 文件读取成功(${csvBuffer.length} bytes`);

    // Multipart body; Content-Type is deliberately not set in the headers below.
    const formData = new FormData();
    formData.append('file', new Blob([csvBuffer], { type: 'text/csv' }), 'test.csv');

    const res = await fetch(`${BASE_URL}/api/v1/ssa/sessions/`, {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${token}` },
      body: formData,
    });
    const data = await res.json();
    assert(res.status === 200, `创建会话返回 200实际 ${res.status}`, JSON.stringify(data).substring(0, 300));

    const returnedId = data.sessionId;
    if (!returnedId) {
      assert(false, '未返回 sessionId', JSON.stringify(data).substring(0, 200));
    } else {
      sessionId = returnedId;
      assert(true, `会话 ID: ${sessionId}`);
    }

    const schema = data.schema;
    if (schema) {
      assert(schema.columns?.length > 0, `数据 Schema 解析成功(${schema.columns?.length} 列)`);
      assert(schema.rowCount > 0, `行数: ${schema.rowCount}`);
      const preview = schema.columns?.slice(0, 8).map((c: any) => c.name).join(', ');
      console.log(` 列名: ${preview}...`);
    }
  } catch (e: any) {
    assert(false, '创建会话失败', e.message);
  }
  if (sessionId) {
    return true;
  }
  console.log('\n ⚠️ SessionId 获取失败,后续测试无法继续');
  return false;
}
// ────────────────────────────────────────────
// 测试 3: 数据画像(Python DataProfiler)
// ────────────────────────────────────────────
/**
 * Test 3: requests the data profile for the current session and checks its basic shape.
 *
 * Accepts either snake_case (`row_count`, `column_count`) or camelCase
 * (`totalRows`, `totalColumns`) field names. Prints the first five columns and
 * reports any column flagged `is_id_like === true`; finding none is logged, not failed.
 */
async function testDataProfile() {
  section('测试 3: 数据画像Python DataProfiler');
  try {
    const res = await apiPost('/api/v1/ssa/workflow/profile', { sessionId });
    assert(res.status === 200, `画像请求返回 200实际 ${res.status}`);

    if (!res.data?.success) {
      assert(false, '画像生成失败', res.data?.error || JSON.stringify(res.data).substring(0, 200));
      return;
    }

    const profile = res.data.profile;
    assert(!!profile, '画像数据非空');
    if (!profile) {
      return;
    }

    assert(profile.row_count > 0 || profile.totalRows > 0,
      `行数: ${profile.row_count || profile.totalRows}`);
    assert(profile.column_count > 0 || profile.totalColumns > 0,
      `列数: ${profile.column_count || profile.totalColumns}`);

    const cols = profile.columns || [];
    if (!(cols.length > 0)) {
      return;
    }

    const columnLabel = (c: any) => c.name || c.column_name;

    console.log(` 前 5 列类型:`);
    cols.slice(0, 5).forEach((c: any) => {
      const missing = c.missing_ratio ?? c.missingPercent ?? 'N/A';
      console.log(` ${columnLabel(c)}: ${c.type || c.dtype} (missing: ${missing})`);
    });

    // is_id_like flag check.
    const idLikeCols = cols.filter((c: any) => c.is_id_like === true);
    if (idLikeCols.length === 0) {
      console.log(' 未检测到 ID-like 列test.csv 无 ID 列,符合预期)');
    } else {
      assert(true, `检测到 ${idLikeCols.length} 个 ID-like 列: ${idLikeCols.map(columnLabel).join(', ')}`);
    }
  } catch (e: any) {
    assert(false, '画像请求异常', e.message);
  }
}
// ────────────────────────────────────────────
// 测试 4: LLM 意图解析(Phase Q 核心)
// ────────────────────────────────────────────
/**
 * Test 4: sends several natural-language queries to the intent endpoint and
 * checks the parsed intent of each one.
 *
 * Per scenario it checks:
 *  - HTTP status is 200 and the response reports success;
 *  - the response actually carries an `intent` object (a missing intent is
 *    recorded as a failure instead of letting the scenario pass with no checks);
 *  - `goal` matches the expected goal, when the scenario names one;
 *  - confidence is >= 0.7 with no clarification for clear queries, or is low /
 *    asks for clarification for the deliberately vague query;
 *  - the outcome and predictor variable names exist in the known column list
 *    (case-insensitive), to catch invented column names;
 *  - when clarification cards are returned, the first card has at least two options.
 *
 * Scenarios run sequentially; an exception in one is recorded as a failure
 * and the loop moves on to the next.
 */
async function testIntentParsing() {
  section('测试 4: LLM 意图理解Phase Q 核心)');
  const testQueries = [
    {
      name: '场景 A — 明确的差异比较',
      query: '比较 sex 不同组的 Yqol 有没有差别',
      expectGoal: 'comparison',
      expectHighConfidence: true,
    },
    {
      name: '场景 B — 相关分析',
      query: '分析 age 和 bmi 的相关性',
      expectGoal: 'correlation',
      expectHighConfidence: true,
    },
    {
      name: '场景 C — 回归分析',
      query: 'age、smoke、bmi 对 Yqol 的影响,做个多因素分析',
      expectGoal: 'regression',
      expectHighConfidence: true,
    },
    {
      name: '场景 D — 模糊意图(应触发追问)',
      query: '帮我分析一下这个数据',
      expectGoal: null, // vague on purpose: no specific goal is expected
      expectHighConfidence: false,
    },
    {
      name: '场景 E — 描述统计',
      query: '描述一下数据的基本情况',
      expectGoal: 'descriptive',
      expectHighConfidence: true,
    },
  ];

  // Column whitelist used for the invented-name check. It does not depend on
  // the scenario, so it is built once here rather than on every iteration.
  const realColumnsLower = new Set(
    ['sex', 'smoke', 'age', 'bmi', 'mouth_open', 'bucal_relax',
      'toot_morph', 'root_number', 'root_curve', 'lenspace', 'denseratio',
      'Pglevel', 'Pgverti', 'Winter', 'presyp', 'flap', 'operation',
      'time', 'surgage', 'Yqol', 'times'].map(c => c.toLowerCase()),
  );
  const isRealColumn = (v: string) => realColumnsLower.has(v.toLowerCase());

  for (const tc of testQueries) {
    console.log(`\n 🔬 ${tc.name}`);
    console.log(` Query: "${tc.query}"`);
    try {
      const startTime = Date.now();
      const res = await apiPost('/api/v1/ssa/workflow/intent', {
        sessionId,
        userQuery: tc.query,
      });
      const elapsed = Date.now() - startTime;
      assert(res.status === 200, ` 返回 200实际 ${res.status}`, res.data?.error);

      if (!res.data?.success) {
        assert(false, ` Intent 解析失败`, res.data?.error || JSON.stringify(res.data).substring(0, 200));
        continue;
      }

      const intent = res.data.intent;
      console.log(` 耗时: ${elapsed}ms`);
      console.log(` Goal: ${intent?.goal}, Confidence: ${intent?.confidence}`);
      console.log(` Y: ${intent?.outcome_var || 'null'}, X: ${JSON.stringify(intent?.predictor_vars || [])}`);
      console.log(` Design: ${intent?.design}, needsClarification: ${intent?.needsClarification}`);

      if (!intent) {
        // success=true without an intent used to skip every check below and
        // count as a pass; surface it as a failure instead.
        assert(false, ` 返回 intent 对象`, '响应 success=true 但缺少 intent 字段');
        continue;
      }

      // Goal matches the expectation (only when the scenario names one).
      if (tc.expectGoal) {
        assert(intent.goal === tc.expectGoal,
          ` Goal = ${tc.expectGoal}(实际 ${intent.goal}`);
      }

      // Confidence / clarification expectations.
      if (tc.expectHighConfidence) {
        assert(intent.confidence >= 0.7,
          ` 高置信度 >= 0.7(实际 ${intent.confidence}`);
        assert(!intent.needsClarification,
          ` 无需追问(实际 needsClarification=${intent.needsClarification}`);
      } else {
        // A vague query should come back with low confidence or ask for clarification.
        const isLowConfOrClarify = intent.confidence < 0.7 || intent.needsClarification;
        assert(isLowConfOrClarify,
          ` 低置信度或需追问confidence=${intent.confidence}, needsClarification=${intent.needsClarification}`);
      }

      // Variable names must come from the known columns (guards against invented names).
      if (intent.outcome_var) {
        assert(isRealColumn(intent.outcome_var),
          ` Y 变量 "${intent.outcome_var}" 存在于数据中`,
          `变量 "${intent.outcome_var}" 不在数据列名中(可能是 LLM 幻觉)`);
      }
      if (intent.predictor_vars?.length > 0) {
        const allReal = intent.predictor_vars.every((v: string) => isRealColumn(v));
        assert(allReal,
          ` X 变量 ${JSON.stringify(intent.predictor_vars)} 全部存在于数据中`,
          `部分变量可能为 LLM 幻觉`);
      }

      // Clarification cards, when the intent asks for clarification and cards were returned.
      if (intent.needsClarification && res.data.clarificationCards?.length > 0) {
        const cards = res.data.clarificationCards;
        console.log(` 追问卡片: ${cards.length}`);
        cards.forEach((card: any, i: number) => {
          console.log(` 卡片 ${i + 1}: ${card.question}`);
          card.options?.slice(0, 3).forEach((opt: any) => {
            console.log(` - ${opt.label}`);
          });
        });
        assert(cards[0].options?.length >= 2, ` 追问卡片有 >= 2 个选项`);
      }
    } catch (e: any) {
      assert(false, ` ${tc.name} 请求异常`, e.message);
    }
  }
}
// ────────────────────────────────────────────
// 测试 5: Q→P 全链路(Intent → Plan)
// ────────────────────────────────────────────
/**
 * Test 5: sends queries to the plan endpoint and checks the returned workflow plan.
 *
 * Per case it checks the HTTP status, that the plan has at least the expected
 * number of steps, and that some step's `tool_code` starts with the expected
 * prefix. `planned_trace` is checked when present and counted as skipped when
 * absent. An EPV warning is only printed; the description, when present, must
 * be longer than 10 characters.
 */
async function testQtoPPipeline() {
  section('测试 5: Q→P 全链路Intent → WorkflowPlan');
  const testCases = [
    {
      name: '差异比较 → T 检验流程',
      query: '比较 sex 不同组的 Yqol 有没有差别',
      expectSteps: 2, // descriptive statistics + main analysis (at least)
      expectTool: 'ST_',
    },
    {
      name: '回归分析 → Logistic 流程',
      query: 'age、smoke、bmi 对 Yqol 的预测作用,做个 Logistic 回归',
      expectSteps: 2,
      expectTool: 'ST_LOGISTIC',
    },
  ];

  for (const tc of testCases) {
    console.log(`\n 🔬 ${tc.name}`);
    console.log(` Query: "${tc.query}"`);
    try {
      const startedAt = Date.now();
      const res = await apiPost('/api/v1/ssa/workflow/plan', {
        sessionId,
        userQuery: tc.query,
      });
      const elapsed = Date.now() - startedAt;
      assert(res.status === 200, ` 返回 200实际 ${res.status}`, res.data?.error);

      const plan = res.data?.success ? res.data.plan : undefined;
      if (!plan) {
        assert(false, ` 规划失败`, res.data?.error || JSON.stringify(res.data).substring(0, 200));
        continue;
      }

      console.log(` 耗时: ${elapsed}ms`);
      console.log(` 标题: ${plan.title}`);
      console.log(` 步骤数: ${plan.total_steps}`);
      assert(plan.total_steps >= tc.expectSteps,
        ` 步骤数 >= ${tc.expectSteps}(实际 ${plan.total_steps}`);

      // Print one line per step.
      plan.steps?.forEach((step: any, i: number) => {
        const sensitivity = step.is_sensitivity ? ' [敏感性分析]' : '';
        const guardrail = step.switch_condition ? ` 🛡️${step.switch_condition}` : '';
        console.log(` 步骤 ${i + 1}: ${step.tool_name} (${step.tool_code})${sensitivity}${guardrail}`);
      });

      // The plan must contain a tool whose code starts with the expected prefix.
      const hasExpectedTool = plan.steps?.some(
        (s: any) => s.tool_code?.startsWith(tc.expectTool)
      );
      assert(hasExpectedTool,
        ` 包含 ${tc.expectTool}* 工具`,
        `工具列表: ${plan.steps?.map((s: any) => s.tool_code).join(', ')}`);

      // PlannedTrace check.
      const trace = plan.planned_trace;
      if (!trace) {
        skip('PlannedTrace 检查', '计划中未返回 planned_trace');
      } else {
        console.log(` PlannedTrace:`);
        console.log(` Primary: ${trace.primaryTool}`);
        console.log(` Fallback: ${trace.fallbackTool || 'null'}`);
        console.log(` SwitchCondition: ${trace.switchCondition || 'null'}`);
        console.log(` Template: ${trace.templateUsed}`);
        assert(!!trace.primaryTool, ` PlannedTrace 包含 primaryTool`);
        assert(!!trace.templateUsed, ` PlannedTrace 包含 templateUsed`);
      }

      // EPV warning is informational only.
      if (plan.epv_warning) {
        console.log(` ⚠️ EPV Warning: ${plan.epv_warning}`);
      }

      // Description check.
      if (plan.description) {
        assert(plan.description.length > 10, ` 规划描述非空(${plan.description.length} 字符)`);
        console.log(` 描述: ${plan.description.substring(0, 100)}...`);
      }
    } catch (e: any) {
      assert(false, ` ${tc.name} 请求异常`, e.message);
    }
  }
}
// ────────────────────────────────────────────
// 运行所有测试
// ────────────────────────────────────────────
/**
 * Entry point: prints the run banner, performs pre-flight checks, runs the
 * five tests in order, prints the summary, and exits.
 *
 * Exits with code 1 when the CSV fixture cannot be read, when `GET /health`
 * fails or does not return 200, when login or session creation fails, or when
 * any check failed; exits with code 0 otherwise.
 */
async function main() {
  const banner = [
    '\n🧪 SSA Phase Q+P — 端到端集成测试\n',
    '测试链路:登录 → 上传 CSV → 数据画像 → LLM Intent → Q→P Plan',
    `测试用户:${TEST_PHONE}`,
    `后端地址:${BASE_URL}`,
    `测试文件:${TEST_CSV_PATH}\n`,
  ];
  for (const line of banner) {
    console.log(line);
  }

  // Pre-flight 1: the fixture must be readable.
  try {
    readFileSync(TEST_CSV_PATH);
  } catch {
    console.error('❌ test.csv 文件不存在,请检查路径');
    process.exit(1);
  }

  // Pre-flight 2: the backend must answer its health endpoint with 200.
  const backendReachable = async () => {
    try {
      const health = await fetch(`${BASE_URL}/health`);
      return health.status === 200;
    } catch {
      return false;
    }
  };
  if (!(await backendReachable())) {
    console.error('❌ 后端服务未启动或不可达');
    process.exit(1);
  }
  console.log('✅ 后端服务可达\n');

  // Tests run sequentially; the first two are prerequisites for the rest.
  if (!(await testLogin())) {
    console.log('\n⛔ 登录失败,终止测试');
    process.exit(1);
  }
  if (!(await testCreateSessionWithUpload())) {
    console.log('\n⛔ 会话创建失败,终止测试');
    process.exit(1);
  }
  await testDataProfile();
  await testIntentParsing();
  await testQtoPPipeline();

  // Summary.
  const doubleRule = '═'.repeat(60);
  const total = passed + failed + skipped;
  console.log(`\n${doubleRule}`);
  console.log(`📊 测试结果汇总:${passed} 通过 / ${failed} 失败 / ${skipped} 跳过 / ${total} 总计`);
  if (failed !== 0) {
    console.log(`⚠️ 有 ${failed} 个测试失败,请检查上方输出。`);
  } else {
    console.log('🎉 全部通过Phase Q+P 端到端验证成功。');
  }
  console.log(`\n📝 测试会话 ID: ${sessionId}(可在数据库中查询详情)`);
  console.log(doubleRule);
  process.exit(failed > 0 ? 1 : 0);
}
// Start the run; anything that escapes main() is logged and turned into exit code 1.
main().catch((err) => {
  console.error('💥 测试脚本异常:', err);
  process.exit(1);
});