feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers

Implement the full QPER intelligent analysis pipeline:

- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement

- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging

- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output

- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-21 18:15:53 +08:00
parent 428a22adf2
commit 371e1c069c
73 changed files with 9242 additions and 706 deletions

View File

@@ -0,0 +1,494 @@
/**
* SSA Phase Q — 端到端集成测试
*
* 完整链路:登录 → 创建会话+上传文件 → 数据画像 → LLM 意图解析 → 追问 → Q→P 规划
*
* 依赖: Node.js 后端 + PostgreSQL + Python extraction_service + LLM 服务
* 运行方式: npx tsx scripts/test-ssa-phase-q-e2e.ts
*
* 测试数据: docs/03-业务模块/SSA-智能统计分析/05-测试文档/test.csv
* 测试用户: 13800000001 / 123456
*/
import { readFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
// Derive this script's file path and directory from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Backend base URL and the credentials of the account used by testLogin().
const BASE_URL = 'http://localhost:3000';
const TEST_PHONE = '13800000001';
const TEST_PASSWORD = '123456';
// CSV fixture uploaded by testCreateSessionWithUpload(), resolved relative to this script.
const TEST_CSV_PATH = join(__dirname, '../../docs/03-业务模块/SSA-智能统计分析/05-测试文档/test.csv');
// ────────────────────────────────────────────
// Utility functions
// ────────────────────────────────────────────
// Running tallies: updated by assert() and skip(), reported by main().
let passed = 0;
let failed = 0;
let skipped = 0;
// Bearer token stored by testLogin() and read by authHeaders().
let token = '';
// Session id stored by testCreateSessionWithUpload() and sent with later requests.
let sessionId = '';
/**
 * Records the outcome of a single check and prints one line for it.
 *
 * A passing check increments `passed`; a failing one increments `failed`.
 * Passing and failing lines carry distinct markers so failures stand out in
 * the log, and the optional detail is separated from the test name.
 *
 * @param condition Whether the check succeeded.
 * @param testName  Human-readable label printed for the check.
 * @param detail    Extra context; only printed when the check fails.
 */
function assert(condition: boolean, testName: string, detail?: string) {
  if (condition) {
    console.log(` ✅ ${testName}`);
    passed++;
    return;
  }
  // Only append the separator when there is actually something to show.
  const suffix = detail ? ` — ${detail}` : '';
  console.log(` ❌ ${testName}${suffix}`);
  failed++;
}
/**
 * Counts a check as skipped and prints the reason.
 *
 * @param testName Label of the skipped check.
 * @param reason   Why the check was not performed.
 */
function skip(testName: string, reason: string) {
  skipped += 1;
  const line = ` ⏭️ ${testName} — 跳过:${reason}`;
  console.log(line);
}
/**
 * Prints a section heading framed by two 60-character horizontal rules.
 *
 * @param title Heading text shown between the rules.
 */
function section(title: string) {
  const rule = '─'.repeat(60);
  const lines = [`\n${rule}`, `📋 ${title}`, rule];
  for (const line of lines) {
    console.log(line);
  }
}
/**
 * Builds request headers carrying the current bearer token.
 *
 * @param contentType Optional value for the `Content-Type` header; the header
 *                    is omitted when this is empty or not given.
 * @returns A header map with `Authorization` and, optionally, `Content-Type`.
 */
function authHeaders(contentType?: string): Record<string, string> {
  const base = { 'Authorization': `Bearer ${token}` };
  return contentType ? { ...base, 'Content-Type': contentType } : base;
}
/**
 * Sends a POST request to the backend and returns its status plus body.
 *
 * @param path    Path appended to `BASE_URL`.
 * @param body    Request payload; strings are sent as-is, anything else is JSON-encoded.
 * @param headers Headers to send; defaults to authenticated JSON headers.
 * @returns `{ status, data }` where `data` is the parsed JSON body, or the raw
 *          response text when the body is not valid JSON.
 */
async function apiPost(path: string, body: any, headers?: Record<string, string>): Promise<any> {
  let payload: string;
  if (typeof body === 'string') {
    payload = body;
  } else {
    payload = JSON.stringify(body);
  }

  const response = await fetch(`${BASE_URL}${path}`, {
    method: 'POST',
    headers: headers || authHeaders('application/json'),
    body: payload,
  });

  const raw = await response.text();
  let data: any = raw;
  try {
    data = JSON.parse(raw);
  } catch {
    // Not JSON: fall through with the raw text as the data.
  }
  return { status: response.status, data };
}
// ────────────────────────────────────────────
// 测试 1: 登录获取 Token
// ────────────────────────────────────────────
/**
 * Test 1: logs in with the test account and stores the access token.
 *
 * The token is looked up at several response locations
 * (`data.tokens.accessToken`, `accessToken`, `token`).
 *
 * @returns `true` when a non-empty token is held afterwards, otherwise `false`.
 */
async function testLogin() {
  section('测试 1: 登录认证');

  try {
    const credentials = { phone: TEST_PHONE, password: TEST_PASSWORD };
    // Login is unauthenticated, so pass explicit headers instead of authHeaders().
    const res = await apiPost('/api/v1/auth/login/password', credentials, { 'Content-Type': 'application/json' });
    const statusOk = res.status === 200;
    assert(statusOk, `登录返回 200实际 ${res.status}`, JSON.stringify(res.data).substring(0, 200));

    if (statusOk && res.data) {
      const payload = res.data;
      token = payload?.data?.tokens?.accessToken || payload?.accessToken || payload?.token || '';
      assert(token.length > 0, '获取到 JWT Token', `token 长度: ${token.length}`);

      const user = payload?.data?.user;
      if (user) {
        console.log(` 用户信息: ${user.name || user.phone || 'N/A'}, 角色: ${user.role}`);
      }
    }
  } catch (e: any) {
    assert(false, '登录请求失败', e.message);
  }

  if (token) {
    return true;
  }
  console.log('\n ⚠️ Token 获取失败,后续测试无法继续');
  return false;
}
// ────────────────────────────────────────────
// 测试 2: 创建会话 + 上传 test.csv
// ────────────────────────────────────────────
/**
 * Test 2: creates an analysis session by uploading the CSV fixture.
 *
 * Stores the returned `sessionId` and, when the response carries a `schema`,
 * checks that it reports at least one column and one row.
 *
 * @returns `true` when a session id is held afterwards, otherwise `false`.
 */
async function testCreateSessionWithUpload() {
  section('测试 2: 创建会话 + 上传 test.csv');

  try {
    const fileBytes = readFileSync(TEST_CSV_PATH);
    assert(fileBytes.length > 0, `test.csv 文件读取成功(${fileBytes.length} bytes`);

    // Multipart upload; no Content-Type header is set on this request, only Authorization.
    const upload = new FormData();
    upload.append('file', new Blob([fileBytes], { type: 'text/csv' }), 'test.csv');

    const response = await fetch(`${BASE_URL}/api/v1/ssa/sessions/`, {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${token}` },
      body: upload,
    });
    const payload = await response.json();
    assert(response.status === 200, `创建会话返回 200实际 ${response.status}`, JSON.stringify(payload).substring(0, 300));

    if (!payload.sessionId) {
      assert(false, '未返回 sessionId', JSON.stringify(payload).substring(0, 200));
    } else {
      sessionId = payload.sessionId;
      assert(true, `会话 ID: ${sessionId}`);
    }

    const parsedSchema = payload.schema;
    if (parsedSchema) {
      assert(parsedSchema.columns?.length > 0, `数据 Schema 解析成功(${parsedSchema.columns?.length} 列)`);
      assert(parsedSchema.rowCount > 0, `行数: ${parsedSchema.rowCount}`);
      console.log(` 列名: ${parsedSchema.columns?.slice(0, 8).map((c: any) => c.name).join(', ')}...`);
    }
  } catch (e: any) {
    assert(false, '创建会话失败', e.message);
  }

  if (sessionId) {
    return true;
  }
  console.log('\n ⚠️ SessionId 获取失败,后续测试无法继续');
  return false;
}
// ────────────────────────────────────────────
// 测试 3: 数据画像Python DataProfiler
// ────────────────────────────────────────────
/**
 * Test 3: requests the data profile for the current session.
 *
 * Checks that row and column counts are positive (either snake_case or
 * camelCase field names are accepted), prints the first five columns, and
 * reports any columns flagged `is_id_like`.
 */
async function testDataProfile() {
  section('测试 3: 数据画像Python DataProfiler');

  try {
    const res = await apiPost('/api/v1/ssa/workflow/profile', { sessionId });
    assert(res.status === 200, `画像请求返回 200实际 ${res.status}`);

    if (!res.data?.success) {
      assert(false, '画像生成失败', res.data?.error || JSON.stringify(res.data).substring(0, 200));
      return;
    }

    const profile = res.data.profile;
    assert(!!profile, '画像数据非空');
    if (!profile) {
      return;
    }

    assert(profile.row_count > 0 || profile.totalRows > 0,
      `行数: ${profile.row_count || profile.totalRows}`);
    assert(profile.column_count > 0 || profile.totalColumns > 0,
      `列数: ${profile.column_count || profile.totalColumns}`);

    const columns = profile.columns || [];
    if (!(columns.length > 0)) {
      return;
    }

    console.log(` 前 5 列类型:`);
    for (const col of columns.slice(0, 5)) {
      console.log(` ${col.name || col.column_name}: ${col.type || col.dtype} (missing: ${col.missing_ratio ?? col.missingPercent ?? 'N/A'})`);
    }

    // Columns flagged is_id_like are only reported; their absence is not counted as a failure.
    const flagged = columns.filter((c: any) => c.is_id_like === true);
    if (flagged.length > 0) {
      assert(true, `检测到 ${flagged.length} 个 ID-like 列: ${flagged.map((c: any) => c.name || c.column_name).join(', ')}`);
    } else {
      console.log(' 未检测到 ID-like 列test.csv 无 ID 列,符合预期)');
    }
  } catch (e: any) {
    assert(false, '画像请求异常', e.message);
  }
}
// ────────────────────────────────────────────
// 测试 4: LLM 意图解析Phase Q 核心)
// ────────────────────────────────────────────
/**
 * Test 4: sends several natural-language queries to the intent endpoint and
 * checks the parsed intent for each.
 *
 * Per scenario it verifies the HTTP status, the expected goal (when one is
 * given), the confidence / clarification flags, and that every variable name
 * in the intent is one of the known column names (guarding against invented
 * names). A `success` response that carries no intent is recorded as a
 * failure instead of silently skipping every intent check.
 */
async function testIntentParsing() {
  section('测试 4: LLM 意图理解Phase Q 核心)');
  const testQueries = [
    {
      name: '场景 A — 明确的差异比较',
      query: '比较 sex 不同组的 Yqol 有没有差别',
      expectGoal: 'comparison',
      expectHighConfidence: true,
    },
    {
      name: '场景 B — 相关分析',
      query: '分析 age 和 bmi 的相关性',
      expectGoal: 'correlation',
      expectHighConfidence: true,
    },
    {
      name: '场景 C — 回归分析',
      query: 'age、smoke、bmi 对 Yqol 的影响,做个多因素分析',
      expectGoal: 'regression',
      expectHighConfidence: true,
    },
    {
      name: '场景 D — 模糊意图(应触发追问)',
      query: '帮我分析一下这个数据',
      expectGoal: null, // no specific goal expected
      expectHighConfidence: false,
    },
    {
      name: '场景 E — 描述统计',
      query: '描述一下数据的基本情况',
      expectGoal: 'descriptive',
      expectHighConfidence: true,
    },
  ];

  // Hard-coded column names used to validate variables returned in the intent.
  // Lowercased once, outside the loop, because the list does not change between scenarios.
  const realColumnsLower = new Set(
    ['sex', 'smoke', 'age', 'bmi', 'mouth_open', 'bucal_relax',
      'toot_morph', 'root_number', 'root_curve', 'lenspace', 'denseratio',
      'Pglevel', 'Pgverti', 'Winter', 'presyp', 'flap', 'operation',
      'time', 'surgage', 'Yqol', 'times'].map(c => c.toLowerCase())
  );
  const isRealColumn = (v: string) => realColumnsLower.has(v.toLowerCase());

  for (const tc of testQueries) {
    console.log(`\n 🔬 ${tc.name}`);
    console.log(` Query: "${tc.query}"`);
    try {
      const startTime = Date.now();
      const res = await apiPost('/api/v1/ssa/workflow/intent', {
        sessionId,
        userQuery: tc.query,
      });
      const elapsed = Date.now() - startTime;
      assert(res.status === 200, ` 返回 200实际 ${res.status}`, res.data?.error);

      if (!res.data?.success) {
        assert(false, ` Intent 解析失败`, res.data?.error || JSON.stringify(res.data).substring(0, 200));
        continue;
      }

      const intent = res.data.intent;
      console.log(` 耗时: ${elapsed}ms`);
      console.log(` Goal: ${intent?.goal}, Confidence: ${intent?.confidence}`);
      console.log(` Y: ${intent?.outcome_var || 'null'}, X: ${JSON.stringify(intent?.predictor_vars || [])}`);
      console.log(` Design: ${intent?.design}, needsClarification: ${intent?.needsClarification}`);

      if (!intent) {
        // A success flag without an intent must not pass silently.
        assert(false, ` Intent 数据为空`, JSON.stringify(res.data).substring(0, 200));
        continue;
      }

      // Goal matches the expectation (only when the scenario names one).
      if (tc.expectGoal) {
        assert(intent.goal === tc.expectGoal,
          ` Goal = ${tc.expectGoal}(实际 ${intent.goal}`);
      }

      // Confidence / clarification expectations.
      if (tc.expectHighConfidence) {
        assert(intent.confidence >= 0.7,
          ` 高置信度 >= 0.7(实际 ${intent.confidence}`);
        assert(!intent.needsClarification,
          ` 无需追问(实际 needsClarification=${intent.needsClarification}`);
      } else {
        // A vague query should yield low confidence or a clarification request.
        const isLowConfOrClarify = intent.confidence < 0.7 || intent.needsClarification;
        assert(isLowConfOrClarify,
          ` 低置信度或需追问confidence=${intent.confidence}, needsClarification=${intent.needsClarification}`);
      }

      // Variable names must be among the known columns (case-insensitive).
      if (intent.outcome_var) {
        assert(isRealColumn(intent.outcome_var),
          ` Y 变量 "${intent.outcome_var}" 存在于数据中`,
          `变量 "${intent.outcome_var}" 不在数据列名中(可能是 LLM 幻觉)`);
      }
      if (intent.predictor_vars?.length > 0) {
        const allReal = intent.predictor_vars.every((v: string) => isRealColumn(v));
        assert(allReal,
          ` X 变量 ${JSON.stringify(intent.predictor_vars)} 全部存在于数据中`,
          `部分变量可能为 LLM 幻觉`);
      }

      // Clarification cards, when returned alongside a clarification request.
      if (intent.needsClarification && res.data.clarificationCards?.length > 0) {
        const cards = res.data.clarificationCards;
        console.log(` 追问卡片: ${cards.length}`);
        cards.forEach((card: any, i: number) => {
          console.log(` 卡片 ${i + 1}: ${card.question}`);
          card.options?.slice(0, 3).forEach((opt: any) => {
            console.log(` - ${opt.label}`);
          });
        });
        assert(cards[0].options?.length >= 2, ` 追问卡片有 >= 2 个选项`);
      }
    } catch (e: any) {
      assert(false, ` ${tc.name} 请求异常`, e.message);
    }
  }
}
// ────────────────────────────────────────────
// 测试 5: Q→P 全链路Intent → Plan
// ────────────────────────────────────────────
/**
 * Test 5: sends queries to the plan endpoint and checks the returned workflow plan.
 *
 * For each case it verifies the HTTP status, the minimum number of steps,
 * that some step's `tool_code` starts with the expected prefix, and, when a
 * `planned_trace` is returned, that it names a primary tool and a template.
 * A missing `planned_trace` is counted as skipped rather than failed.
 */
async function testQtoPPipeline() {
  section('测试 5: Q→P 全链路Intent → WorkflowPlan');

  const testCases = [
    {
      name: '差异比较 → T 检验流程',
      query: '比较 sex 不同组的 Yqol 有没有差别',
      expectSteps: 2, // at minimum: descriptive statistics + main analysis
      expectTool: 'ST_',
    },
    {
      name: '回归分析 → Logistic 流程',
      query: 'age、smoke、bmi 对 Yqol 的预测作用,做个 Logistic 回归',
      expectSteps: 2,
      expectTool: 'ST_LOGISTIC',
    },
  ];

  for (const tc of testCases) {
    console.log(`\n 🔬 ${tc.name}`);
    console.log(` Query: "${tc.query}"`);

    try {
      const startedAt = Date.now();
      const res = await apiPost('/api/v1/ssa/workflow/plan', {
        sessionId,
        userQuery: tc.query,
      });
      const elapsed = Date.now() - startedAt;
      assert(res.status === 200, ` 返回 200实际 ${res.status}`, res.data?.error);

      // A plan is only inspected when the response both reports success and carries one.
      const plan = res.data?.success ? res.data.plan : undefined;
      if (!plan) {
        assert(false, ` 规划失败`, res.data?.error || JSON.stringify(res.data).substring(0, 200));
        continue;
      }

      console.log(` 耗时: ${elapsed}ms`);
      console.log(` 标题: ${plan.title}`);
      console.log(` 步骤数: ${plan.total_steps}`);
      assert(plan.total_steps >= tc.expectSteps,
        ` 步骤数 >= ${tc.expectSteps}(实际 ${plan.total_steps}`);

      // Print one line per step, tagging sensitivity steps and switch conditions.
      plan.steps?.forEach((step: any, i: number) => {
        let sensitivity = '';
        if (step.is_sensitivity) {
          sensitivity = ' [敏感性分析]';
        }
        let guardrail = '';
        if (step.switch_condition) {
          guardrail = ` 🛡️${step.switch_condition}`;
        }
        console.log(` 步骤 ${i + 1}: ${step.tool_name} (${step.tool_code})${sensitivity}${guardrail}`);
      });

      // At least one step must use a tool whose code starts with the expected prefix.
      const hasExpectedTool = plan.steps?.some(
        (s: any) => s.tool_code?.startsWith(tc.expectTool)
      );
      assert(hasExpectedTool,
        ` 包含 ${tc.expectTool}* 工具`,
        `工具列表: ${plan.steps?.map((s: any) => s.tool_code).join(', ')}`);

      const trace = plan.planned_trace;
      if (!trace) {
        skip('PlannedTrace 检查', '计划中未返回 planned_trace');
      } else {
        console.log(` PlannedTrace:`);
        console.log(` Primary: ${trace.primaryTool}`);
        console.log(` Fallback: ${trace.fallbackTool || 'null'}`);
        console.log(` SwitchCondition: ${trace.switchCondition || 'null'}`);
        console.log(` Template: ${trace.templateUsed}`);
        assert(!!trace.primaryTool, ` PlannedTrace 包含 primaryTool`);
        assert(!!trace.templateUsed, ` PlannedTrace 包含 templateUsed`);
      }

      // The EPV warning is informational only.
      if (plan.epv_warning) {
        console.log(` ⚠️ EPV Warning: ${plan.epv_warning}`);
      }

      if (plan.description) {
        assert(plan.description.length > 10, ` 规划描述非空(${plan.description.length} 字符)`);
        console.log(` 描述: ${plan.description.substring(0, 100)}...`);
      }
    } catch (e: any) {
      assert(false, ` ${tc.name} 请求异常`, e.message);
    }
  }
}
// ────────────────────────────────────────────
// 运行所有测试
// ────────────────────────────────────────────
/**
 * Runs the whole suite in order and exits with a status reflecting the result.
 *
 * Pre-flight: the CSV fixture must be readable and `GET /health` must return
 * 200, otherwise the process exits with status 1. Login and session creation
 * are prerequisites for the remaining tests; if either fails the run stops.
 * Exits with 1 when any check failed, 0 otherwise.
 */
async function main() {
  const banner = [
    '\n🧪 SSA Phase Q+P — 端到端集成测试\n',
    '测试链路:登录 → 上传 CSV → 数据画像 → LLM Intent → Q→P Plan',
    `测试用户:${TEST_PHONE}`,
    `后端地址:${BASE_URL}`,
    `测试文件:${TEST_CSV_PATH}\n`,
  ];
  for (const line of banner) {
    console.log(line);
  }

  // Pre-flight 1: the fixture must be readable.
  try {
    readFileSync(TEST_CSV_PATH);
  } catch {
    console.error('❌ test.csv 文件不存在,请检查路径');
    process.exit(1);
  }

  // Pre-flight 2: the backend must answer the health probe with 200.
  let backendUp = false;
  try {
    const health = await fetch(`${BASE_URL}/health`);
    backendUp = health.status === 200;
  } catch {
    backendUp = false;
  }
  if (!backendUp) {
    console.error('❌ 后端服务未启动或不可达');
    process.exit(1);
  }
  console.log('✅ 后端服务可达\n');

  // Prerequisite steps: abort as soon as one of them fails.
  if (!(await testLogin())) {
    console.log('\n⛔ 登录失败,终止测试');
    process.exit(1);
  }
  if (!(await testCreateSessionWithUpload())) {
    console.log('\n⛔ 会话创建失败,终止测试');
    process.exit(1);
  }

  await testDataProfile();
  await testIntentParsing();
  await testQtoPPipeline();

  // Summary.
  const doubleRule = '═'.repeat(60);
  console.log(`\n${doubleRule}`);
  console.log(`📊 测试结果汇总:${passed} 通过 / ${failed} 失败 / ${skipped} 跳过 / ${passed + failed + skipped} 总计`);
  if (failed !== 0) {
    console.log(`⚠️ 有 ${failed} 个测试失败,请检查上方输出。`);
  } else {
    console.log('🎉 全部通过Phase Q+P 端到端验证成功。');
  }
  console.log(`\n📝 测试会话 ID: ${sessionId}(可在数据库中查询详情)`);
  console.log(doubleRule);

  process.exit(failed > 0 ? 1 : 0);
}
// Script entry point: run the suite; an error escaping main() is logged and exits with status 1.
main().catch((err) => {
  console.error('💥 测试脚本异常:', err);
  process.exit(1);
});