feat(ssa): Complete QPER architecture - Query, Planner, Execute, Reflection layers
Implement the full QPER intelligent analysis pipeline:
- Phase E+: Block-based standardization for all 7 R tools, DynamicReport renderer, Word export enhancement
- Phase Q: LLM intent parsing with dynamic Zod validation against real column names, ClarificationCard component, DataProfile is_id_like tagging
- Phase P: ConfigLoader with Zod schema validation and hot-reload API, DecisionTableService (4-dimension matching), FlowTemplateService with EPV protection, PlannedTrace audit output
- Phase R: ReflectionService with statistical slot injection, sensitivity analysis conflict rules, ConclusionReport with section reveal animation, conclusion caching API, graceful R error classification

End-to-end test: 40/40 passed across two complete analysis scenarios.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
494
backend/scripts/test-ssa-phase-q-e2e.ts
Normal file
494
backend/scripts/test-ssa-phase-q-e2e.ts
Normal file
@@ -0,0 +1,494 @@
|
||||
/**
 * SSA Phase Q — end-to-end integration test
 *
 * Full pipeline: login → create session + upload file → data profile → LLM intent parsing → clarification → Q→P planning
 *
 * Dependencies: Node.js backend + PostgreSQL + Python extraction_service + LLM service
 * How to run: npx tsx scripts/test-ssa-phase-q-e2e.ts
 *
 * Test data: docs/03-业务模块/SSA-智能统计分析/05-测试文档/test.csv
 * Test user: 13800000001 / 123456
 */
|
||||
|
||||
import { readFileSync } from 'fs';
|
||||
import { join, dirname } from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
// ES modules have no __filename/__dirname globals, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Target backend and test account. Each value can be overridden through an
// environment variable; an unset or empty variable falls back to the default.
const BASE_URL = process.env.SSA_TEST_BASE_URL || 'http://localhost:3000';
const TEST_PHONE = process.env.SSA_TEST_PHONE || '13800000001';
const TEST_PASSWORD = process.env.SSA_TEST_PASSWORD || '123456';
// CSV fixture uploaded when the test session is created, resolved relative to this script.
const TEST_CSV_PATH = join(__dirname, '../../docs/03-业务模块/SSA-智能统计分析/05-测试文档/test.csv');

// ────────────────────────────────────────────
// Utilities
// ────────────────────────────────────────────

// Running counters for the final summary.
let passed = 0;
let failed = 0;
let skipped = 0;
// Bearer token captured by the login step; empty until login succeeds.
let token = '';
// Session id captured by the session-creation step; empty until it succeeds.
let sessionId = '';
|
||||
|
||||
/**
 * Records the outcome of a single check and prints a pass/fail line.
 *
 * @param condition - Result of the check; a truthy value counts as a pass.
 * @param testName - Label printed next to the pass/fail marker.
 * @param detail - Extra context, appended to the line only when the check fails.
 */
function assert(condition: boolean, testName: string, detail?: string) {
  if (!condition) {
    const suffix = detail ? ` — ${detail}` : '';
    console.log(` ❌ ${testName}${suffix}`);
    failed++;
    return;
  }

  console.log(` ✅ ${testName}`);
  passed++;
}
|
||||
|
||||
/**
 * Reports a check that was not executed and counts it as skipped.
 *
 * @param testName - Label of the check that was skipped.
 * @param reason - Why the check could not be run.
 */
function skip(testName: string, reason: string) {
  const line = ` ⏭️ ${testName} — 跳过:${reason}`;
  console.log(line);
  skipped += 1;
}
|
||||
|
||||
/**
 * Prints a section header: a blank line, a 60-character rule, the title, and a closing rule.
 *
 * @param title - Text shown between the two rules.
 */
function section(title: string) {
  const rule = '─'.repeat(60);
  console.log(`\n${rule}`);
  console.log(`📋 ${title}`);
  console.log(rule);
}
|
||||
|
||||
/**
 * Builds request headers carrying the current bearer token.
 *
 * @param contentType - When given (and non-empty), also sets the `Content-Type` header.
 * @returns A header map with `Authorization` and, optionally, `Content-Type`.
 */
function authHeaders(contentType?: string): Record<string, string> {
  const bearer: Record<string, string> = { 'Authorization': `Bearer ${token}` };
  return contentType ? { ...bearer, 'Content-Type': contentType } : bearer;
}
|
||||
|
||||
/**
 * Sends a POST request to the backend and returns the status with the decoded body.
 *
 * @param path - Path appended to `BASE_URL`.
 * @param body - Request payload; strings are sent as-is, anything else is JSON-serialized.
 * @param headers - Headers to send; when omitted (or falsy), authenticated JSON headers are used.
 * @returns `{ status, data }`, where `data` is the parsed JSON body, or the raw
 *          response text if the body is not valid JSON.
 */
async function apiPost(path: string, body: any, headers?: Record<string, string>): Promise<any> {
  const payload = typeof body === 'string' ? body : JSON.stringify(body);
  const response = await fetch(`${BASE_URL}${path}`, {
    method: 'POST',
    headers: headers || authHeaders('application/json'),
    body: payload,
  });

  const raw = await response.text();
  let data: any = raw;
  try {
    data = JSON.parse(raw);
  } catch {
    // Not JSON: keep the raw text so callers can still inspect the response.
  }
  return { status: response.status, data };
}
|
||||
|
||||
// ────────────────────────────────────────────
// Test 1: log in and obtain a token
// ────────────────────────────────────────────

/**
 * Logs in with the test account and stores the access token in the module-level `token`.
 *
 * The token is looked up in several places in the response body
 * (`data.tokens.accessToken`, `accessToken`, `token`).
 *
 * @returns `true` when a non-empty token is available afterwards, otherwise `false`.
 */
async function testLogin() {
  section('测试 1: 登录认证');

  try {
    const credentials = { phone: TEST_PHONE, password: TEST_PASSWORD };
    // No token exists yet, so pass explicit headers instead of the authenticated defaults.
    const { status, data } = await apiPost(
      '/api/v1/auth/login/password',
      credentials,
      { 'Content-Type': 'application/json' },
    );

    assert(status === 200, `登录返回 200(实际 ${status})`, JSON.stringify(data).substring(0, 200));

    if (status === 200 && data) {
      token = data?.data?.tokens?.accessToken || data?.accessToken || data?.token || '';
      assert(token.length > 0, '获取到 JWT Token', `token 长度: ${token.length}`);

      const user = data?.data?.user;
      if (user) {
        console.log(` 用户信息: ${user.name || user.phone || 'N/A'}, 角色: ${user.role}`);
      }
    }
  } catch (e: any) {
    assert(false, '登录请求失败', e.message);
  }

  if (token) {
    return true;
  }
  console.log('\n ⚠️ Token 获取失败,后续测试无法继续');
  return false;
}
|
||||
|
||||
// ────────────────────────────────────────────
// Test 2: create a session and upload test.csv
// ────────────────────────────────────────────

/**
 * Uploads the CSV fixture as multipart form data to create a session and stores
 * the returned id in the module-level `sessionId`.
 *
 * If the response carries a `schema`, its column list and row count are checked too.
 *
 * @returns `true` when a session id is available afterwards, otherwise `false`.
 */
async function testCreateSessionWithUpload() {
  section('测试 2: 创建会话 + 上传 test.csv');

  try {
    const csvBuffer = readFileSync(TEST_CSV_PATH);
    assert(csvBuffer.length > 0, `test.csv 文件读取成功(${csvBuffer.length} bytes)`);

    // Build the multipart/form-data payload; no Content-Type header is set explicitly below.
    const formData = new FormData();
    formData.append('file', new Blob([csvBuffer], { type: 'text/csv' }), 'test.csv');

    const res = await fetch(`${BASE_URL}/api/v1/ssa/sessions/`, {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${token}` },
      body: formData,
    });
    const data = await res.json();

    assert(res.status === 200, `创建会话返回 200(实际 ${res.status})`, JSON.stringify(data).substring(0, 300));

    if (!data.sessionId) {
      assert(false, '未返回 sessionId', JSON.stringify(data).substring(0, 200));
    } else {
      sessionId = data.sessionId;
      assert(true, `会话 ID: ${sessionId}`);
    }

    const schema = data.schema;
    if (schema) {
      const columns = schema.columns;
      assert(columns?.length > 0, `数据 Schema 解析成功(${columns?.length} 列)`);
      assert(schema.rowCount > 0, `行数: ${schema.rowCount}`);
      console.log(` 列名: ${columns?.slice(0, 8).map((c: any) => c.name).join(', ')}...`);
    }
  } catch (e: any) {
    assert(false, '创建会话失败', e.message);
  }

  if (sessionId) {
    return true;
  }
  console.log('\n ⚠️ SessionId 获取失败,后续测试无法继续');
  return false;
}
|
||||
|
||||
// ────────────────────────────────────────────
// Test 3: data profile (Python DataProfiler)
// ────────────────────────────────────────────

/**
 * Requests a data profile for the current session and checks its basic shape.
 *
 * Both snake_case and camelCase field names are accepted for row/column counts
 * and per-column fields. Columns flagged `is_id_like` are reported as a pass;
 * their absence is only logged, not counted as a failure.
 */
async function testDataProfile() {
  section('测试 3: 数据画像(Python DataProfiler)');

  try {
    const res = await apiPost('/api/v1/ssa/workflow/profile', { sessionId });

    assert(res.status === 200, `画像请求返回 200(实际 ${res.status})`);

    if (!res.data?.success) {
      assert(false, '画像生成失败', res.data?.error || JSON.stringify(res.data).substring(0, 200));
      return;
    }

    const profile = res.data.profile;
    assert(!!profile, '画像数据非空');
    if (!profile) {
      return;
    }

    assert(profile.row_count > 0 || profile.totalRows > 0,
      `行数: ${profile.row_count || profile.totalRows}`);
    assert(profile.column_count > 0 || profile.totalColumns > 0,
      `列数: ${profile.column_count || profile.totalColumns}`);

    const cols = profile.columns || [];
    if (cols.length === 0) {
      return;
    }

    console.log(` 前 5 列类型:`);
    for (const c of cols.slice(0, 5)) {
      console.log(` ${c.name || c.column_name}: ${c.type || c.dtype} (missing: ${c.missing_ratio ?? c.missingPercent ?? 'N/A'})`);
    }

    // Check the is_id_like flag (Phase Q defensive optimization).
    const idLikeNames = cols
      .filter((c: any) => c.is_id_like === true)
      .map((c: any) => c.name || c.column_name);
    if (idLikeNames.length > 0) {
      assert(true, `检测到 ${idLikeNames.length} 个 ID-like 列: ${idLikeNames.join(', ')}`);
    } else {
      console.log(' ℹ️ 未检测到 ID-like 列(test.csv 无 ID 列,符合预期)');
    }
  } catch (e: any) {
    assert(false, '画像请求异常', e.message);
  }
}
|
||||
|
||||
// ────────────────────────────────────────────
// Test 4: LLM intent parsing (Phase Q core)
// ────────────────────────────────────────────

/**
 * Sends several natural-language queries to the intent endpoint and checks the parsed intent.
 *
 * For each scenario it verifies, where applicable:
 * - the expected analysis goal,
 * - the confidence level (>= 0.7 without clarification for clear queries; low
 *   confidence or a clarification request for the vague one),
 * - that every returned variable name is a known column of the test CSV
 *   (guards against names invented by the LLM),
 * - that the first clarification card, when cards are returned, offers at least two options.
 */
async function testIntentParsing() {
  section('测试 4: LLM 意图理解(Phase Q 核心)');

  const testQueries = [
    {
      name: '场景 A — 明确的差异比较',
      query: '比较 sex 不同组的 Yqol 有没有差别',
      expectGoal: 'comparison',
      expectHighConfidence: true,
    },
    {
      name: '场景 B — 相关分析',
      query: '分析 age 和 bmi 的相关性',
      expectGoal: 'correlation',
      expectHighConfidence: true,
    },
    {
      name: '场景 C — 回归分析',
      query: 'age、smoke、bmi 对 Yqol 的影响,做个多因素分析',
      expectGoal: 'regression',
      expectHighConfidence: true,
    },
    {
      name: '场景 D — 模糊意图(应触发追问)',
      query: '帮我分析一下这个数据',
      expectGoal: null, // goal is not predictable for a vague query
      expectHighConfidence: false,
    },
    {
      name: '场景 E — 描述统计',
      query: '描述一下数据的基本情况',
      expectGoal: 'descriptive',
      expectHighConfidence: true,
    },
  ];

  // Column names the test expects in test.csv, used for the anti-hallucination check.
  // Built once, outside the loop, as a lower-cased Set for case-insensitive lookup.
  const realColumns = ['sex', 'smoke', 'age', 'bmi', 'mouth_open', 'bucal_relax',
    'toot_morph', 'root_number', 'root_curve', 'lenspace', 'denseratio',
    'Pglevel', 'Pgverti', 'Winter', 'presyp', 'flap', 'operation',
    'time', 'surgage', 'Yqol', 'times'];
  const realColumnsLower = new Set(realColumns.map(c => c.toLowerCase()));

  for (const tc of testQueries) {
    console.log(`\n 🔬 ${tc.name}`);
    console.log(` Query: "${tc.query}"`);

    try {
      const startTime = Date.now();
      const res = await apiPost('/api/v1/ssa/workflow/intent', {
        sessionId,
        userQuery: tc.query,
      });
      const elapsed = Date.now() - startTime;

      assert(res.status === 200, ` 返回 200(实际 ${res.status})`, res.data?.error);

      if (res.data?.success) {
        const intent = res.data.intent;
        console.log(` 耗时: ${elapsed}ms`);
        console.log(` Goal: ${intent?.goal}, Confidence: ${intent?.confidence}`);
        console.log(` Y: ${intent?.outcome_var || 'null'}, X: ${JSON.stringify(intent?.predictor_vars || [])}`);
        console.log(` Design: ${intent?.design}, needsClarification: ${intent?.needsClarification}`);

        if (intent) {
          // Goal must match when the scenario pins one.
          if (tc.expectGoal) {
            assert(intent.goal === tc.expectGoal,
              ` Goal = ${tc.expectGoal}(实际 ${intent.goal})`);
          }

          // Confidence expectations.
          if (tc.expectHighConfidence) {
            assert(intent.confidence >= 0.7,
              ` 高置信度 >= 0.7(实际 ${intent.confidence})`);
            assert(!intent.needsClarification,
              ` 无需追问(实际 needsClarification=${intent.needsClarification})`);
          } else {
            // A vague query should yield low confidence or request clarification.
            const isLowConfOrClarify = intent.confidence < 0.7 || intent.needsClarification;
            assert(isLowConfOrClarify,
              ` 低置信度或需追问(confidence=${intent.confidence}, needsClarification=${intent.needsClarification})`);
          }

          // Variable names must come from the real data (anti-hallucination check).
          if (intent.outcome_var) {
            const isReal = realColumnsLower.has(intent.outcome_var.toLowerCase());
            assert(isReal,
              ` Y 变量 "${intent.outcome_var}" 存在于数据中`,
              `变量 "${intent.outcome_var}" 不在数据列名中(可能是 LLM 幻觉)`);
          }

          if (intent.predictor_vars?.length > 0) {
            const allReal = intent.predictor_vars.every(
              (v: string) => realColumnsLower.has(v.toLowerCase())
            );
            assert(allReal,
              ` X 变量 ${JSON.stringify(intent.predictor_vars)} 全部存在于数据中`,
              `部分变量可能为 LLM 幻觉`);
          }

          // Clarification cards (only inspected when clarification is requested and cards exist).
          if (intent.needsClarification && res.data.clarificationCards?.length > 0) {
            const cards = res.data.clarificationCards;
            console.log(` 追问卡片: ${cards.length} 张`);
            cards.forEach((card: any, i: number) => {
              console.log(` 卡片 ${i + 1}: ${card.question}`);
              card.options?.slice(0, 3).forEach((opt: any) => {
                console.log(` - ${opt.label}`);
              });
            });
            assert(cards[0].options?.length >= 2, ` 追问卡片有 >= 2 个选项`);
          }
        }
      } else {
        assert(false, ` Intent 解析失败`, res.data?.error || JSON.stringify(res.data).substring(0, 200));
      }
    } catch (e: any) {
      assert(false, ` ${tc.name} 请求异常`, e.message);
    }
  }
}
|
||||
|
||||
// ────────────────────────────────────────────
// Test 5: full Q→P pipeline (Intent → Plan)
// ────────────────────────────────────────────

/**
 * Sends queries to the planning endpoint and checks the returned workflow plan.
 *
 * For each case it verifies the minimum step count, that at least one step's
 * `tool_code` starts with the expected prefix, and — when present — the
 * `planned_trace` fields and the plan description. A missing `planned_trace`
 * is counted as skipped rather than failed.
 */
async function testQtoPPipeline() {
  section('测试 5: Q→P 全链路(Intent → WorkflowPlan)');

  const testCases = [
    {
      name: '差异比较 → T 检验流程',
      query: '比较 sex 不同组的 Yqol 有没有差别',
      expectSteps: 2, // descriptive statistics + main analysis (at least)
      expectTool: 'ST_',
    },
    {
      name: '回归分析 → Logistic 流程',
      query: 'age、smoke、bmi 对 Yqol 的预测作用,做个 Logistic 回归',
      expectSteps: 2,
      expectTool: 'ST_LOGISTIC',
    },
  ];

  for (const { name, query, expectSteps, expectTool } of testCases) {
    console.log(`\n 🔬 ${name}`);
    console.log(` Query: "${query}"`);

    try {
      const t0 = Date.now();
      const res = await apiPost('/api/v1/ssa/workflow/plan', { sessionId, userQuery: query });
      const elapsed = Date.now() - t0;

      assert(res.status === 200, ` 返回 200(实际 ${res.status})`, res.data?.error);

      const plan = res.data?.success && res.data.plan;
      if (!plan) {
        assert(false, ` 规划失败`, res.data?.error || JSON.stringify(res.data).substring(0, 200));
        continue;
      }

      console.log(` 耗时: ${elapsed}ms`);
      console.log(` 标题: ${plan.title}`);
      console.log(` 步骤数: ${plan.total_steps}`);

      assert(plan.total_steps >= expectSteps,
        ` 步骤数 >= ${expectSteps}(实际 ${plan.total_steps})`);

      // Print one line per step, tagging sensitivity steps and switch conditions.
      plan.steps?.forEach((step: any, i: number) => {
        const sensitivity = step.is_sensitivity ? ' [敏感性分析]' : '';
        const guardrail = step.switch_condition ? ` 🛡️${step.switch_condition}` : '';
        console.log(` 步骤 ${i + 1}: ${step.tool_name} (${step.tool_code})${sensitivity}${guardrail}`);
      });

      // The plan must contain a tool whose code starts with the expected prefix.
      const hasExpectedTool = plan.steps?.some(
        (s: any) => s.tool_code?.startsWith(expectTool)
      );
      const toolList = `工具列表: ${plan.steps?.map((s: any) => s.tool_code).join(', ')}`;
      assert(hasExpectedTool, ` 包含 ${expectTool}* 工具`, toolList);

      // PlannedTrace audit output.
      const trace = plan.planned_trace;
      if (trace) {
        console.log(` PlannedTrace:`);
        console.log(` Primary: ${trace.primaryTool}`);
        console.log(` Fallback: ${trace.fallbackTool || 'null'}`);
        console.log(` SwitchCondition: ${trace.switchCondition || 'null'}`);
        console.log(` Template: ${trace.templateUsed}`);
        assert(!!trace.primaryTool, ` PlannedTrace 包含 primaryTool`);
        assert(!!trace.templateUsed, ` PlannedTrace 包含 templateUsed`);
      } else {
        skip('PlannedTrace 检查', '计划中未返回 planned_trace');
      }

      // EPV warning is informational only.
      if (plan.epv_warning) {
        console.log(` ⚠️ EPV Warning: ${plan.epv_warning}`);
      }

      // Description, when present, must be longer than 10 characters.
      if (plan.description) {
        assert(plan.description.length > 10, ` 规划描述非空(${plan.description.length} 字符)`);
        console.log(` 描述: ${plan.description.substring(0, 100)}...`);
      }
    } catch (e: any) {
      assert(false, ` ${name} 请求异常`, e.message);
    }
  }
}
|
||||
|
||||
// ────────────────────────────────────────────
// Run all tests
// ────────────────────────────────────────────

/**
 * Entry point: runs the pre-flight checks, executes the tests in order, prints
 * a summary and exits the process.
 *
 * Exits with code 1 when the CSV fixture cannot be read, the backend health
 * check fails, login or session creation fails, or any check failed; exits
 * with code 0 otherwise.
 */
async function main() {
  console.log('\n🧪 SSA Phase Q+P — 端到端集成测试\n');
  console.log('测试链路:登录 → 上传 CSV → 数据画像 → LLM Intent → Q→P Plan');
  console.log(`测试用户:${TEST_PHONE}`);
  console.log(`后端地址:${BASE_URL}`);
  console.log(`测试文件:${TEST_CSV_PATH}\n`);

  // Pre-flight: the CSV fixture must be readable.
  try {
    readFileSync(TEST_CSV_PATH);
  } catch {
    console.error('❌ test.csv 文件不存在,请检查路径');
    process.exit(1);
  }

  // Pre-flight: the backend health endpoint must answer with 200.
  try {
    const healthCheck = await fetch(`${BASE_URL}/health`).catch(() => null);
    if (healthCheck?.status !== 200) {
      console.error('❌ 后端服务未启动或不可达');
      process.exit(1);
    }
    console.log('✅ 后端服务可达\n');
  } catch {
    console.error('❌ 后端服务未启动或不可达');
    process.exit(1);
  }

  // Run the tests sequentially; login and session creation are prerequisites for the rest.
  if (!(await testLogin())) {
    console.log('\n⛔ 登录失败,终止测试');
    process.exit(1);
  }

  if (!(await testCreateSessionWithUpload())) {
    console.log('\n⛔ 会话创建失败,终止测试');
    process.exit(1);
  }

  await testDataProfile();
  await testIntentParsing();
  await testQtoPPipeline();

  // Summary.
  const doubleRule = '═'.repeat(60);
  console.log(`\n${doubleRule}`);
  console.log(`📊 测试结果汇总:${passed} 通过 / ${failed} 失败 / ${skipped} 跳过 / ${passed + failed + skipped} 总计`);
  console.log(
    failed === 0
      ? '🎉 全部通过!Phase Q+P 端到端验证成功。'
      : `⚠️ 有 ${failed} 个测试失败,请检查上方输出。`
  );
  console.log(`\n📝 测试会话 ID: ${sessionId}(可在数据库中查询详情)`);
  console.log(doubleRule);

  process.exit(failed > 0 ? 1 : 0);
}

// Any error escaping main() is reported and turned into a non-zero exit code.
main().catch((err) => {
  console.error('💥 测试脚本异常:', err);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user