feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development

Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-22 18:53:39 +08:00
parent bf10dec4c8
commit 3446909ff7
68 changed files with 11583 additions and 412 deletions

View File

@@ -0,0 +1,299 @@
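/**
 * SSA R statistics engine — end-to-end test of all tools
 *
 * Coverage: 12 statistical tools + JIT guardrails + report_blocks protocol validation
 *
 * How to run:
 *   node r-statistics-service/tests/run_all_tools_test.js
 *
 * Prerequisite: the R service container is running (docker-compose up -d)
 */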
const http = require('http');
const fs = require('fs');
const path = require('path');
// Base URL of the R statistics service; the R_SERVICE_URL environment
// variable overrides the local default when it is set and non-empty.
const R_URL = process.env.R_SERVICE_URL || 'http://localhost:8082';

// Timeout handed to every HTTP request, in milliseconds.
const TIMEOUT = 60_000;
// ==================== HTTP ====================
/**
 * POST a JSON payload to the R service.
 *
 * @param {string} endpoint - Path (optionally with a query string) resolved against R_URL.
 * @param {*} body - Value serialized with JSON.stringify and sent as the request body.
 * @returns {Promise<{status: number, body: *}>} The HTTP status code together with the
 *   response body parsed as JSON, or the raw response text when it is not valid JSON.
 *   The promise rejects on a request error, a response stream error, or a socket timeout.
 */
function post(endpoint, body) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, R_URL);
    const payload = JSON.stringify(body);
    const req = http.request(
      {
        hostname: url.hostname,
        port: url.port,
        // Include the query string; `pathname` alone would silently drop it.
        path: url.pathname + url.search,
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(payload),
        },
        timeout: TIMEOUT,
      },
      (res) => {
        // Decode as a stream so a multi-byte UTF-8 character split across two
        // chunks is reassembled instead of turning into replacement characters.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('error', reject);
        res.on('end', () => {
          try {
            resolve({ status: res.statusCode, body: JSON.parse(data) });
          } catch {
            // Non-JSON response: hand back the raw text.
            resolve({ status: res.statusCode, body: data });
          }
        });
      }
    );
    req.on('error', reject);
    req.on('timeout', () => {
      // The 'timeout' event only signals; the request must be torn down manually.
      req.destroy();
      reject(new Error('timeout'));
    });
    req.write(payload);
    req.end();
  });
}
/**
 * Issue a GET request to the R service.
 *
 * @param {string} endpoint - Path resolved against R_URL.
 * @returns {Promise<{status: number, body: *}>} The HTTP status code together with the
 *   response body parsed as JSON, or the raw response text when it is not valid JSON.
 *   The promise rejects on a request error, a response stream error, or a socket timeout.
 */
function get(endpoint) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, R_URL);
    const req = http.get(url, { timeout: TIMEOUT }, (res) => {
      // Decode as a stream so multi-byte UTF-8 characters survive chunk boundaries.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('error', reject);
      res.on('end', () => {
        try {
          resolve({ status: res.statusCode, body: JSON.parse(data) });
        } catch {
          // Non-JSON response: hand back the raw text.
          resolve({ status: res.statusCode, body: data });
        }
      });
    });
    req.on('error', reject);
    // Without this handler the `timeout` option has no visible effect and a
    // stalled request would leave the promise pending forever.
    req.on('timeout', () => {
      req.destroy();
      reject(new Error('timeout'));
    });
  });
}
// ==================== 测试数据 ====================
/**
 * Load a simple comma-separated file as an array of row objects keyed by the header line.
 *
 * Cells are split on plain commas (quoted fields are not supported). Empty, missing or
 * whitespace-only cells become `null`, cells that convert with `Number()` become numbers,
 * and everything else stays a string. Both LF and CRLF line endings are accepted.
 *
 * @param {string} [csvPath] - File to read; defaults to the shared SSA test dataset
 *   located relative to this script.
 * @returns {Array<Object<string, (number|string|null)>>} One object per data line.
 * @throws {Error} Whatever `fs.readFileSync` throws when the file cannot be read.
 */
function loadCSV(csvPath) {
  const sourcePath = csvPath ?? path.join(
    __dirname, '..', '..', 'docs', '03-业务模块', 'SSA-智能统计分析', '05-测试文档', 'test.csv'
  );
  // Split on \r?\n so CRLF files do not leave a stray '\r' on the last column
  // (which would corrupt the final header name and trailing string cells).
  const lines = fs.readFileSync(sourcePath, 'utf-8').trim().split(/\r?\n/);
  const headers = lines[0].split(',');
  return lines.slice(1).map((line) => {
    const vals = line.split(',');
    const row = {};
    headers.forEach((h, i) => {
      const v = vals[i];
      if (v === undefined || v.trim() === '') {
        // Number(' ') is 0, so blank cells must be caught before the numeric test.
        row[h] = null;
      } else if (!Number.isNaN(Number(v))) {
        row[h] = Number(v);
      } else {
        row[h] = v;
      }
    });
    return row;
  });
}
/**
 * Read and parse a JSON fixture stored in the same directory as this script.
 *
 * @param {string} name - File name relative to this script's directory.
 * @returns {*} The parsed JSON value.
 */
function loadJSON(name) {
  const fixturePath = path.join(__dirname, name);
  const raw = fs.readFileSync(fixturePath, 'utf-8');
  return JSON.parse(raw);
}
// ==================== 校验 ====================
/**
 * Check a `report_blocks` payload against the block shapes this script expects.
 *
 * Accepted block types are `markdown` (needs `content`), `table` (needs array
 * `headers` and `rows`), `image` (needs `data`) and `key_value` (needs array `items`).
 *
 * @param {*} blocks - The `report_blocks` value taken from a tool response.
 * @param {string} toolName - Tool code of the response; currently not used by the checks.
 * @returns {string[]} Human-readable issues; an empty array means the payload is valid.
 */
function validateBlocks(blocks, toolName) {
  if (!Array.isArray(blocks)) return ['report_blocks 不是数组'];
  if (blocks.length === 0) return ['report_blocks 为空'];

  const validTypes = ['markdown', 'table', 'image', 'key_value'];
  const issues = [];
  blocks.forEach((b, i) => {
    // A null / primitive entry is a protocol violation to report, not a reason
    // to crash the validator with a TypeError on `b.type`.
    if (b === null || typeof b !== 'object') {
      issues.push(`blocks[${i}] 不是对象`);
      return;
    }
    if (!validTypes.includes(b.type)) issues.push(`blocks[${i}].type 非法: ${b.type}`);
    if (b.type === 'table') {
      if (!Array.isArray(b.headers)) issues.push(`blocks[${i}] table 缺少 headers`);
      if (!Array.isArray(b.rows)) issues.push(`blocks[${i}] table 缺少 rows`);
    }
    if (b.type === 'markdown' && !b.content) issues.push(`blocks[${i}] markdown 缺少 content`);
    if (b.type === 'image' && !b.data) issues.push(`blocks[${i}] image 缺少 data`);
    if (b.type === 'key_value' && !Array.isArray(b.items)) issues.push(`blocks[${i}] key_value 缺少 items`);
  });
  return issues;
}
// ==================== 主测试 ====================
/**
 * Run the full end-to-end suite against the R service and exit the process.
 *
 * Steps: health check (aborts with exit code 1 on failure), informational tool
 * listing, the seven Phase 2A tools driven by the shared CSV dataset, the five
 * newer tools driven by JSON fixtures, four JIT guardrail requests, and a summary.
 * The process exits with code 1 when any item failed or errored, otherwise 0.
 *
 * @returns {Promise<void>} Never resolves in practice because `process.exit` is called.
 */
async function main() {
  console.log('\n╔══════════════════════════════════════════════════════════╗');
  console.log('║ SSA R 统计引擎 — 全工具端到端测试 (12 tools + JIT) ║');
  console.log('║ ' + new Date().toISOString().slice(0, 19) + ' ║');
  console.log('╚══════════════════════════════════════════════════════════╝\n');

  // 0. Health check
  try {
    const h = await get('/health');
    // Any HTTP answer used to count as healthy; a non-2xx reply (wrong service
    // on the port, crashed router, ...) must abort the run instead.
    if (h.status < 200 || h.status >= 300) throw new Error(`HTTP ${h.status}`);
    const toolsLoaded = h.body.tools_loaded || 0;
    console.log(`✅ 健康检查通过 version=${h.body.version} tools_loaded=${toolsLoaded} dev_mode=${h.body.dev_mode}\n`);
  } catch (e) {
    console.log(`❌ R 服务不可用: ${e.message}\n`);
    process.exit(1);
  }

  // 0.1 Tool list (informational only: a failure is reported but does not stop the run)
  try {
    const tl = await get('/api/v1/tools');
    console.log(`📋 已注册工具 (${tl.body.count}): ${tl.body.tools.join(', ')}\n`);
  } catch (e) {
    console.log(`⚠️ 无法获取工具列表: ${e.message}\n`);
  }

  const csvData = loadCSV();
  const ds = { type: 'inline', data: csvData };
  const results = [];

  /**
   * Call one tool endpoint, print a one-line outcome and record it in `results`.
   *
   * @param {string} name - Display name used in the log and the summary.
   * @param {string} toolCode - Tool code appended to `/api/v1/skills/`.
   * @param {object} body - Request payload.
   * @param {function(object): object} [checks] - Optional extractor of extra fields to print.
   */
  async function run(name, toolCode, body, checks) {
    const t0 = Date.now();
    try {
      const res = await post(`/api/v1/skills/${toolCode}`, body);
      const ms = Date.now() - t0;
      const d = res.body;
      if (d.status === 'success') {
        const blockIssues = validateBlocks(d.report_blocks, toolCode);
        const extra = checks ? checks(d) : {};
        const hasPlots = Array.isArray(d.plots) && d.plots.length > 0;
        const hasCode = !!d.reproducible_code;
        const blocksOk = blockIssues.length === 0;
        const icon = blocksOk ? '✅' : '⚠️';
        console.log(`${icon} ${name} (${ms}ms) blocks=${(d.report_blocks||[]).length} plots=${hasPlots?'✓':'✗'} code=${hasCode?'✓':'✗'} ${JSON.stringify(extra)}`);
        if (!blocksOk) blockIssues.forEach(iss => console.log(`${iss}`));
        // NOTE(review): block validation issues are only a warning here; the item
        // still counts as a pass. Confirm this is the intended policy.
        results.push({ name, status: 'pass', ms, blocksOk, extra });
      } else if (d.status === 'blocked') {
        console.log(`🔒 ${name} (${ms}ms) status=blocked message=${d.message}`);
        results.push({ name, status: 'blocked', ms });
      } else {
        console.log(`${name} (${ms}ms) error=${d.error_code||''} ${d.message||''}`);
        results.push({ name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`${name} EXCEPTION: ${e.message}`);
      results.push({ name, status: 'error', error: e.message });
    }
  }

  // ========== Phase 2A tools (the original 7) ==========
  console.log('─'.repeat(60));
  console.log(' Phase 2A 工具(原有 7 个)');
  console.log('─'.repeat(60));

  await run('ST_DESCRIPTIVE (描述性统计)', 'ST_DESCRIPTIVE', {
    data_source: ds,
    params: { variables: ['age', 'bmi', 'time'], group_var: 'sex' }
  }, d => ({ groups: Object.keys(d.results?.summary || {}).length }));

  await run('ST_T_TEST_IND (独立样本T检验)', 'ST_T_TEST_IND', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'age' },
    guardrails: { check_normality: true }
  }, d => ({ t: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_MANN_WHITNEY (Mann-Whitney U)', 'ST_MANN_WHITNEY', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'bmi' }
  }, d => ({ U: d.results?.statistic_U, p: d.results?.p_value_fmt }));

  await run('ST_CHI_SQUARE (卡方检验)', 'ST_CHI_SQUARE', {
    data_source: ds,
    params: { var1: 'sex', var2: 'smoke' }
  }, d => ({ chi2: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_CORRELATION (相关分析)', 'ST_CORRELATION', {
    data_source: ds,
    params: { var_x: 'age', var_y: 'bmi', method: 'auto' }
  }, d => ({ r: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_LOGISTIC_BINARY (Logistic回归)', 'ST_LOGISTIC_BINARY', {
    data_source: ds,
    params: { outcome_var: 'Yqol', predictors: ['age', 'bmi', 'sex', 'smoke'] }
  }, d => ({ aic: d.results?.model_fit?.aic, sig: d.results?.coefficients?.filter(c => c.significant)?.length }));

  await run('ST_T_TEST_PAIRED (配对T检验)', 'ST_T_TEST_PAIRED', {
    data_source: ds,
    params: { before_var: 'mouth_open', after_var: 'bucal_relax' },
    guardrails: { check_normality: true }
  }, d => ({ p: d.results?.p_value_fmt }));

  // ========== Phase Deploy tools (5 new), driven by JSON fixtures ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' Phase Deploy 新工具5 个)');
  console.log('─'.repeat(60));

  const fisherData = loadJSON('test_fisher.json');
  await run('ST_FISHER (Fisher精确检验)', 'ST_FISHER',
    fisherData,
    d => ({ p: d.results?.p_value_fmt, or: d.results?.odds_ratio }));

  const anovaData = loadJSON('test_anova_one.json');
  await run('ST_ANOVA_ONE (单因素方差分析)', 'ST_ANOVA_ONE',
    anovaData,
    d => ({ stat: d.results?.statistic, p: d.results?.p_value_fmt, method: d.results?.method }));

  const wilcoxData = loadJSON('test_wilcoxon.json');
  await run('ST_WILCOXON (Wilcoxon符号秩)', 'ST_WILCOXON',
    wilcoxData,
    d => ({ V: d.results?.statistic, p: d.results?.p_value_fmt, r: d.results?.effect_size?.r }));

  const linearData = loadJSON('test_linear_reg.json');
  await run('ST_LINEAR_REG (线性回归)', 'ST_LINEAR_REG',
    linearData,
    d => ({ r2: d.results?.model_fit?.r_squared, f: d.results?.model_fit?.f_statistic }));

  const baselineData = loadJSON('test_baseline_table.json');
  await run('ST_BASELINE_TABLE (基线特征表)', 'ST_BASELINE_TABLE',
    baselineData,
    d => ({
      sig_vars: d.results?.significant_vars?.length || 0,
      methods: d.results?.method_info?.length || 0,
      is_baseline: d.report_blocks?.[0]?.metadata?.is_baseline_table
    }));

  // ========== JIT guardrails ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' JIT 护栏检查');
  console.log('─'.repeat(60));

  const jitTests = [
    { name: 'JIT for ST_T_TEST_IND', code: 'ST_T_TEST_IND', body: { data_source: ds, tool_code: 'ST_T_TEST_IND', params: { group_var: 'sex', value_var: 'age' } } },
    { name: 'JIT for ST_ANOVA_ONE', code: 'ST_ANOVA_ONE', body: { data_source: anovaData.data_source, tool_code: 'ST_ANOVA_ONE', params: anovaData.params } },
    { name: 'JIT for ST_FISHER', code: 'ST_FISHER', body: { data_source: fisherData.data_source, tool_code: 'ST_FISHER', params: fisherData.params } },
    { name: 'JIT for ST_LINEAR_REG', code: 'ST_LINEAR_REG', body: { data_source: linearData.data_source, tool_code: 'ST_LINEAR_REG', params: linearData.params } },
  ];

  for (const jt of jitTests) {
    const t0 = Date.now();
    try {
      const res = await post('/api/v1/guardrails/jit', jt.body);
      const ms = Date.now() - t0;
      const d = res.body;
      if (d.status === 'success') {
        console.log(`${jt.name} (${ms}ms) checks=${d.checks?.length} all_passed=${d.all_checks_passed} suggested=${d.suggested_tool || 'none'}`);
        results.push({ name: jt.name, status: 'pass', ms });
      } else {
        console.log(`${jt.name} (${ms}ms) ${d.message || ''}`);
        // Keep the message so the summary does not degrade to "unknown".
        results.push({ name: jt.name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`${jt.name} EXCEPTION: ${e.message}`);
      results.push({ name: jt.name, status: 'error', error: e.message });
    }
  }

  // ========== Summary ==========
  console.log('\n' + '═'.repeat(60));
  console.log(' 测试汇总');
  console.log('═'.repeat(60));

  const failed = results.filter(r => r.status === 'fail' || r.status === 'error');
  const pass = results.filter(r => r.status === 'pass').length;
  const blocked = results.filter(r => r.status === 'blocked').length;
  const fail = failed.length;
  const total = results.length;
  console.log(` 通过: ${pass}/${total} 阻塞: ${blocked} 失败: ${fail}`);

  if (fail > 0) {
    console.log('\n 失败项:');
    failed.forEach(r => {
      console.log(`${r.name}: ${r.error || 'unknown'}`);
    });
  }

  // Average only over items that actually produced a timing. Guard the empty
  // case (every request threw) so the report never prints "NaNms", and keep
  // legitimate 0 ms samples that a truthiness filter would discard.
  const timed = results.filter(r => typeof r.ms === 'number');
  const avgLabel = timed.length > 0
    ? `${Math.round(timed.reduce((s, r) => s + r.ms, 0) / timed.length)}ms`
    : 'N/A';
  console.log(`\n 平均响应时间: ${avgLabel}`);
  console.log('═'.repeat(60));

  if (fail === 0) {
    console.log('🎉 全部测试通过R 统计引擎 12 工具 + JIT 护栏就绪。\n');
  } else {
    console.log('⚠️ 存在失败项,请检查 R 服务日志。\n');
  }
  process.exit(fail > 0 ? 1 : 0);
}
// Script entry point: any rejection escaping main() is logged and turned into
// a non-zero exit code.
main().catch((err) => {
  console.error('测试脚本异常:', err);
  process.exit(1);
});