feat(ssa): Complete Phase I-IV intelligent dialogue and tool system development

Phase I - Session Blackboard + READ Layer:
- SessionBlackboardService with Postgres-Only cache
- DataProfileService for data overview generation
- PicoInferenceService for LLM-driven PICO extraction
- Frontend DataContextCard and VariableDictionaryPanel
- E2E tests: 31/31 passed

Phase II - Conversation Layer LLM + Intent Router:
- ConversationService with SSE streaming
- IntentRouterService (rule-first + LLM fallback, 6 intents)
- SystemPromptService with 6-segment dynamic assembly
- TokenTruncationService for context management
- ChatHandlerService as unified chat entry
- Frontend SSAChatPane and useSSAChat hook
- E2E tests: 38/38 passed

Phase III - Method Consultation + AskUser Standardization:
- ToolRegistryService with Repository Pattern
- MethodConsultService with DecisionTable + LLM enhancement
- AskUserService with global interrupt handling
- Frontend AskUserCard component
- E2E tests: 13/13 passed

Phase IV - Dialogue-Driven Analysis + QPER Integration:
- ToolOrchestratorService (plan/execute/report)
- analysis_plan SSE event for WorkflowPlan transmission
- Dual-channel confirmation (ask_user card + workspace button)
- PICO as optional hint for LLM parsing
- E2E tests: 25/25 passed

R Statistics Service:
- 5 new R tools: anova_one, baseline_table, fisher, linear_reg, wilcoxon
- Enhanced guardrails and block helpers
- Comprehensive test suite (run_all_tools_test.js)

Documentation:
- Updated system status document (v5.9)
- Updated SSA module status and development plan (v1.8)

Total E2E: 107/107 passed (Phase I: 31, Phase II: 38, Phase III: 13, Phase IV: 25)

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-22 18:53:39 +08:00
parent bf10dec4c8
commit 3446909ff7
68 changed files with 11583 additions and 412 deletions

View File

@@ -0,0 +1,299 @@
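/**
 * SSA R statistics engine — end-to-end test covering every tool
 *
 * Coverage: 12 statistical tools + JIT guardrails + report_blocks protocol validation
 *
 * How to run:
 *   node r-statistics-service/tests/run_all_tools_test.js
 *
 * Prerequisite: the R service container is already running (docker-compose up -d)
 */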
const http = require('http');
const fs = require('fs');
const path = require('path');
// Base URL of the R statistics service; overridable through the R_SERVICE_URL environment variable.
const R_URL = process.env.R_SERVICE_URL || 'http://localhost:8082';
// Timeout value passed to the HTTP helpers in this file (Node's http `timeout` option is in milliseconds).
const TIMEOUT = 60000;
// ==================== HTTP ====================
/**
 * POST a JSON body to the R service and resolve with the decoded response.
 *
 * @param {string} endpoint - Path (optionally with a query string) resolved against R_URL.
 * @param {*} body - Value serialized with JSON.stringify and sent as the request body.
 * @returns {Promise<{status: number, body: *}>} HTTP status code plus the JSON-decoded body,
 *   or the raw response text when it is not valid JSON.
 *   Rejects on request/response stream errors and with Error('timeout') when the
 *   request's timeout event fires.
 */
function post(endpoint, body) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, R_URL);
    const payload = JSON.stringify(body);
    const req = http.request(
      {
        hostname: url.hostname,
        port: url.port,
        // Include the query string; url.pathname alone would silently drop it.
        path: url.pathname + url.search,
        method: 'POST',
        headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) },
        timeout: TIMEOUT,
      },
      (res) => {
        // Decode at the stream level so a multi-byte UTF-8 character that is
        // split across two chunks is not corrupted by per-chunk string conversion.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk) => { data += chunk; });
        res.on('error', reject);
        res.on('end', () => {
          // Non-JSON responses (e.g. plain-text error pages) are passed through as text.
          try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
          catch { resolve({ status: res.statusCode, body: data }); }
        });
      }
    );
    req.on('error', reject);
    // The timeout event only signals idleness; the request must be torn down explicitly.
    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
    req.write(payload);
    req.end();
  });
}
/**
 * GET an endpoint of the R service and resolve with the decoded response.
 *
 * @param {string} endpoint - Path resolved against R_URL.
 * @returns {Promise<{status: number, body: *}>} HTTP status code plus the JSON-decoded body,
 *   or the raw response text when it is not valid JSON.
 *   Rejects on request/response stream errors and with Error('timeout') when the
 *   request's timeout event fires.
 */
function get(endpoint) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, R_URL);
    const req = http.get(url, { timeout: TIMEOUT }, (res) => {
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        // Non-JSON responses are passed through as text.
        try { resolve({ status: res.statusCode, body: JSON.parse(data) }); }
        catch { resolve({ status: res.statusCode, body: data }); }
      });
    });
    req.on('error', reject);
    // Without this listener the timeout option has no visible effect and a
    // stalled request would hang forever; mirror the handling used by post().
    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
  });
}
// ==================== 测试数据 ====================
/**
 * Load the shared test CSV and convert it into an array of row objects.
 *
 * The file is parsed naively: the first line is the header, cells are split on
 * commas (no quoting support), empty or whitespace-only cells become null,
 * numeric-looking cells become numbers and everything else stays a string.
 *
 * @returns {Array<Object>} One object per data line, keyed by header name.
 * @throws {Error} Propagates the fs error when the CSV file cannot be read.
 */
function loadCSV() {
  const csvPath = path.join(__dirname, '..', '..', 'docs', '03-业务模块', 'SSA-智能统计分析', '05-测试文档', 'test.csv');
  // Split on LF or CRLF so Windows line endings do not leave a stray '\r'
  // on the last header name and on the last cell of every row.
  const lines = fs.readFileSync(csvPath, 'utf-8').trim().split(/\r?\n/);
  const headers = lines[0].split(',');
  return lines.slice(1).map((line) => {
    const vals = line.split(',');
    const row = {};
    headers.forEach((h, i) => {
      const v = vals[i];
      // Number(' ') is 0, so whitespace-only cells must be caught before the
      // numeric check or a missing value would be recorded as zero.
      if (v === undefined || v.trim() === '') row[h] = null;
      else if (!Number.isNaN(Number(v))) row[h] = Number(v);
      else row[h] = v;
    });
    return row;
  });
}
/**
 * Read a JSON fixture that lives next to this script and return the parsed value.
 *
 * @param {string} name - File name (or relative path) joined onto this script's directory.
 * @returns {*} The value produced by JSON.parse on the file's UTF-8 text.
 */
function loadJSON(name) {
  const fixturePath = path.join(__dirname, name);
  const rawText = fs.readFileSync(fixturePath, 'utf-8');
  return JSON.parse(rawText);
}
// ==================== 校验 ====================
/**
 * Check a report_blocks payload against the block shapes this test expects.
 *
 * @param {*} blocks - The report_blocks value taken from a tool response.
 * @param {string} toolName - Tool code of the response; currently unused, kept for callers.
 * @returns {string[]} Human-readable issues; an empty array means the payload is valid.
 */
function validateBlocks(blocks, toolName) {
  const issues = [];
  if (!Array.isArray(blocks)) { issues.push('report_blocks 不是数组'); return issues; }
  if (blocks.length === 0) { issues.push('report_blocks 为空'); return issues; }
  const validTypes = ['markdown', 'table', 'image', 'key_value'];
  blocks.forEach((b, i) => {
    // A null or primitive entry has no fields to inspect; report it as an
    // issue instead of letting the property access below throw a TypeError.
    if (b === null || typeof b !== 'object') {
      issues.push(`blocks[${i}] 不是对象`);
      return;
    }
    if (!validTypes.includes(b.type)) issues.push(`blocks[${i}].type 非法: ${b.type}`);
    if (b.type === 'table') {
      if (!Array.isArray(b.headers)) issues.push(`blocks[${i}] table 缺少 headers`);
      if (!Array.isArray(b.rows)) issues.push(`blocks[${i}] table 缺少 rows`);
    }
    if (b.type === 'markdown' && !b.content) issues.push(`blocks[${i}] markdown 缺少 content`);
    if (b.type === 'image' && !b.data) issues.push(`blocks[${i}] image 缺少 data`);
    if (b.type === 'key_value' && !Array.isArray(b.items)) issues.push(`blocks[${i}] key_value 缺少 items`);
  });
  return issues;
}
// ==================== 主测试 ====================
/**
 * Run the whole end-to-end suite against the R service and terminate the process.
 *
 * Steps: health check (exits with code 1 when the service is unreachable),
 * best-effort listing of registered tools, 7 tool calls driven by the shared
 * CSV, 5 tool calls driven by JSON fixtures, 4 JIT guardrail calls, then a
 * summary. The process exits with code 1 when any result is 'fail' or 'error',
 * otherwise with code 0; 'blocked' results are reported but do not fail the run.
 *
 * @returns {Promise<void>} Never resolves normally because it ends in process.exit.
 */
async function main() {
  console.log('\n╔══════════════════════════════════════════════════════════╗');
  console.log('║ SSA R 统计引擎 — 全工具端到端测试 (12 tools + JIT) ║');
  console.log('║ ' + new Date().toISOString().slice(0, 19) + ' ║');
  console.log('╚══════════════════════════════════════════════════════════╝\n');

  // 0. Health check: nothing else can run if the service does not answer.
  let toolsLoaded = 0;
  try {
    const h = await get('/health');
    toolsLoaded = h.body.tools_loaded || 0;
    console.log(`✅ 健康检查通过 version=${h.body.version} tools_loaded=${toolsLoaded} dev_mode=${h.body.dev_mode}\n`);
  } catch (e) {
    console.log(`❌ R 服务不可用: ${e.message}\n`);
    process.exit(1);
  }

  // 0.1 Tool listing: purely informational, so any failure here is deliberately ignored.
  try {
    const tl = await get('/api/v1/tools');
    console.log(`📋 已注册工具 (${tl.body.count}): ${tl.body.tools.join(', ')}\n`);
  } catch { /* skip */ }

  const csvData = loadCSV();
  const ds = { type: 'inline', data: csvData };
  const results = [];

  /**
   * Call one tool endpoint, log a one-line outcome and record it in `results`.
   *
   * @param {string} name - Display name used in log lines and the summary.
   * @param {string} toolCode - Tool code appended to /api/v1/skills/.
   * @param {Object} body - Request payload sent to the tool.
   * @param {Function} [checks] - Optional extractor returning extra fields to print for a success.
   */
  async function run(name, toolCode, body, checks) {
    const t0 = Date.now();
    try {
      const res = await post(`/api/v1/skills/${toolCode}`, body);
      const ms = Date.now() - t0;
      const d = res.body;
      if (d.status === 'success') {
        const blockIssues = validateBlocks(d.report_blocks, toolCode);
        const extra = checks ? checks(d) : {};
        const hasPlots = Array.isArray(d.plots) && d.plots.length > 0;
        const hasCode = !!d.reproducible_code;
        const blocksOk = blockIssues.length === 0;
        // Block issues downgrade the icon to a warning but the call still counts as a pass.
        const icon = blocksOk ? '✅' : '⚠️';
        console.log(`${icon} ${name} (${ms}ms) blocks=${(d.report_blocks||[]).length} plots=${hasPlots?'✓':'✗'} code=${hasCode?'✓':'✗'} ${JSON.stringify(extra)}`);
        if (!blocksOk) blockIssues.forEach(iss => console.log(`${iss}`));
        results.push({ name, status: 'pass', ms, blocksOk, extra });
      } else if (d.status === 'blocked') {
        console.log(`🔒 ${name} (${ms}ms) status=blocked message=${d.message}`);
        results.push({ name, status: 'blocked', ms });
      } else {
        console.log(`${name} (${ms}ms) error=${d.error_code||''} ${d.message||''}`);
        results.push({ name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`${name} EXCEPTION: ${e.message}`);
      results.push({ name, status: 'error', error: e.message });
    }
  }

  // ========== Phase 2A tools (the original 7), all fed from the shared CSV ==========
  console.log('─'.repeat(60));
  console.log(' Phase 2A 工具（原有 7 个）');
  console.log('─'.repeat(60));

  await run('ST_DESCRIPTIVE (描述性统计)', 'ST_DESCRIPTIVE', {
    data_source: ds,
    params: { variables: ['age', 'bmi', 'time'], group_var: 'sex' }
  }, d => ({ groups: Object.keys(d.results?.summary || {}).length }));

  await run('ST_T_TEST_IND (独立样本T检验)', 'ST_T_TEST_IND', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'age' },
    guardrails: { check_normality: true }
  }, d => ({ t: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_MANN_WHITNEY (Mann-Whitney U)', 'ST_MANN_WHITNEY', {
    data_source: ds,
    params: { group_var: 'sex', value_var: 'bmi' }
  }, d => ({ U: d.results?.statistic_U, p: d.results?.p_value_fmt }));

  await run('ST_CHI_SQUARE (卡方检验)', 'ST_CHI_SQUARE', {
    data_source: ds,
    params: { var1: 'sex', var2: 'smoke' }
  }, d => ({ chi2: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_CORRELATION (相关分析)', 'ST_CORRELATION', {
    data_source: ds,
    params: { var_x: 'age', var_y: 'bmi', method: 'auto' }
  }, d => ({ r: d.results?.statistic, p: d.results?.p_value_fmt }));

  await run('ST_LOGISTIC_BINARY (Logistic回归)', 'ST_LOGISTIC_BINARY', {
    data_source: ds,
    params: { outcome_var: 'Yqol', predictors: ['age', 'bmi', 'sex', 'smoke'] }
  }, d => ({ aic: d.results?.model_fit?.aic, sig: d.results?.coefficients?.filter(c => c.significant)?.length }));

  await run('ST_T_TEST_PAIRED (配对T检验)', 'ST_T_TEST_PAIRED', {
    data_source: ds,
    params: { before_var: 'mouth_open', after_var: 'bucal_relax' },
    guardrails: { check_normality: true }
  }, d => ({ p: d.results?.p_value_fmt }));

  // ========== Phase Deploy tools (5 new ones), each fed from its own JSON fixture ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' Phase Deploy 新工具5 个)');
  console.log('─'.repeat(60));

  const fisherData = loadJSON('test_fisher.json');
  await run('ST_FISHER (Fisher精确检验)', 'ST_FISHER',
    fisherData,
    d => ({ p: d.results?.p_value_fmt, or: d.results?.odds_ratio }));

  const anovaData = loadJSON('test_anova_one.json');
  await run('ST_ANOVA_ONE (单因素方差分析)', 'ST_ANOVA_ONE',
    anovaData,
    d => ({ stat: d.results?.statistic, p: d.results?.p_value_fmt, method: d.results?.method }));

  const wilcoxData = loadJSON('test_wilcoxon.json');
  await run('ST_WILCOXON (Wilcoxon符号秩)', 'ST_WILCOXON',
    wilcoxData,
    d => ({ V: d.results?.statistic, p: d.results?.p_value_fmt, r: d.results?.effect_size?.r }));

  const linearData = loadJSON('test_linear_reg.json');
  await run('ST_LINEAR_REG (线性回归)', 'ST_LINEAR_REG',
    linearData,
    d => ({ r2: d.results?.model_fit?.r_squared, f: d.results?.model_fit?.f_statistic }));

  const baselineData = loadJSON('test_baseline_table.json');
  await run('ST_BASELINE_TABLE (基线特征表)', 'ST_BASELINE_TABLE',
    baselineData,
    d => ({
      sig_vars: d.results?.significant_vars?.length || 0,
      methods: d.results?.method_info?.length || 0,
      is_baseline: d.report_blocks?.[0]?.metadata?.is_baseline_table
    }));

  // ========== JIT guardrails ==========
  console.log('\n' + '─'.repeat(60));
  console.log(' JIT 护栏检查');
  console.log('─'.repeat(60));

  const jitTests = [
    { name: 'JIT for ST_T_TEST_IND', code: 'ST_T_TEST_IND', body: { data_source: ds, tool_code: 'ST_T_TEST_IND', params: { group_var: 'sex', value_var: 'age' } } },
    { name: 'JIT for ST_ANOVA_ONE', code: 'ST_ANOVA_ONE', body: { data_source: anovaData.data_source, tool_code: 'ST_ANOVA_ONE', params: anovaData.params } },
    { name: 'JIT for ST_FISHER', code: 'ST_FISHER', body: { data_source: fisherData.data_source, tool_code: 'ST_FISHER', params: fisherData.params } },
    { name: 'JIT for ST_LINEAR_REG', code: 'ST_LINEAR_REG', body: { data_source: linearData.data_source, tool_code: 'ST_LINEAR_REG', params: linearData.params } },
  ];

  for (const jt of jitTests) {
    const t0 = Date.now();
    try {
      const res = await post('/api/v1/guardrails/jit', jt.body);
      const ms = Date.now() - t0;
      const d = res.body;
      if (d.status === 'success') {
        console.log(`${jt.name} (${ms}ms) checks=${d.checks?.length} all_passed=${d.all_checks_passed} suggested=${d.suggested_tool || 'none'}`);
        results.push({ name: jt.name, status: 'pass', ms });
      } else {
        console.log(`${jt.name} (${ms}ms) ${d.message || ''}`);
        // Keep the message so the failure summary can show it, matching run().
        results.push({ name: jt.name, status: 'fail', ms, error: d.message });
      }
    } catch (e) {
      console.log(`${jt.name} EXCEPTION: ${e.message}`);
      results.push({ name: jt.name, status: 'error', error: e.message });
    }
  }

  // ========== Summary ==========
  console.log('\n' + '═'.repeat(60));
  console.log(' 测试汇总');
  console.log('═'.repeat(60));

  const pass = results.filter(r => r.status === 'pass').length;
  const blocked = results.filter(r => r.status === 'blocked').length;
  const failures = results.filter(r => r.status === 'fail' || r.status === 'error');
  const fail = failures.length;
  const total = results.length;
  console.log(` 通过: ${pass}/${total} 阻塞: ${blocked} 失败: ${fail}`);

  if (fail > 0) {
    console.log('\n 失败项:');
    failures.forEach(r => {
      console.log(`${r.name}: ${r.error || 'unknown'}`);
    });
  }

  // Average only over results that carry a timing (exceptions have none), count
  // 0 ms entries as timed, and avoid NaN when nothing was timed.
  const timed = results.filter(r => typeof r.ms === 'number');
  const avgMs = timed.length > 0
    ? Math.round(timed.reduce((s, r) => s + r.ms, 0) / timed.length)
    : 0;
  console.log(`\n 平均响应时间: ${avgMs}ms`);
  console.log('═'.repeat(60));

  if (fail === 0) {
    console.log('🎉 全部测试通过R 统计引擎 12 工具 + JIT 护栏就绪。\n');
  } else {
    console.log('⚠️ 存在失败项,请检查 R 服务日志。\n');
  }
  process.exit(fail > 0 ? 1 : 0);
}
// Script entry point: run the suite; a rejection escaping main() is logged and the process exits with code 1.
main().catch(e => { console.error('测试脚本异常:', e); process.exit(1); });

View File

@@ -0,0 +1,20 @@
{
"data_source": {
"type": "inline",
"data": {
"group": ["A", "A", "A", "A", "A", "A", "A", "A", "A", "A",
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B",
"C", "C", "C", "C", "C", "C", "C", "C", "C", "C"],
"score": [23, 25, 27, 24, 22, 26, 28, 21, 29, 24,
30, 32, 35, 31, 28, 33, 36, 29, 34, 31,
18, 20, 22, 19, 17, 21, 23, 16, 24, 19]
}
},
"params": {
"group_var": "group",
"value_var": "score"
},
"guardrails": {
"check_normality": true
}
}

View File

@@ -0,0 +1,36 @@
{
"data_source": {
"type": "inline",
"data": {
"group": ["Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug",
"Drug", "Drug", "Drug", "Drug", "Drug",
"Placebo", "Placebo", "Placebo", "Placebo", "Placebo",
"Placebo", "Placebo", "Placebo", "Placebo", "Placebo",
"Placebo", "Placebo", "Placebo", "Placebo", "Placebo"],
"age": [45, 52, 38, 61, 44, 55, 49, 57, 42, 50,
48, 53, 41, 59, 46,
47, 51, 39, 58, 43, 54, 50, 56, 41, 49,
46, 52, 40, 60, 44],
"sex": ["M", "F", "M", "F", "M", "F", "M", "F", "M", "F",
"M", "M", "F", "F", "M",
"F", "M", "F", "M", "F", "M", "F", "M", "F", "M",
"F", "F", "M", "M", "F"],
"sbp": [130, 142, 125, 155, 128, 148, 135, 152, 127, 140,
132, 145, 126, 150, 133,
128, 138, 122, 150, 126, 142, 135, 148, 124, 136,
130, 140, 120, 153, 127],
"bmi": [24.5, 28.1, 22.3, 30.5, 23.8, 29.2, 25.6, 31.0, 22.0, 27.5,
24.8, 29.5, 21.8, 30.2, 25.1,
23.8, 27.2, 21.5, 29.8, 22.9, 28.5, 26.0, 30.1, 21.2, 26.8,
24.0, 28.8, 20.8, 31.2, 23.5],
"smoking": ["Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No",
"Yes", "Yes", "No", "No", "Yes",
"No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes", "No", "Yes",
"No", "No", "Yes", "Yes", "No"]
}
},
"params": {
"group_var": "group",
"analyze_vars": ["age", "sex", "sbp", "bmi", "smoking"]
}
}

View File

@@ -0,0 +1,15 @@
{
"data_source": {
"type": "inline",
"data": {
"treatment": ["Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug", "Drug",
"Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo", "Placebo"],
"outcome": ["Improved", "Improved", "Improved", "Improved", "Improved", "Improved", "Improved", "Not improved", "Not improved", "Not improved",
"Improved", "Improved", "Improved", "Not improved", "Not improved", "Not improved", "Not improved", "Not improved", "Not improved", "Not improved"]
}
},
"params": {
"var1": "treatment",
"var2": "outcome"
}
}

View File

@@ -0,0 +1,24 @@
{
"data_source": {
"type": "inline",
"data": {
"sbp": [120, 130, 125, 140, 135, 128, 145, 138, 122, 127,
133, 141, 136, 129, 132, 126, 148, 139, 124, 131,
137, 143, 134, 128, 150, 142, 123, 130, 136, 144],
"age": [25, 35, 30, 45, 40, 32, 50, 42, 28, 33,
38, 48, 43, 34, 36, 29, 55, 44, 27, 37,
41, 47, 39, 31, 58, 46, 26, 36, 40, 49],
"bmi": [22.1, 25.3, 23.5, 28.7, 26.4, 24.0, 30.2, 27.8, 21.5, 23.8,
25.6, 29.1, 27.2, 24.5, 25.1, 22.8, 31.5, 28.3, 21.9, 24.9,
26.8, 29.5, 26.0, 23.2, 32.1, 28.9, 21.3, 25.0, 26.5, 30.0],
"smoke": [0, 1, 0, 1, 1, 0, 1, 1, 0, 0,
1, 1, 1, 0, 0, 0, 1, 1, 0, 0,
1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
}
},
"params": {
"outcome_var": "sbp",
"predictors": ["age", "bmi"],
"confounders": ["smoke"]
}
}

View File

@@ -0,0 +1,15 @@
{
"data_source": {
"type": "inline",
"data": {
"before": [120, 130, 125, 140, 135, 128, 132, 145, 138, 122,
127, 133, 141, 136, 129],
"after": [115, 122, 118, 130, 125, 120, 126, 135, 128, 118,
121, 125, 132, 128, 122]
}
},
"params": {
"before_var": "before",
"after_var": "after"
}
}