// AIclinicalresearch/backend/scripts/test-phase-i-e2e.cjs

/**
 * Phase I — 端到端联调测试脚本
 *
 * 覆盖: Python 扩展 → SessionBlackboard → GetDataOverview → GetVariableDetail → TokenTruncation
 *
 * 运行:
 *   node backend/scripts/test-phase-i-e2e.cjs
 *
 * 前置: 数据库 + Python(8000) + Node 后端(3001) 均已启动
 */

const http = require('http');
const fs = require('fs');
const path = require('path');

// Base URL of the Python service under test; can be overridden through the
// EXTRACTION_SERVICE_URL environment variable.
const PYTHON_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';
// Per-request timeout in milliseconds, handed to http.request by post().
const TIMEOUT = 60000;

// CSV fixture sent to the Python endpoints, resolved relative to this script's directory.
const CSV_PATH = path.resolve(__dirname, '../../docs/03-业务模块/SSA-智能统计分析/05-测试文档/test.csv');

// Running tally of assertion outcomes: updated by assert(), reported by main().
let passed = 0;
let failed = 0;
// Labels of failed assertions (plus any fatal error message), listed in the final summary.
const errors = [];

// ==================== Helpers ====================

/**
 * Sends a JSON POST request over plain HTTP and resolves with the response.
 *
 * @param {string} baseUrl - Origin of the target service, e.g. `http://localhost:8000`.
 * @param {string} endpoint - Path (optionally with a query string) resolved against `baseUrl`.
 * @param {*} body - Value serialized with `JSON.stringify` and sent as the request body.
 * @returns {Promise<{status: number, body: *}>} The HTTP status code together with the
 *   parsed JSON body, or the raw response text when the body is not valid JSON.
 *   Rejects on request/response stream errors and with `Error('Timeout')` once the
 *   socket has been idle for `TIMEOUT` milliseconds.
 */
function post(baseUrl, endpoint, body) {
  return new Promise((resolve, reject) => {
    const url = new URL(endpoint, baseUrl);
    const payload = JSON.stringify(body);
    const req = http.request(
      {
        hostname: url.hostname,
        port: url.port,
        // Keep the query string: `pathname` alone would silently drop it.
        path: `${url.pathname}${url.search}`,
        method: 'POST',
        headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) },
        timeout: TIMEOUT,
      },
      (res) => {
        // Decode at stream level so a multi-byte UTF-8 character that straddles
        // two chunks is not corrupted by per-chunk Buffer-to-string coercion.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        // An aborted response must settle the promise instead of leaving it pending.
        res.on('error', reject);
        res.on('end', () => {
          let parsed;
          try {
            parsed = JSON.parse(data);
          } catch {
            // Non-JSON responses (e.g. plain-text error pages) are returned verbatim.
            parsed = data;
          }
          resolve({ status: res.statusCode, body: parsed });
        });
      }
    );
    req.on('error', reject);
    // The 'timeout' event only signals inactivity; the request has to be torn down manually.
    req.on('timeout', () => {
      req.destroy();
      reject(new Error('Timeout'));
    });
    req.write(payload);
    req.end();
  });
}

/**
 * Records the outcome of one check: prints a pass/fail line and updates the
 * module-level tally. Never throws, so later checks still run after a failure.
 *
 * @param {*} condition - Treated as passed when truthy.
 * @param {string} label - Description printed with the result and stored on failure.
 */
function assert(condition, label) {
  if (!condition) {
    console.log(`  ❌ ${label}`);
    failed++;
    errors.push(label);
    return;
  }

  console.log(`  ✅ ${label}`);
  passed++;
}

// ==================== Test 1: Python data-profile-csv 扩展 ====================

/**
 * Test 1: posts the CSV fixture to the Python `data-profile-csv` endpoint and
 * checks the response, including the Phase I additions (`normalityTests`,
 * `completeCaseCount`).
 *
 * Every field access is null-safe so that a malformed or partial response is
 * recorded as failed assertions instead of throwing a TypeError that would
 * abort the remaining tests.
 *
 * @returns {Promise<object|undefined>} The `profile` object from the response body,
 *   or `undefined` when the response carries none.
 */
async function testPythonDataProfile() {
  console.log('\n━━━ Test 1: Python data-profile-csv（正态性检验 + 完整病例数） ━━━');

  const csvContent = fs.readFileSync(CSV_PATH, 'utf-8');
  const res = await post(PYTHON_URL, '/api/ssa/data-profile-csv', {
    csv_content: csvContent,
    max_unique_values: 20,
    include_quality_score: true,
  });

  assert(res.status === 200, 'HTTP 200');
  assert(res.body?.success === true, 'success = true');

  const profile = res.body?.profile;
  assert(profile?.columns?.length > 0, `columns 数量: ${profile?.columns?.length}`);
  assert(profile?.summary?.totalRows === 311, `totalRows = 311 (got ${profile?.summary?.totalRows})`);

  // Fields introduced in Phase I
  assert(Array.isArray(profile?.normalityTests), 'normalityTests 存在且为数组');
  assert(profile?.normalityTests?.length > 0, `normalityTests 数量: ${profile?.normalityTests?.length}`);
  assert(typeof profile?.completeCaseCount === 'number', `completeCaseCount: ${profile?.completeCaseCount}`);

  // Validate the shape of the first normality-test entry
  if (profile?.normalityTests?.length > 0) {
    const nt = profile.normalityTests[0];
    assert(typeof nt?.variable === 'string', `normalityTest.variable: ${nt?.variable}`);
    assert(['shapiro_wilk', 'kolmogorov_smirnov'].includes(nt?.method), `normalityTest.method: ${nt?.method}`);
    assert(typeof nt?.pValue === 'number', `normalityTest.pValue: ${nt?.pValue}`);
    assert(typeof nt?.isNormal === 'boolean', `normalityTest.isNormal: ${nt?.isNormal}`);
  }

  return profile;
}

// ==================== Test 2: Python variable-detail 端点 ====================

/**
 * Test 2: exercises the Python `variable-detail` endpoint with a numeric
 * variable (`age`), a second variable (`sex`) and a variable name that does
 * not exist in the fixture.
 *
 * Response fields are read null-safely so that a missing or malformed section
 * is recorded as a failed assertion rather than throwing a TypeError that
 * would abort the rest of the suite.
 *
 * @returns {Promise<void>}
 */
async function testPythonVariableDetail() {
  console.log('\n━━━ Test 2: Python variable-detail（数值型: age） ━━━');

  const csvContent = fs.readFileSync(CSV_PATH, 'utf-8');
  const res = await post(PYTHON_URL, '/api/ssa/variable-detail', {
    csv_content: csvContent,
    variable_name: 'age',
    max_bins: 30,
    max_qq_points: 200,
  });
  // Fall back to an empty object so a null body yields failed checks, not a crash.
  const body = res.body ?? {};

  assert(res.status === 200, 'HTTP 200');
  assert(body.success === true, 'success = true');
  assert(body.type === 'numeric', `type = numeric (got ${body.type})`);

  // Descriptive statistics
  assert(typeof body.descriptive?.mean === 'number', `mean: ${body.descriptive?.mean}`);

  // Histogram bin cap (H2 guard)
  if (body.histogram) {
    const binCount = body.histogram.counts?.length;
    assert(binCount <= 30, `histogram bins <= 30 (got ${binCount})`);
    assert(body.histogram.edges?.length === binCount + 1, 'edges = counts + 1');
  }

  // Normality test
  assert(body.normalityTest !== undefined, 'normalityTest 存在');

  // Q-Q plot point cap
  if (body.qqPlot) {
    const pointCount = body.qqPlot.observed?.length;
    assert(pointCount <= 200, `Q-Q points <= 200 (got ${pointCount})`);
  }

  // Outliers
  assert(typeof body.outliers?.count === 'number', `outliers.count: ${body.outliers?.count}`);

  console.log('\n━━━ Test 2b: Python variable-detail（分类型: sex） ━━━');

  const res2 = await post(PYTHON_URL, '/api/ssa/variable-detail', {
    csv_content: csvContent,
    variable_name: 'sex',
    max_bins: 30,
  });
  const body2 = res2.body ?? {};

  assert(res2.status === 200, 'HTTP 200');
  // Either type is accepted here: the check does not pin how `sex` is classified.
  assert(body2.type === 'categorical' || body2.type === 'numeric', `type: ${body2.type}`);

  if (body2.distribution) {
    assert(Array.isArray(body2.distribution), '分类分布为数组');
    assert(body2.distribution.length > 0, `分类水平数: ${body2.distribution.length}`);
  }

  console.log('\n━━━ Test 2c: Python variable-detail（不存在的变量） ━━━');

  const res3 = await post(PYTHON_URL, '/api/ssa/variable-detail', {
    csv_content: csvContent,
    variable_name: 'nonexistent_var',
  });

  assert(res3.status === 400, `HTTP 400 for nonexistent var (got ${res3.status})`);
  assert(res3.body?.success === false, 'success = false');
}

// ==================== Test 3: TokenTruncationService（纯逻辑测试） ====================

/**
 * Test 3: builds a mock session blackboard (25 variables, 10 numeric and 15
 * categorical) and checks the structural invariants of that mock.
 *
 * TokenTruncationService itself is not loaded here; only the mock data that
 * would feed it is validated.
 *
 * @returns {Promise<void>}
 */
async function testTokenTruncation() {
  console.log('\n━━━ Test 3: TokenTruncationService（纯逻辑） ━━━');

  const VARIABLE_COUNT = 25;
  const NUMERIC_COUNT = 10;
  // Indices that carry a PICO role; every other variable has none.
  const PICO_ROLES = { 0: 'O', 15: 'I' };

  const indices = [...Array(VARIABLE_COUNT).keys()];
  const typeFor = (i) => (i < NUMERIC_COUNT ? 'numeric' : 'categorical');
  const timestamp = () => new Date().toISOString();

  const buildColumn = (i) => {
    // Every third column is given a handful of missing values.
    const hasMissing = i % 3 === 0;
    return {
      name: `var_${i}`,
      type: typeFor(i),
      missingCount: hasMissing ? 5 : 0,
      missingRate: hasMissing ? 1.6 : 0,
      uniqueCount: i < NUMERIC_COUNT ? 100 : 3,
      totalCount: 311,
    };
  };

  const buildDictionaryEntry = (i) => ({
    name: `var_${i}`,
    inferredType: typeFor(i),
    confirmedType: null,
    label: null,
    picoRole: PICO_ROLES[i] ?? null,
    // Only the last variable is flagged as ID-like.
    isIdLike: i === 24,
    confirmStatus: 'ai_inferred',
  });

  // Mock blackboard used to sanity-check the data the truncation logic would receive.
  const mockBlackboard = {
    sessionId: 'test-truncation',
    createdAt: timestamp(),
    updatedAt: timestamp(),
    dataOverview: {
      profile: {
        columns: indices.map(buildColumn),
        summary: {
          totalRows: 311,
          totalColumns: 25,
          numericColumns: 10,
          categoricalColumns: 15,
          datetimeColumns: 0,
          textColumns: 0,
          overallMissingRate: 0.5,
          totalMissingCells: 20,
        },
      },
      normalityTests: [
        { variable: 'var_0', method: 'shapiro_wilk', statistic: 0.95, pValue: 0.001, isNormal: false },
        { variable: 'var_1', method: 'shapiro_wilk', statistic: 0.99, pValue: 0.45, isNormal: true },
      ],
      completeCaseCount: 290,
      generatedAt: timestamp(),
    },
    variableDictionary: indices.map(buildDictionaryEntry),
    picoInference: {
      population: '311 例患者',
      intervention: '手术方式 (var_15)',
      comparison: null,
      outcome: '结局指标 (var_0)',
      confidence: 'medium',
      status: 'ai_inferred',
    },
    qperTrace: [],
  };

  // TokenTruncationService is an ES module and cannot be require()d from this
  // script, so only the structure of the mock data is verified here.
  const { variableDictionary: dictionary, picoInference: pico } = mockBlackboard;
  const nonIdCount = dictionary.reduce((count, entry) => count + (entry.isIdLike ? 0 : 1), 0);
  const picoCount = dictionary.reduce((count, entry) => count + (entry.picoRole ? 1 : 0), 0);

  assert(dictionary.length === 25, '变量字典 25 条');
  assert(nonIdCount === 24, '非 ID 变量 24 条');
  assert(picoCount === 2, 'PICO 变量 2 条');
  assert(pico.intervention !== null, 'PICO intervention 非 null');
  assert(pico.comparison === null, 'PICO comparison = null（H3 观察性研究）');

  console.log('  ℹ️  TokenTruncationService 为 ES Module，完整截断逻辑将在后端启动后通过 API 间接验证');
}

// ==================== Main ====================

/**
 * Entry point: prints the banner, runs the three test groups in order, prints
 * the pass/fail summary and exits with status 1 when anything failed.
 *
 * An exception thrown by a test group stops the remaining groups and is
 * counted as one additional failure.
 *
 * @returns {Promise<void>} Does not resolve in practice because the process exits.
 */
async function main() {
  const banner = [
    '╔══════════════════════════════════════════════════════╗',
    '║  Phase I — Session Blackboard + READ Layer E2E Test ║',
    '╠══════════════════════════════════════════════════════╣',
    `║  Python:  ${PYTHON_URL.padEnd(41)}║`,
    `║  CSV:     test.csv (311 rows × 21 cols)             ║`,
    '╚══════════════════════════════════════════════════════╝',
  ];
  for (const line of banner) {
    console.log(line);
  }

  // Run order matters only for the log output; groups are executed one at a time.
  const suites = [testPythonDataProfile, testPythonVariableDetail, testTokenTruncation];
  try {
    for (const runSuite of suites) {
      await runSuite();
    }
  } catch (err) {
    const fatalMessage = err.message;
    console.error('\n💥 Fatal error:', fatalMessage);
    failed++;
    errors.push(`Fatal: ${fatalMessage}`);
  }

  // Summary
  console.log('\n══════════════════════════════════════════');
  console.log(`  结果: ${passed} 通过, ${failed} 失败`);
  if (errors.length > 0) {
    console.log('  失败项:');
    for (const label of errors) {
      console.log(`    - ${label}`);
    }
  }
  console.log('══════════════════════════════════════════\n');

  const exitCode = failed > 0 ? 1 : 0;
  process.exit(exitCode);
}

main();