# AIclinicalresearch/extraction_service/test_day6_validators.py

"""
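Day 6 validator test script.

Covers (numbered as the tests are run and printed):
1. CI vs. P-value logical consistency check
2. SE triangle validation
3. SD > Mean check
4. Reverse validation of the t-test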
"""

import sys
from pathlib import Path

# Put this script's directory on sys.path so the `forensics` imports below resolve
sys.path.insert(0, str(Path(__file__).parent))

from forensics.types import ForensicsConfig, TableData, Severity
from forensics.validator import StatValidator, SCIPY_AVAILABLE

# Startup banner, followed by the SCIPY_AVAILABLE flag (tests that need
# scipy are skipped when it is false) and a trailing blank line.
print("=" * 60, "Day 6 验证器测试", "=" * 60, sep="\n")
print(f"scipy 可用: {SCIPY_AVAILABLE}", end="\n\n")


def create_mock_table(table_id: str, data: list[list[str]], caption: str = "") -> TableData:
    """Build a mock ``TableData`` from an in-memory grid of cell strings.

    Args:
        table_id: Identifier stored in the table's ``id`` field.
        data: Rows of cell text; the first row determines the column count.
        caption: Optional caption text (empty by default).

    Returns:
        A ``TableData`` with placeholder HTML, an empty issue list and
        ``skipped`` set to False.
    """
    # Column count comes from the first row; an empty grid has zero columns.
    if data:
        n_cols = len(data[0])
    else:
        n_cols = 0

    fields = {
        "id": table_id,
        "caption": caption,
        "row_count": len(data),
        "col_count": n_cols,
        "html": "<table></table>",
        "data": data,
        "issues": [],
        "skipped": False,
    }
    return TableData(**fields)


def test_ci_pvalue_consistency():
    """Check the CI-vs-P-value consistency rule on two single-row tables.

    The first table has a 95% CI that spans 1 while P < 0.05 (contradictory);
    the second has a CI that excludes 1 with P < 0.05 (coherent). Both are
    passed to ``StatValidator._validate_ci_pvalue_consistency`` and the
    findings are printed.

    Returns:
        bool: True when the contradictory table yields at least one issue
        and the coherent table yields none.
    """
    rule = "=" * 40
    print(rule)
    print("测试 1: CI vs P 值逻辑一致性")
    print(rule)

    validator = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Contradictory case: the CI spans 1 although P < 0.05.
    conflict_rows = [
        ["Variable", "OR", "95% CI", "P value"],
        ["Age", "1.2", "(0.8-1.5)", "P=0.03"],
    ]
    conflict_table = create_mock_table("test_ci_1", conflict_rows, "CI 矛盾测试 1")
    conflict_issues = validator._validate_ci_pvalue_consistency(conflict_table)
    conflict_count = len(conflict_issues)

    print("  测试数据: CI=0.8-1.5 (跨越1), P=0.03 (显著)")
    print("  期望: 发现 ERROR")
    print(f"  结果: {conflict_count} 个问题")
    if conflict_issues:
        first = conflict_issues[0]
        print(f"    - {first.severity.value}: {first.message}")
    print()

    # Coherent case: the CI excludes 1 and P < 0.05.
    coherent_rows = [
        ["Variable", "OR", "95% CI", "P value"],
        ["Smoking", "2.5", "(1.2-4.8)", "P=0.01"],
    ]
    coherent_table = create_mock_table("test_ci_2", coherent_rows, "CI 正确测试")
    coherent_issues = validator._validate_ci_pvalue_consistency(coherent_table)
    coherent_count = len(coherent_issues)

    print("  测试数据: CI=1.2-4.8 (不跨越1), P=0.01 (显著)")
    print("  期望: 无问题")
    print(f"  结果: {coherent_count} 个问题")
    print()

    return conflict_count > 0 and coherent_count == 0


def test_se_triangle():
    """Exercise the SE triangle check (agreement between OR, 95% CI and P).

    When scipy is unavailable the test is skipped and reported as passing.
    Otherwise two single-row tables are passed to
    ``StatValidator._validate_se_triangle``: one whose reported P should be
    consistent with the OR and CI, and one whose reported P contradicts them.
    Issues from the first table are only printed; the return value depends
    solely on the contradictory table.

    Returns:
        bool: True if skipped, or if the contradictory table produced at
        least one issue.
    """
    rule = "=" * 40
    print(rule)
    print("测试 2: SE 三角验证 (OR/CI/P 一致性)")
    print(rule)

    if not SCIPY_AVAILABLE:
        print("  跳过: scipy 不可用")
        return True

    validator = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Worked example for OR=2.5, CI=1.5-4.2, P=0.001 via the SE triangle:
    #   SE = (ln(4.2) - ln(1.5)) / 3.92 = (1.435 - 0.405) / 3.92 = 0.263
    #   Z  = ln(2.5) / 0.263 = 0.916 / 0.263 = 3.48
    #   P  = 2 * (1 - norm.cdf(3.48)) ≈ 0.0005
    consistent_rows = [
        ["Variable", "OR (95% CI)", "P value"],
        ["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.001"],  # should be consistent
    ]
    consistent_table = create_mock_table("test_se_1", consistent_rows, "SE 三角一致性测试")
    consistent_issues = validator._validate_se_triangle(consistent_table)

    print("  测试数据: OR=2.5, CI=1.5-4.2, P=0.001")
    print(f"  结果: {len(consistent_issues)} 个问题")
    for found in consistent_issues:
        print(f"    - {found.severity.value}: {found.message}")
    print()

    # Same OR and CI, but a reported P of 0.5 that clearly contradicts them.
    conflict_rows = [
        ["Variable", "OR (95% CI)", "P value"],
        ["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.5"],
    ]
    conflict_table = create_mock_table("test_se_2", conflict_rows, "SE 三角矛盾测试")
    conflict_issues = validator._validate_se_triangle(conflict_table)
    conflict_count = len(conflict_issues)

    print("  测试数据: OR=2.5, CI=1.5-4.2, P=0.5 (矛盾)")
    print("  期望: 发现 ERROR")
    print(f"  结果: {conflict_count} 个问题")
    for found in conflict_issues:
        print(f"    - {found.severity.value}: {found.message}")
        if found.evidence:
            p_calc = found.evidence.get('p_calculated')
            p_rep = found.evidence.get('p_reported')
            print(f"      证据: P_calculated={p_calc}, P_reported={p_rep}")
    print()

    return conflict_count > 0


def test_sd_greater_mean():
    """Check the SD > Mean heuristic on an abnormal and a normal age row.

    Both tables are passed to ``StatValidator._validate_sd_greater_mean``
    and the findings are printed.

    Returns:
        bool: True when the abnormal table (25.0 ± 30.0) yields at least one
        issue and the normal table yields none.
    """
    rule = "=" * 40
    print(rule)
    print("测试 3: SD > Mean 启发式检查")
    print(rule)

    validator = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Abnormal case: the first group's SD (30.0) exceeds its mean (25.0).
    abnormal_rows = [
        ["Variable", "Group A", "Group B"],
        ["Age (years)", "25.0 ± 30.0", "28.0 ± 8.5"],
    ]
    abnormal_table = create_mock_table("test_sd_1", abnormal_rows, "SD > Mean 异常测试")
    abnormal_issues = validator._validate_sd_greater_mean(abnormal_table)
    abnormal_count = len(abnormal_issues)

    print("  测试数据: 年龄 = 25.0 ± 30.0 (SD > Mean)")
    print("  期望: 发现 ERROR (年龄是正值指标)")
    print(f"  结果: {abnormal_count} 个问题")
    for found in abnormal_issues:
        print(f"    - {found.severity.value}: {found.message}")
    print()

    # Normal case: both SDs are well below their means.
    normal_rows = [
        ["Variable", "Group A", "Group B"],
        ["Age (years)", "45.0 ± 12.0", "48.0 ± 10.5"],
    ]
    normal_table = create_mock_table("test_sd_2", normal_rows, "SD 正常测试")
    normal_issues = validator._validate_sd_greater_mean(normal_table)
    normal_count = len(normal_issues)

    print("  测试数据: 年龄 = 45.0 ± 12.0 (正常)")
    print("  期望: 无问题")
    print(f"  结果: {normal_count} 个问题")
    print()

    return abnormal_count > 0 and normal_count == 0


def test_ttest_validation():
    """Smoke-test the reverse t-test validation on one two-group row.

    When scipy is unavailable the test is skipped. Otherwise a table whose
    header cells carry the sample sizes is passed to
    ``StatValidator._validate_ttest`` and any issues are printed for manual
    review. No expectation is enforced on the number of issues.

    Returns:
        bool: Always True (either skipped, or the validator ran to completion).
    """
    rule = "=" * 40
    print(rule)
    print("测试 4: T 检验逆向验证")
    print(rule)

    if not SCIPY_AVAILABLE:
        print("  跳过: scipy 不可用")
        return True

    validator = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Header cells include the sample sizes.
    # Reference t-test: M1=45, SD1=10, n1=50; M2=50, SD2=12, n2=48
    #   t = (50-45) / sqrt(10²/50 + 12²/48) = 5 / sqrt(2 + 3) = 5/2.24 = 2.23
    #   P ≈ 0.028, so the reported P=0.03 is close to correct.
    rows = [
        ["Variable", "Group A (n=50)", "Group B (n=48)", "P value"],
        ["Score", "45.0 ± 10.0", "50.0 ± 12.0", "P=0.03"],
    ]
    table = create_mock_table("test_t_1", rows, "T 检验测试")
    found_issues = validator._validate_ttest(table)

    print("  测试数据: Group A: 45.0±10.0 (n=50), Group B: 50.0±12.0 (n=48), P=0.03")
    print(f"  结果: {len(found_issues)} 个问题")
    for found in found_issues:
        print(f"    - {found.severity.value}: {found.message}")
    print()

    return True


def run_all_tests():
    """Run every validator test and print a pass/fail summary.

    Each test runs in isolation: if a test raises, its traceback is printed
    and the test is recorded as failed, so the remaining tests still run and
    the summary is always printed.

    Returns:
        bool: True when every test returned a truthy result, False if any
        test returned a falsy result or raised an exception.
    """
    import traceback  # local import: only this runner needs it

    test_cases = [
        ("CI vs P 值一致性", test_ci_pvalue_consistency),
        ("SE 三角验证", test_se_triangle),
        ("SD > Mean 检查", test_sd_greater_mean),
        ("T 检验逆向验证", test_ttest_validation),
    ]

    results = []
    for name, test_func in test_cases:
        try:
            passed = bool(test_func())
        except Exception:
            # Top-level boundary of the script: report the crash as a failure
            # instead of aborting the whole run. Print to stdout so the
            # traceback stays in order with the rest of the report.
            print(f"  {name} 执行异常:")
            traceback.print_exc(file=sys.stdout)
            print()
            passed = False
        results.append((name, passed))

    print("=" * 60)
    print("测试结果汇总")
    print("=" * 60)

    for name, passed in results:
        status = "✅ PASS" if passed else "❌ FAIL"
        print(f"  {name}: {status}")

    all_passed = all(passed for _, passed in results)

    print()
    if all_passed:
        print("🎉 所有测试通过！Day 6 验证器实现完成。")
    else:
        print("⚠️ 部分测试失败，请检查代码。")

    return all_passed


if __name__ == "__main__":
    # Process exit status: 0 when every test passed, 1 otherwise.
    exit_code = 0 if run_all_tests() else 1
    sys.exit(exit_code)