"""
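Day 6 validator test script.

Checks covered (numbered as in the printed output):
1. CI vs. P-value logical consistency
2. SE triangle validation (OR / CI / P agreement)
3. SD > Mean heuristic check
4. T-test reverse validation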
"""
import sys
from pathlib import Path
# 添加项目路径
sys.path.insert(0, str(Path(__file__).parent))
from forensics.types import ForensicsConfig, TableData, Severity
from forensics.validator import StatValidator, SCIPY_AVAILABLE
# Startup banner: title framed by two 60-character rules, followed by the
# scipy availability flag exported by forensics.validator and a blank line.
print("=" * 60, "Day 6 验证器测试", "=" * 60, sep="\n")
print(f"scipy 可用: {SCIPY_AVAILABLE}", end="\n\n")
def create_mock_table(table_id: str, data: list[list[str]], caption: str = "") -> TableData:
    """Build a ``TableData`` fixture from raw cell text.

    Args:
        table_id: Value stored in the table's ``id`` field.
        data: Table rows, each a list of cell strings. The tests in this
            file pass the header row first.
        caption: Optional caption text; defaults to the empty string.

    Returns:
        A ``TableData`` whose ``row_count`` is ``len(data)`` and whose
        ``col_count`` is the width of the first row (0 when ``data`` is
        empty), with no issues and ``skipped=False``.
    """
    return TableData(
        id=table_id,
        caption=caption,
        row_count=len(data),
        col_count=len(data[0]) if data else 0,
        # The literal used to contain a raw line break inside a plain "..."
        # string, which Python rejects as an unterminated string. The escaped
        # form keeps the same single-newline value.
        # NOTE(review): this looks like a placeholder whose markup may have
        # been lost; confirm whether real HTML is expected here.
        html="\n",
        data=data,
        issues=[],
        skipped=False,
    )
def test_ci_pvalue_consistency():
    """Exercise the CI vs. P-value consistency check on two one-row tables.

    Calls ``StatValidator._validate_ci_pvalue_consistency`` on a table whose
    confidence interval and P value contradict each other and on a table
    where they agree, printing the number of reported issues for each.

    Returns:
        True when the contradictory table produced at least one issue and
        the consistent table produced none; False otherwise.
    """
    rule = "=" * 40
    print(rule)
    print("测试 1: CI vs P 值逻辑一致性")
    print(rule)

    checker = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Contradictory row: the CI (0.8-1.5) spans 1 while P < 0.05.
    conflict_rows = [
        ["Variable", "OR", "95% CI", "P value"],
        ["Age", "1.2", "(0.8-1.5)", "P=0.03"],
    ]
    conflict_table = create_mock_table("test_ci_1", conflict_rows, "CI 矛盾测试 1")
    conflict_findings = checker._validate_ci_pvalue_consistency(conflict_table)
    conflict_count = len(conflict_findings)
    print(" 测试数据: CI=0.8-1.5 (跨越1), P=0.03 (显著)")
    print(" 期望: 发现 ERROR")
    print(f" 结果: {conflict_count} 个问题")
    if conflict_findings:
        # Only the first reported issue is echoed.
        first = conflict_findings[0]
        print(f" - {first.severity.value}: {first.message}")
    print()

    # Consistent row: the CI (1.2-4.8) excludes 1 and P < 0.05.
    agreeing_rows = [
        ["Variable", "OR", "95% CI", "P value"],
        ["Smoking", "2.5", "(1.2-4.8)", "P=0.01"],
    ]
    agreeing_table = create_mock_table("test_ci_2", agreeing_rows, "CI 正确测试")
    agreeing_findings = checker._validate_ci_pvalue_consistency(agreeing_table)
    agreeing_count = len(agreeing_findings)
    print(" 测试数据: CI=1.2-4.8 (不跨越1), P=0.01 (显著)")
    print(" 期望: 无问题")
    print(f" 结果: {agreeing_count} 个问题")
    print()

    return conflict_count > 0 and agreeing_count == 0
def test_se_triangle():
    """Exercise the SE-triangle check (OR / CI / P agreement).

    Calls ``StatValidator._validate_se_triangle`` on a row whose reported P
    value roughly matches the hand calculation below and on a row whose P
    value is far off, printing every issue that comes back.

    Returns:
        True immediately when scipy is unavailable (the test is skipped).
        Otherwise True when the contradictory row produced at least one
        issue. The consistent row's result is printed but not asserted.
    """
    rule = "=" * 40
    print(rule)
    print("测试 2: SE 三角验证 (OR/CI/P 一致性)")
    print(rule)

    if not SCIPY_AVAILABLE:
        print(" 跳过: scipy 不可用")
        return True

    checker = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Hand calculation for OR=2.5, CI=1.5-4.2:
    #   SE = (ln(4.2) - ln(1.5)) / 3.92 = (1.435 - 0.405) / 3.92 = 0.263
    #   Z  = ln(2.5) / 0.263 = 0.916 / 0.263 = 3.48
    #   P  = 2 * (1 - norm.cdf(3.48)) ~= 0.0005
    # so a reported P=0.001 should be consistent.
    matching_rows = [
        ["Variable", "OR (95% CI)", "P value"],
        ["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.001"],
    ]
    matching_table = create_mock_table("test_se_1", matching_rows, "SE 三角一致性测试")
    matching_findings = checker._validate_se_triangle(matching_table)
    print(" 测试数据: OR=2.5, CI=1.5-4.2, P=0.001")
    print(f" 结果: {len(matching_findings)} 个问题")
    for finding in matching_findings:
        print(f" - {finding.severity.value}: {finding.message}")
    print()

    # Same OR and CI, but a reported P=0.5 that clearly contradicts them.
    clashing_rows = [
        ["Variable", "OR (95% CI)", "P value"],
        ["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.5"],
    ]
    clashing_table = create_mock_table("test_se_2", clashing_rows, "SE 三角矛盾测试")
    clashing_findings = checker._validate_se_triangle(clashing_table)
    clash_count = len(clashing_findings)
    print(" 测试数据: OR=2.5, CI=1.5-4.2, P=0.5 (矛盾)")
    print(" 期望: 发现 ERROR")
    print(f" 结果: {clash_count} 个问题")
    for finding in clashing_findings:
        print(f" - {finding.severity.value}: {finding.message}")
        proof = finding.evidence
        if proof:
            print(f" 证据: P_calculated={proof.get('p_calculated')}, P_reported={proof.get('p_reported')}")
    print()

    return clash_count > 0
def test_sd_greater_mean():
    """Exercise the SD > Mean heuristic on an abnormal and a normal row.

    Calls ``StatValidator._validate_sd_greater_mean`` on an age row whose
    first group has SD larger than the mean, and on an age row with ordinary
    values, printing the number of reported issues for each.

    Returns:
        True when the abnormal table produced at least one issue and the
        normal table produced none; False otherwise.
    """
    rule = "=" * 40
    print(rule)
    print("测试 3: SD > Mean 启发式检查")
    print(rule)

    checker = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Abnormal: Group A reports age 25.0 +/- 30.0, i.e. SD exceeds the mean.
    odd_rows = [
        ["Variable", "Group A", "Group B"],
        ["Age (years)", "25.0 ± 30.0", "28.0 ± 8.5"],
    ]
    odd_table = create_mock_table("test_sd_1", odd_rows, "SD > Mean 异常测试")
    odd_findings = checker._validate_sd_greater_mean(odd_table)
    odd_count = len(odd_findings)
    print(" 测试数据: 年龄 = 25.0 ± 30.0 (SD > Mean)")
    print(" 期望: 发现 ERROR (年龄是正值指标)")
    print(f" 结果: {odd_count} 个问题")
    for finding in odd_findings:
        print(f" - {finding.severity.value}: {finding.message}")
    print()

    # Normal: both groups have SD well below the mean.
    plain_rows = [
        ["Variable", "Group A", "Group B"],
        ["Age (years)", "45.0 ± 12.0", "48.0 ± 10.5"],
    ]
    plain_table = create_mock_table("test_sd_2", plain_rows, "SD 正常测试")
    plain_findings = checker._validate_sd_greater_mean(plain_table)
    plain_count = len(plain_findings)
    print(" 测试数据: 年龄 = 45.0 ± 12.0 (正常)")
    print(" 期望: 无问题")
    print(f" 结果: {plain_count} 个问题")
    print()

    return odd_count > 0 and plain_count == 0
def test_ttest_validation():
    """Run the T-test reverse validation on one two-group row.

    Calls ``StatValidator._validate_ttest`` on a table whose header cells
    carry the sample sizes and prints every issue that comes back.

    Returns:
        Always True: immediately when scipy is unavailable, and otherwise
        after printing the results. The issue list is not asserted, so this
        only fails if the validator raises.
    """
    rule = "=" * 40
    print(rule)
    print("测试 4: T 检验逆向验证")
    print(rule)

    if not SCIPY_AVAILABLE:
        print(" 跳过: scipy 不可用")
        return True

    checker = StatValidator(ForensicsConfig(check_level="L1_L2"))

    # Sample sizes are embedded in the header cells.
    # Hand calculation for M1=45, SD1=10, n1=50 vs M2=50, SD2=12, n2=48:
    #   t = (50 - 45) / sqrt(10^2/50 + 12^2/48) = 5 / sqrt(2 + 3) = 5 / 2.24 = 2.23
    #   P ~= 0.028, so the reported P=0.03 is close to correct.
    sample_rows = [
        ["Variable", "Group A (n=50)", "Group B (n=48)", "P value"],
        ["Score", "45.0 ± 10.0", "50.0 ± 12.0", "P=0.03"],
    ]
    sample_table = create_mock_table("test_t_1", sample_rows, "T 检验测试")
    findings = checker._validate_ttest(sample_table)
    print(" 测试数据: Group A: 45.0±10.0 (n=50), Group B: 50.0±12.0 (n=48), P=0.03")
    print(f" 结果: {len(findings)} 个问题")
    for finding in findings:
        print(f" - {finding.severity.value}: {finding.message}")
    print()

    return True
def run_all_tests():
    """Run the four validator tests in order and print a summary table.

    Returns:
        True when every test function returned a truthy value, False
        otherwise.
    """
    # The list literal evaluates left to right, so the tests run (and print)
    # in this order before the summary is shown.
    outcomes = [
        ("CI vs P 值一致性", test_ci_pvalue_consistency()),
        ("SE 三角验证", test_se_triangle()),
        ("SD > Mean 检查", test_sd_greater_mean()),
        ("T 检验逆向验证", test_ttest_validation()),
    ]

    rule = "=" * 60
    print(rule)
    print("测试结果汇总")
    print(rule)

    for label, ok in outcomes:
        verdict = "❌ FAIL" if not ok else "✅ PASS"
        print(f" {label}: {verdict}")
    everything_ok = all(ok for _, ok in outcomes)

    print()
    print(
        "🎉 所有测试通过!Day 6 验证器实现完成。"
        if everything_ok
        else "⚠️ 部分测试失败,请检查代码。"
    )
    return everything_ok
if __name__ == "__main__":
    # Script entry point: exit status 0 when every test passed, 1 otherwise.
    sys.exit(0 if run_all_tests() else 1)