""" Day 6 验证器测试脚本测试内容： 1. T 检验逆向验证 2. SE 三角验证 3. SD > Mean 检查 4. CI vs P 值逻辑检查 """ import sys from pathlib import Path # 添加项目路径 sys.path.insert(0, str(Path(__file__).parent)) from forensics.types import ForensicsConfig, TableData, Severity from forensics.validator import StatValidator, SCIPY_AVAILABLE print("=" * 60) print("Day 6 验证器测试") print("=" * 60) print(f"scipy 可用: {SCIPY_AVAILABLE}") print() def create_mock_table(table_id: str, data: list[list[str]], caption: str = "") -> TableData: """创建模拟表格数据""" return TableData( id=table_id, caption=caption, row_count=len(data), col_count=len(data[0]) if data else 0, html="

", data=data, issues=[], skipped=False ) def test_ci_pvalue_consistency(): """测试 CI vs P 值逻辑一致性检查""" print("=" * 40) print("测试 1: CI vs P 值逻辑一致性") print("=" * 40) config = ForensicsConfig(check_level="L1_L2") validator = StatValidator(config) # 测试数据：CI 跨越 1 但 P < 0.05（矛盾） data_conflict1 = [ ["Variable", "OR", "95% CI", "P value"], ["Age", "1.2", "(0.8-1.5)", "P=0.03"], # CI 跨越 1，但 P < 0.05，矛盾 ] table1 = create_mock_table("test_ci_1", data_conflict1, "CI 矛盾测试 1") issues1 = validator._validate_ci_pvalue_consistency(table1) print(f" 测试数据: CI=0.8-1.5 (跨越1), P=0.03 (显著)") print(f" 期望: 发现 ERROR") print(f" 结果: {len(issues1)} 个问题") if issues1: print(f" - {issues1[0].severity.value}: {issues1[0].message}") print() # 测试数据：CI 不跨越 1 且 P < 0.05（正确） data_correct = [ ["Variable", "OR", "95% CI", "P value"], ["Smoking", "2.5", "(1.2-4.8)", "P=0.01"], # CI 不跨越 1，P < 0.05，正确 ] table2 = create_mock_table("test_ci_2", data_correct, "CI 正确测试") issues2 = validator._validate_ci_pvalue_consistency(table2) print(f" 测试数据: CI=1.2-4.8 (不跨越1), P=0.01 (显著)") print(f" 期望: 无问题") print(f" 结果: {len(issues2)} 个问题") print() return len(issues1) > 0 and len(issues2) == 0 def test_se_triangle(): """测试 SE 三角验证""" print("=" * 40) print("测试 2: SE 三角验证 (OR/CI/P 一致性)") print("=" * 40) if not SCIPY_AVAILABLE: print(" 跳过: scipy 不可用") return True config = ForensicsConfig(check_level="L1_L2") validator = StatValidator(config) # 测试数据：OR=2.5, CI=1.5-4.2, P=0.001 # 根据 SE 三角公式验证 # SE = (ln(4.2) - ln(1.5)) / 3.92 = (1.435 - 0.405) / 3.92 = 0.263 # Z = ln(2.5) / 0.263 = 0.916 / 0.263 = 3.48 # P = 2 * (1 - norm.cdf(3.48)) ≈ 0.0005 data_consistent = [ ["Variable", "OR (95% CI)", "P value"], ["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.001"], # 应该一致 ] table1 = create_mock_table("test_se_1", data_consistent, "SE 三角一致性测试") issues1 = validator._validate_se_triangle(table1) print(f" 测试数据: OR=2.5, CI=1.5-4.2, P=0.001") print(f" 结果: {len(issues1)} 个问题") for issue in issues1: print(f" - {issue.severity.value}: {issue.message}") print() # 测试数据：OR=2.5, CI=1.5-4.2, P=0.5（明显矛盾） data_conflict = [ ["Variable", "OR (95% CI)", "P value"], ["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.5"], # P 值严重矛盾 ] table2 = create_mock_table("test_se_2", data_conflict, "SE 三角矛盾测试") issues2 = validator._validate_se_triangle(table2) print(f" 测试数据: OR=2.5, CI=1.5-4.2, P=0.5 (矛盾)") print(f" 期望: 发现 ERROR") print(f" 结果: {len(issues2)} 个问题") for issue in issues2: print(f" - {issue.severity.value}: {issue.message}") if issue.evidence: print(f" 证据: P_calculated={issue.evidence.get('p_calculated')}, P_reported={issue.evidence.get('p_reported')}") print() return len(issues2) > 0 def test_sd_greater_mean(): """测试 SD > Mean 检查""" print("=" * 40) print("测试 3: SD > Mean 启发式检查") print("=" * 40) config = ForensicsConfig(check_level="L1_L2") validator = StatValidator(config) # 测试数据：年龄 SD > Mean（明显异常） data_abnormal = [ ["Variable", "Group A", "Group B"], ["Age (years)", "25.0 ± 30.0", "28.0 ± 8.5"], # 第一个 SD > Mean ] table1 = create_mock_table("test_sd_1", data_abnormal, "SD > Mean 异常测试") issues1 = validator._validate_sd_greater_mean(table1) print(f" 测试数据: 年龄 = 25.0 ± 30.0 (SD > Mean)") print(f" 期望: 发现 ERROR (年龄是正值指标)") print(f" 结果: {len(issues1)} 个问题") for issue in issues1: print(f" - {issue.severity.value}: {issue.message}") print() # 测试数据：正常情况 data_normal = [ ["Variable", "Group A", "Group B"], ["Age (years)", "45.0 ± 12.0", "48.0 ± 10.5"], # 正常 ] table2 = create_mock_table("test_sd_2", data_normal, "SD 正常测试") issues2 = validator._validate_sd_greater_mean(table2) print(f" 测试数据: 年龄 = 45.0 ± 12.0 (正常)") print(f" 期望: 无问题") print(f" 结果: {len(issues2)} 个问题") print() return len(issues1) > 0 and len(issues2) == 0 def test_ttest_validation(): """测试 T 检验逆向验证""" print("=" * 40) print("测试 4: T 检验逆向验证") print("=" * 40) if not SCIPY_AVAILABLE: print(" 跳过: scipy 不可用") return True config = ForensicsConfig(check_level="L1_L2") validator = StatValidator(config) # 测试数据：包含样本量的表头 # 真实 t 检验：M1=45, SD1=10, n1=50; M2=50, SD2=12, n2=48 # t = (50-45) / sqrt(10²/50 + 12²/48) = 5 / sqrt(2 + 3) = 5/2.24 = 2.23 # P ≈ 0.028 data_with_n = [ ["Variable", "Group A (n=50)", "Group B (n=48)", "P value"], ["Score", "45.0 ± 10.0", "50.0 ± 12.0", "P=0.03"], # 接近正确 ] table1 = create_mock_table("test_t_1", data_with_n, "T 检验测试") issues1 = validator._validate_ttest(table1) print(f" 测试数据: Group A: 45.0±10.0 (n=50), Group B: 50.0±12.0 (n=48), P=0.03") print(f" 结果: {len(issues1)} 个问题") for issue in issues1: print(f" - {issue.severity.value}: {issue.message}") print() return True def run_all_tests(): """运行所有测试""" results = [] results.append(("CI vs P 值一致性", test_ci_pvalue_consistency())) results.append(("SE 三角验证", test_se_triangle())) results.append(("SD > Mean 检查", test_sd_greater_mean())) results.append(("T 检验逆向验证", test_ttest_validation())) print("=" * 60) print("测试结果汇总") print("=" * 60) all_passed = True for name, passed in results: status = "✅ PASS" if passed else "❌ FAIL" print(f" {name}: {status}") if not passed: all_passed = False print() if all_passed: print("🎉 所有测试通过！Day 6 验证器实现完成。") else: print("⚠️ 部分测试失败，请检查代码。") return all_passed if __name__ == "__main__": success = run_all_tests() sys.exit(0 if success else 1)