Files
AIclinicalresearch/extraction_service/test_day6_validators.py
HaHafeng e785969e54 feat(rvw): Implement RVW V2.0 Data Forensics Module - Day 6 StatValidator
Summary:
- Implement L2 Statistical Validator (CI-P consistency, T-test reverse)
- Implement L2.5 Consistency Forensics (SE Triangle, SD>Mean check)
- Add error/warning severity classification with tolerance thresholds
- Support 5+ CI formats parsing (parentheses, brackets, 95% CI prefix)
- Complete Python forensics service (types, config, validator, extractor)

V2.0 Development Progress (Week 2 Day 6):
- Day 1-5: Python service setup, Word table extraction, L1 arithmetic validator
- Day 6: L2 StatValidator + L2.5 consistency forensics (promoted from V2.1)

Test Results:
- Unit tests: 4/4 passed (CI-P, SE Triangle, SD>Mean, T-test)
- Real document tests: 5/5 successful, 2 reasonable WARNINGs

Status: Day 6 completed, ready for Day 7 (Skills Framework)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-17 22:15:27 +08:00

246 lines
7.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Day 6 验证器测试脚本
测试内容:
1. T 检验逆向验证
2. SE 三角验证
3. SD > Mean 检查
4. CI vs P 值逻辑检查
"""
import sys
from pathlib import Path
# 添加项目路径
sys.path.insert(0, str(Path(__file__).parent))
from forensics.types import ForensicsConfig, TableData, Severity
from forensics.validator import StatValidator, SCIPY_AVAILABLE
print("=" * 60)
print("Day 6 验证器测试")
print("=" * 60)
print(f"scipy 可用: {SCIPY_AVAILABLE}")
print()
def create_mock_table(table_id: str, data: list[list[str]], caption: str = "") -> TableData:
"""创建模拟表格数据"""
return TableData(
id=table_id,
caption=caption,
row_count=len(data),
col_count=len(data[0]) if data else 0,
html="<table></table>",
data=data,
issues=[],
skipped=False
)
def test_ci_pvalue_consistency():
"""测试 CI vs P 值逻辑一致性检查"""
print("=" * 40)
print("测试 1: CI vs P 值逻辑一致性")
print("=" * 40)
config = ForensicsConfig(check_level="L1_L2")
validator = StatValidator(config)
# 测试数据CI 跨越 1 但 P < 0.05(矛盾)
data_conflict1 = [
["Variable", "OR", "95% CI", "P value"],
["Age", "1.2", "(0.8-1.5)", "P=0.03"], # CI 跨越 1但 P < 0.05,矛盾
]
table1 = create_mock_table("test_ci_1", data_conflict1, "CI 矛盾测试 1")
issues1 = validator._validate_ci_pvalue_consistency(table1)
print(f" 测试数据: CI=0.8-1.5 (跨越1), P=0.03 (显著)")
print(f" 期望: 发现 ERROR")
print(f" 结果: {len(issues1)} 个问题")
if issues1:
print(f" - {issues1[0].severity.value}: {issues1[0].message}")
print()
# 测试数据CI 不跨越 1 且 P < 0.05(正确)
data_correct = [
["Variable", "OR", "95% CI", "P value"],
["Smoking", "2.5", "(1.2-4.8)", "P=0.01"], # CI 不跨越 1P < 0.05,正确
]
table2 = create_mock_table("test_ci_2", data_correct, "CI 正确测试")
issues2 = validator._validate_ci_pvalue_consistency(table2)
print(f" 测试数据: CI=1.2-4.8 (不跨越1), P=0.01 (显著)")
print(f" 期望: 无问题")
print(f" 结果: {len(issues2)} 个问题")
print()
return len(issues1) > 0 and len(issues2) == 0
def test_se_triangle():
"""测试 SE 三角验证"""
print("=" * 40)
print("测试 2: SE 三角验证 (OR/CI/P 一致性)")
print("=" * 40)
if not SCIPY_AVAILABLE:
print(" 跳过: scipy 不可用")
return True
config = ForensicsConfig(check_level="L1_L2")
validator = StatValidator(config)
# 测试数据OR=2.5, CI=1.5-4.2, P=0.001
# 根据 SE 三角公式验证
# SE = (ln(4.2) - ln(1.5)) / 3.92 = (1.435 - 0.405) / 3.92 = 0.263
# Z = ln(2.5) / 0.263 = 0.916 / 0.263 = 3.48
# P = 2 * (1 - norm.cdf(3.48)) ≈ 0.0005
data_consistent = [
["Variable", "OR (95% CI)", "P value"],
["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.001"], # 应该一致
]
table1 = create_mock_table("test_se_1", data_consistent, "SE 三角一致性测试")
issues1 = validator._validate_se_triangle(table1)
print(f" 测试数据: OR=2.5, CI=1.5-4.2, P=0.001")
print(f" 结果: {len(issues1)} 个问题")
for issue in issues1:
print(f" - {issue.severity.value}: {issue.message}")
print()
# 测试数据OR=2.5, CI=1.5-4.2, P=0.5(明显矛盾)
data_conflict = [
["Variable", "OR (95% CI)", "P value"],
["Diabetes", "OR=2.5 (1.5-4.2)", "P=0.5"], # P 值严重矛盾
]
table2 = create_mock_table("test_se_2", data_conflict, "SE 三角矛盾测试")
issues2 = validator._validate_se_triangle(table2)
print(f" 测试数据: OR=2.5, CI=1.5-4.2, P=0.5 (矛盾)")
print(f" 期望: 发现 ERROR")
print(f" 结果: {len(issues2)} 个问题")
for issue in issues2:
print(f" - {issue.severity.value}: {issue.message}")
if issue.evidence:
print(f" 证据: P_calculated={issue.evidence.get('p_calculated')}, P_reported={issue.evidence.get('p_reported')}")
print()
return len(issues2) > 0
def test_sd_greater_mean():
"""测试 SD > Mean 检查"""
print("=" * 40)
print("测试 3: SD > Mean 启发式检查")
print("=" * 40)
config = ForensicsConfig(check_level="L1_L2")
validator = StatValidator(config)
# 测试数据:年龄 SD > Mean明显异常
data_abnormal = [
["Variable", "Group A", "Group B"],
["Age (years)", "25.0 ± 30.0", "28.0 ± 8.5"], # 第一个 SD > Mean
]
table1 = create_mock_table("test_sd_1", data_abnormal, "SD > Mean 异常测试")
issues1 = validator._validate_sd_greater_mean(table1)
print(f" 测试数据: 年龄 = 25.0 ± 30.0 (SD > Mean)")
print(f" 期望: 发现 ERROR (年龄是正值指标)")
print(f" 结果: {len(issues1)} 个问题")
for issue in issues1:
print(f" - {issue.severity.value}: {issue.message}")
print()
# 测试数据:正常情况
data_normal = [
["Variable", "Group A", "Group B"],
["Age (years)", "45.0 ± 12.0", "48.0 ± 10.5"], # 正常
]
table2 = create_mock_table("test_sd_2", data_normal, "SD 正常测试")
issues2 = validator._validate_sd_greater_mean(table2)
print(f" 测试数据: 年龄 = 45.0 ± 12.0 (正常)")
print(f" 期望: 无问题")
print(f" 结果: {len(issues2)} 个问题")
print()
return len(issues1) > 0 and len(issues2) == 0
def test_ttest_validation():
"""测试 T 检验逆向验证"""
print("=" * 40)
print("测试 4: T 检验逆向验证")
print("=" * 40)
if not SCIPY_AVAILABLE:
print(" 跳过: scipy 不可用")
return True
config = ForensicsConfig(check_level="L1_L2")
validator = StatValidator(config)
# 测试数据:包含样本量的表头
# 真实 t 检验M1=45, SD1=10, n1=50; M2=50, SD2=12, n2=48
# t = (50-45) / sqrt(10²/50 + 12²/48) = 5 / sqrt(2 + 3) = 5/2.24 = 2.23
# P ≈ 0.028
data_with_n = [
["Variable", "Group A (n=50)", "Group B (n=48)", "P value"],
["Score", "45.0 ± 10.0", "50.0 ± 12.0", "P=0.03"], # 接近正确
]
table1 = create_mock_table("test_t_1", data_with_n, "T 检验测试")
issues1 = validator._validate_ttest(table1)
print(f" 测试数据: Group A: 45.0±10.0 (n=50), Group B: 50.0±12.0 (n=48), P=0.03")
print(f" 结果: {len(issues1)} 个问题")
for issue in issues1:
print(f" - {issue.severity.value}: {issue.message}")
print()
return True
def run_all_tests():
"""运行所有测试"""
results = []
results.append(("CI vs P 值一致性", test_ci_pvalue_consistency()))
results.append(("SE 三角验证", test_se_triangle()))
results.append(("SD > Mean 检查", test_sd_greater_mean()))
results.append(("T 检验逆向验证", test_ttest_validation()))
print("=" * 60)
print("测试结果汇总")
print("=" * 60)
all_passed = True
for name, passed in results:
status = "✅ PASS" if passed else "❌ FAIL"
print(f" {name}: {status}")
if not passed:
all_passed = False
print()
if all_passed:
print("🎉 所有测试通过Day 6 验证器实现完成。")
else:
print("⚠️ 部分测试失败,请检查代码。")
return all_passed
if __name__ == "__main__":
success = run_all_tests()
sys.exit(0 if success else 1)