Files
AIclinicalresearch/extraction_service/test_dc_api.py
HaHafeng ef967d7d7c build(backend): Complete Node.js backend deployment preparation
Major changes:
- Add Docker configuration (Dockerfile, .dockerignore)
- Fix 200+ TypeScript compilation errors
- Add Prisma schema relations for all models (30+ relations)
- Update tsconfig.json to relax non-critical checks
- Optimize Docker build with local dist strategy

Technical details:
- Exclude test files from TypeScript compilation
- Add manual relations for ASL, PKB, DC, AIA modules
- Use type assertions for JSON/Buffer compatibility
- Fix pg-boss, extractionWorker, and other legacy code issues

Build result:
- Docker image: 838MB (compressed ~186MB)
- Successfully pushed to ACR
- Zero TypeScript compilation errors

Related docs:
- Update deployment documentation
- Add Python microservice SAE deployment guide
2025-12-24 22:12:00 +08:00

305 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
DC工具C - API测试脚本
测试项:
1. 健康检查 (GET /api/health)
2. AST安全检查 - 正常代码
3. AST安全检查 - 危险代码
4. Pandas代码执行 - 简单场景
5. Pandas代码执行 - 医疗数据清洗场景
"""
import requests
import json
from typing import Dict, Any
# Base URL of the service under test; the test functions in this script append
# their endpoint paths (e.g. "/api/health") to it.
BASE_URL = "http://localhost:8000"
def print_test_header(title: str):
    """Print a banner for a test section.

    The banner is a blank line, a 70-character rule of ``=``, the title
    prefixed by one space, and a second rule.

    Args:
        title: Text shown between the two rules.
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
def print_result(response: "requests.Response"):
    """Print the status code and body of an HTTP response.

    The body is pretty-printed as JSON when it parses as JSON; otherwise the
    raw response text is printed instead.

    Args:
        response: Response object exposing ``status_code``, ``json()`` and
            ``text``.
    """
    print(f"\n状态码: {response.status_code}")
    print("响应内容:")
    try:
        parsed = response.json()
    except ValueError:
        # The body is not valid JSON (JSON decode errors are ValueError
        # subclasses), so show the raw text. Any other exception, including
        # KeyboardInterrupt, propagates to the caller instead of being hidden.
        print(response.text)
    else:
        print(json.dumps(parsed, indent=2, ensure_ascii=False))
def test_health_check():
    """Test 1: health check.

    Sends ``GET /api/health`` with a 5-second timeout and prints the response.

    Returns:
        True when the status code is 200; False for any other status code or
        when the request raises.
    """
    print_test_header("测试1: 健康检查")
    try:
        resp = requests.get(f"{BASE_URL}/api/health", timeout=5)
        print_result(resp)
        healthy = resp.status_code == 200
        print("\n✅ 健康检查通过" if healthy else "\n❌ 健康检查失败")
        return healthy
    except Exception as exc:
        # Any failure (e.g. the request raising) is reported and counted as a failed test.
        print(f"\n❌ 健康检查异常: {str(exc)}")
        return False
def test_validate_safe_code():
    """Test 2: AST safety check with harmless code.

    Posts a small pandas snippet to ``/api/dc/validate`` and prints the
    response.

    Returns:
        True when the service answers 200 and the JSON body has a truthy
        ``valid`` field; False otherwise, including when the request or the
        response parsing raises.
    """
    print_test_header("测试2: AST安全检查 - 正常代码")
    safe_code = """
import pandas as pd
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(df['age_group'].value_counts())
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/validate",
            json={"code": safe_code},
            timeout=5,
        )
        print_result(resp)
        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False
        verdict = resp.json()
        if not verdict.get("valid"):
            print("\n❌ 正常代码被误判为危险")
            return False
        print("\n✅ 正常代码验证通过valid=True")
        return True
    except Exception as exc:
        # Any failure is reported and counted as a failed test.
        print(f"\n❌ 测试异常: {str(exc)}")
        return False
def test_validate_dangerous_code():
    """Test 3: AST safety check must reject dangerous code.

    Posts a snippet that imports ``os``/``sys`` and calls ``os.system`` and
    ``eval`` to ``/api/dc/validate`` and prints the response.

    Returns:
        True when the service answers 200 with a falsy ``valid`` field and a
        non-empty ``errors`` field; False otherwise, including when the
        request or the response parsing raises.
    """
    print_test_header("测试3: AST安全检查 - 危险代码(应该被拦截)")
    dangerous_code = """
import os
import sys
os.system('echo "危险操作"')
eval('print("evil code")')
"""
    try:
        response = requests.post(
            f"{BASE_URL}/api/dc/validate",
            json={"code": dangerous_code},
            timeout=5,
        )
        print_result(response)
        if response.status_code != 200:
            print("\n❌ API调用失败")
            return False
        result = response.json()
        # Truthiness (rather than len()) also treats a null or absent
        # `errors` as "no error details" instead of raising TypeError.
        if not result.get("valid") and result.get("errors"):
            print("\n✅ 危险代码成功拦截(valid=False, 有错误信息)")
            return True
        print("\n❌ 危险代码未被拦截!")
        return False
    except Exception as e:
        # Any failure is reported and counted as a failed test.
        print(f"\n❌ 测试异常: {str(e)}")
        return False
def test_execute_simple_code():
    """Test 4: pandas code execution, simple scenario.

    Posts four patient rows plus a snippet that derives an ``age_group``
    column to ``/api/dc/execute`` and prints the response.

    Returns:
        True when the service answers 200, reports ``success`` and the first
        returned row contains ``age_group``; False otherwise, including when
        the request or the response handling raises.
    """
    print_test_header("测试4: Pandas代码执行 - 简单场景")
    test_data = [
        {"patient_id": "P001", "age": 25, "gender": ""},
        {"patient_id": "P002", "age": 65, "gender": ""},
        {"patient_id": "P003", "age": 45, "gender": ""},
        {"patient_id": "P004", "age": 70, "gender": ""},
    ]
    simple_code = """
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(f"数据处理完成,共 {len(df)} 行")
print(df['age_group'].value_counts())
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/execute",
            json={"data": test_data, "code": simple_code},
            timeout=10,
        )
        print_result(resp)
        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False
        payload = resp.json()
        if not payload.get("success"):
            print(f"\n❌ 代码执行失败: {payload.get('error')}")
            return False
        rows = payload.get("result_data", [])
        print(f"\n结果数据行数: {len(rows)}")
        print(f"执行时间: {payload.get('execution_time', 0):.3f}")
        # Check that the new column was added to the returned rows.
        if len(rows) == 0 or 'age_group' not in rows[0]:
            print("\n❌ 代码执行成功但结果不正确")
            return False
        print("\n✅ 简单代码执行成功(新增列 age_group")
        return True
    except Exception as exc:
        # Any failure is reported and counted as a failed test.
        print(f"\n❌ 测试异常: {str(exc)}")
        return False
def test_execute_medical_cleaning():
    """Test 5: pandas code execution, medical data cleaning scenario.

    Posts five patient rows plus a cleaning script to ``/api/dc/execute``.
    The script blanks out ages above 120 and adds a ``hypertension`` column
    (systolic >= 140 or diastolic >= 90).

    Returns:
        True when the service answers 200, reports ``success``, the returned
        rows contain ``hypertension``, the number of hypertensive rows equals
        the number derived from the input, and no returned row still has an
        age above 120. False otherwise, including when the request or the
        response handling raises.
    """
    print_test_header("测试5: Pandas代码执行 - 医疗数据清洗场景")
    # Mock medical records; P004 has no age and P005 has an age above 120.
    medical_data = [
        {"patient_id": "P001", "age": 25, "gender": "", "sbp": 120, "dbp": 80},
        {"patient_id": "P002", "age": 65, "gender": "", "sbp": 150, "dbp": 95},
        {"patient_id": "P003", "age": 45, "gender": "", "sbp": 135, "dbp": 85},
        {"patient_id": "P004", "age": None, "gender": "", "sbp": 160, "dbp": 100},
        {"patient_id": "P005", "age": 200, "gender": "", "sbp": 110, "dbp": 70},
    ]
    # Cleaning script submitted to the service for execution.
    medical_code = """
import numpy as np
# 1. 清理异常年龄值(>120视为异常
df['age'] = df['age'].apply(lambda x: np.nan if x is None or x > 120 else x)
# 2. 计算血压状态(收缩压 >= 140 或舒张压 >= 90 为高血压)
df['hypertension'] = df.apply(
lambda row: '高血压' if row['sbp'] >= 140 or row['dbp'] >= 90 else '正常',
axis=1
)
# 3. 统计结果
print(f"总样本数: {len(df)}")
print(f"年龄缺失数: {df['age'].isna().sum()}")
print(f"高血压人数: {(df['hypertension'] == '高血压').sum()}")
"""
    # Expected number of hypertensive rows, derived from the input with the
    # same rule the submitted script applies.
    expected_hypertension = sum(
        1 for row in medical_data
        if row["sbp"] >= 140 or row["dbp"] >= 90
    )
    try:
        response = requests.post(
            f"{BASE_URL}/api/dc/execute",
            json={"data": medical_data, "code": medical_code},
            timeout=10,
        )
        print_result(response)
        if response.status_code != 200:
            print("\n❌ API调用失败")
            return False
        result = response.json()
        if not result.get("success"):
            print(f"\n❌ 代码执行失败: {result.get('error')}")
            return False
        result_data = result.get("result_data", [])
        print(f"\n结果数据行数: {len(result_data)}")
        print(f"执行时间: {result.get('execution_time', 0):.3f}")
        # Check that the new column was added to the returned rows.
        if not result_data or 'hypertension' not in result_data[0]:
            print("\n❌ 代码执行成功但结果不正确")
            return False
        # Verify the cleaning logic itself, not just the column's presence.
        hypertension_count = sum(
            1 for row in result_data
            if row.get('hypertension') == '高血压'
        )
        print(f"高血压人数: {hypertension_count}")
        if hypertension_count != expected_hypertension:
            print(
                f"\n❌ 高血压人数不符: 期望 {expected_hypertension}, "
                f"实际 {hypertension_count}"
            )
            return False
        # Only numeric ages are compared, so the check does not depend on
        # how the service serializes a blanked-out age.
        uncleaned_ages = [
            row.get('age') for row in result_data
            if isinstance(row.get('age'), (int, float)) and row.get('age') > 120
        ]
        if uncleaned_ages:
            print(f"\n❌ 异常年龄值未被清理: {uncleaned_ages}")
            return False
        print("\n✅ 医疗数据清洗场景执行成功")
        return True
    except Exception as e:
        # Any failure is reported and counted as a failed test.
        print(f"\n❌ 测试异常: {str(e)}")
        return False
def main():
    """Run every API test, print a summary table, and report the outcome.

    All five tests are executed regardless of earlier failures.

    Returns:
        True when every test passed, False otherwise.
    """
    print("\n" + "🚀" * 35)
    print(" DC工具C - Python微服务API测试")
    print("🚀" * 35)
    # Run all tests; dict order is the execution and reporting order.
    results = {
        "健康检查": test_health_check(),
        "AST检查-正常代码": test_validate_safe_code(),
        "AST检查-危险代码": test_validate_dangerous_code(),
        "代码执行-简单场景": test_execute_simple_code(),
        "代码执行-医疗清洗": test_execute_medical_cleaning(),
    }
    # Summarize the results.
    print("\n" + "=" * 70)
    print(" 测试结果汇总")
    print("=" * 70)
    for test_name, ok in results.items():
        status = "✅ 通过" if ok else "❌ 失败"
        print(f"{test_name:20s}: {status}")
    total = len(results)
    passed = sum(1 for ok in results.values() if ok)
    success_rate = (passed / total * 100) if total > 0 else 0
    print("\n" + "-" * 70)
    print(f"总计: {passed}/{total} 通过 ({success_rate:.1f}%)")
    print("-" * 70)
    all_passed = passed == total
    if all_passed:
        print("\n🎉 所有测试通过Day 1 Python服务开发完成")
    else:
        print(f"\n⚠️ 有 {total - passed} 个测试失败,请检查")
    print("\n")
    return all_passed


if __name__ == "__main__":
    # Exit non-zero when any test failed so shells and CI can detect it.
    raise SystemExit(0 if main() else 1)