Files
AIclinicalresearch/extraction_service/test_dc_api.py
HaHafeng 2481b786d8 deploy: Complete 0126-27 deployment - database upgrade, services update, code recovery
Major Changes:
- Database: Install pg_bigm/pgvector plugins, create test database
- Python service: v1.0 -> v1.1, add pymupdf4llm/openpyxl/pypandoc
- Node.js backend: v1.3 -> v1.7, fix pino-pretty and ES Module imports
- Frontend: v1.2 -> v1.3, skip TypeScript check for deployment
- Code recovery: Restore empty files from local backup

Technical Fixes:
- Fix pino-pretty error in production (conditional loading)
- Fix ES Module import paths (add .js extensions)
- Fix OSSAdapter TypeScript errors
- Update Prisma Schema (63 models, 16 schemas)
- Update environment variables (DATABASE_URL, EXTRACTION_SERVICE_URL, OSS)
- Remove deprecated variables (REDIS_URL, DIFY_API_URL, DIFY_API_KEY)

Documentation:
- Create 0126 deployment folder with 8 documents
- Update database development standards v2.0
- Update SAE deployment status records

Deployment Status:
- PostgreSQL: ai_clinical_research_test with plugins
- Python: v1.1 @ 172.17.173.84:8000
- Backend: v1.7 @ 172.17.173.89:3001
- Frontend: v1.3 @ 172.17.173.90:80

Tested: All services running successfully on SAE
2026-01-27 08:13:27 +08:00

355 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
DC工具C - API测试脚本
测试项:
1. 健康检查 (GET /api/health)
2. AST安全检查 - 正常代码
3. AST安全检查 - 危险代码
4. Pandas代码执行 - 简单场景
5. Pandas代码执行 - 医疗数据清洗场景
"""
import requests
import json
from typing import Dict, Any
BASE_URL = "http://localhost:8000"
def print_test_header(title: str):
"""打印测试标题"""
print("\n" + "=" * 70)
print(f" {title}")
print("=" * 70)
def print_result(response: requests.Response):
"""打印响应结果"""
print(f"\n状态码: {response.status_code}")
print(f"响应内容:")
try:
result = response.json()
print(json.dumps(result, indent=2, ensure_ascii=False))
except:
print(response.text)
def test_health_check():
"""测试1: 健康检查"""
print_test_header("测试1: 健康检查")
try:
response = requests.get(f"{BASE_URL}/api/health", timeout=5)
print_result(response)
if response.status_code == 200:
print("\n✅ 健康检查通过")
return True
else:
print("\n❌ 健康检查失败")
return False
except Exception as e:
print(f"\n❌ 健康检查异常: {str(e)}")
return False
def test_validate_safe_code():
"""测试2: AST安全检查 - 正常代码"""
print_test_header("测试2: AST安全检查 - 正常代码")
safe_code = """
import pandas as pd
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(df['age_group'].value_counts())
"""
try:
response = requests.post(
f"{BASE_URL}/api/dc/validate",
json={"code": safe_code},
timeout=5
)
print_result(response)
if response.status_code == 200:
result = response.json()
if result.get("valid"):
print("\n✅ 正常代码验证通过valid=True")
return True
else:
print("\n❌ 正常代码被误判为危险")
return False
else:
print("\n❌ API调用失败")
return False
except Exception as e:
print(f"\n❌ 测试异常: {str(e)}")
return False
def test_validate_dangerous_code():
"""测试3: AST安全检查 - 危险代码"""
print_test_header("测试3: AST安全检查 - 危险代码(应该被拦截)")
dangerous_code = """
import os
import sys
os.system('echo "危险操作"')
eval('print("evil code")')
"""
try:
response = requests.post(
f"{BASE_URL}/api/dc/validate",
json={"code": dangerous_code},
timeout=5
)
print_result(response)
if response.status_code == 200:
result = response.json()
if not result.get("valid") and len(result.get("errors", [])) > 0:
print("\n✅ 危险代码成功拦截valid=False, 有错误信息)")
return True
else:
print("\n❌ 危险代码未被拦截!")
return False
else:
print("\n❌ API调用失败")
return False
except Exception as e:
print(f"\n❌ 测试异常: {str(e)}")
return False
def test_execute_simple_code():
"""测试4: Pandas代码执行 - 简单场景"""
print_test_header("测试4: Pandas代码执行 - 简单场景")
test_data = [
{"patient_id": "P001", "age": 25, "gender": ""},
{"patient_id": "P002", "age": 65, "gender": ""},
{"patient_id": "P003", "age": 45, "gender": ""},
{"patient_id": "P004", "age": 70, "gender": ""},
]
simple_code = """
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(f"数据处理完成,共 {len(df)} 行")
print(df['age_group'].value_counts())
"""
try:
response = requests.post(
f"{BASE_URL}/api/dc/execute",
json={"data": test_data, "code": simple_code},
timeout=10
)
print_result(response)
if response.status_code == 200:
result = response.json()
if result.get("success"):
result_data = result.get("result_data", [])
print(f"\n结果数据行数: {len(result_data)}")
print(f"执行时间: {result.get('execution_time', 0):.3f}")
# 验证新列是否添加
if len(result_data) > 0 and 'age_group' in result_data[0]:
print("\n✅ 简单代码执行成功(新增列 age_group")
return True
else:
print("\n❌ 代码执行成功但结果不正确")
return False
else:
print(f"\n❌ 代码执行失败: {result.get('error')}")
return False
else:
print("\n❌ API调用失败")
return False
except Exception as e:
print(f"\n❌ 测试异常: {str(e)}")
return False
def test_execute_medical_cleaning():
"""测试5: Pandas代码执行 - 医疗数据清洗场景"""
print_test_header("测试5: Pandas代码执行 - 医疗数据清洗场景")
# 模拟医疗数据
medical_data = [
{"patient_id": "P001", "age": 25, "gender": "", "sbp": 120, "dbp": 80},
{"patient_id": "P002", "age": 65, "gender": "", "sbp": 150, "dbp": 95},
{"patient_id": "P003", "age": 45, "gender": "", "sbp": 135, "dbp": 85},
{"patient_id": "P004", "age": None, "gender": "", "sbp": 160, "dbp": 100},
{"patient_id": "P005", "age": 200, "gender": "", "sbp": 110, "dbp": 70},
]
# 复杂的医疗数据清洗代码
medical_code = """
import numpy as np
# 1. 清理异常年龄值(>120视为异常
df['age'] = df['age'].apply(lambda x: np.nan if x is None or x > 120 else x)
# 2. 计算血压状态(收缩压 >= 140 或舒张压 >= 90 为高血压)
df['hypertension'] = df.apply(
lambda row: '高血压' if row['sbp'] >= 140 or row['dbp'] >= 90 else '正常',
axis=1
)
# 3. 统计结果
print(f"总样本数: {len(df)}")
print(f"年龄缺失数: {df['age'].isna().sum()}")
print(f"高血压人数: {(df['hypertension'] == '高血压').sum()}")
"""
try:
response = requests.post(
f"{BASE_URL}/api/dc/execute",
json={"data": medical_data, "code": medical_code},
timeout=10
)
print_result(response)
if response.status_code == 200:
result = response.json()
if result.get("success"):
result_data = result.get("result_data", [])
print(f"\n结果数据行数: {len(result_data)}")
print(f"执行时间: {result.get('execution_time', 0):.3f}")
# 验证新列是否添加
if len(result_data) > 0 and 'hypertension' in result_data[0]:
# 验证数据清洗逻辑
hypertension_count = sum(
1 for row in result_data
if row.get('hypertension') == '高血压'
)
print(f"高血压人数: {hypertension_count}")
print("\n✅ 医疗数据清洗场景执行成功")
return True
else:
print("\n❌ 代码执行成功但结果不正确")
return False
else:
print(f"\n❌ 代码执行失败: {result.get('error')}")
return False
else:
print("\n❌ API调用失败")
return False
except Exception as e:
print(f"\n❌ 测试异常: {str(e)}")
return False
def main():
"""主测试函数"""
print("\n" + "🚀" * 35)
print(" DC工具C - Python微服务API测试")
print("🚀" * 35)
# 运行所有测试
results = {
"健康检查": test_health_check(),
"AST检查-正常代码": test_validate_safe_code(),
"AST检查-危险代码": test_validate_dangerous_code(),
"代码执行-简单场景": test_execute_simple_code(),
"代码执行-医疗清洗": test_execute_medical_cleaning(),
}
# 汇总结果
print("\n" + "=" * 70)
print(" 测试结果汇总")
print("=" * 70)
for test_name, passed in results.items():
status = "✅ 通过" if passed else "❌ 失败"
print(f"{test_name:20s}: {status}")
total = len(results)
passed = sum(1 for r in results.values() if r)
success_rate = (passed / total * 100) if total > 0 else 0
print("\n" + "-" * 70)
print(f"总计: {passed}/{total} 通过 ({success_rate:.1f}%)")
print("-" * 70)
if passed == total:
print("\n🎉 所有测试通过Day 1 Python服务开发完成")
else:
print(f"\n⚠️ 有 {total - passed} 个测试失败,请检查")
print("\n")
if __name__ == "__main__":
main()