Files
AIclinicalresearch/extraction_service/test_dc_api.py
HaHafeng b64896a307 feat(deploy): Complete PostgreSQL migration and Docker image build
Summary:
- PostgreSQL database migration to RDS completed (90MB SQL, 11 schemas)
- Frontend Nginx Docker image built and pushed to ACR (v1.0, ~50MB)
- Python microservice Docker image built and pushed to ACR (v1.0, 1.12GB)
- Created 3 deployment documentation files

Docker Configuration Files:
- frontend-v2/Dockerfile: Multi-stage build with nginx:alpine
- frontend-v2/.dockerignore: Optimize build context
- frontend-v2/nginx.conf: SPA routing and API proxy
- frontend-v2/docker-entrypoint.sh: Dynamic env injection
- extraction_service/Dockerfile: Multi-stage build with Aliyun Debian mirror
- extraction_service/.dockerignore: Optimize build context
- extraction_service/requirements-prod.txt: Production dependencies (removed Nougat)

Deployment Documentation:
- docs/05-部署文档/00-部署进度总览.md: One-stop deployment status overview
- docs/05-部署文档/07-前端Nginx-SAE部署操作手册.md: Frontend deployment guide
- docs/05-部署文档/08-PostgreSQL数据库部署操作手册.md: Database deployment guide
- docs/00-系统总体设计/00-系统当前状态与开发指南.md: Updated with deployment status

Database Migration:
- RDS instance: pgm-2zex1m2y3r23hdn5 (2C4G, PostgreSQL 15.0)
- Database: ai_clinical_research
- Schemas: 11 business schemas migrated successfully
- Data: 3 users, 2 projects, 1204 literatures verified
- Backup: rds_init_20251224_154529.sql (90MB)

Docker Images:
- Frontend: crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.0
- Python: crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0

Key Achievements:
- Resolved Docker Hub network issues (using generic tags)
- Fixed 30 TypeScript compilation errors
- Removed Nougat OCR to reduce image size by 1.5GB
- Used Aliyun Debian mirror to resolve apt-get network issues
- Implemented multi-stage builds for optimization

Next Steps:
- Deploy Python microservice to SAE
- Build Node.js backend Docker image
- Deploy Node.js backend to SAE
- Deploy frontend Nginx to SAE
- End-to-end verification testing

Status: Docker images ready, SAE deployment pending
2025-12-24 18:21:55 +08:00

304 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
DC工具C - API测试脚本
测试项:
1. 健康检查 (GET /api/health)
2. AST安全检查 - 正常代码
3. AST安全检查 - 危险代码
4. Pandas代码执行 - 简单场景
5. Pandas代码执行 - 医疗数据清洗场景
"""
import requests
import json
from typing import Dict, Any
# Base URL of the service under test; every request in this script is built
# by appending an "/api/..." path to it.
BASE_URL = "http://localhost:8000"
def print_test_header(title: str):
    """Print a banner that introduces one test section.

    The banner is a blank line, a 70-character ``=`` rule, the title
    prefixed with a single space, and a closing ``=`` rule.

    Args:
        title: Text shown between the two rules.
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
def print_result(response: requests.Response):
    """Print the status code and body of an HTTP response.

    The body is pretty-printed as JSON (2-space indent, non-ASCII characters
    kept as-is) when it decodes as JSON; otherwise the raw response text is
    printed instead.

    Args:
        response: The response object to display. Only ``status_code``,
            ``json()`` and ``text`` are used.
    """
    print(f"\n状态码: {response.status_code}")
    print("响应内容:")
    try:
        payload = response.json()
    except ValueError:
        # A body that is not valid JSON surfaces as a ValueError subclass.
        # Catching only that (instead of a bare ``except``) lets
        # KeyboardInterrupt / SystemExit propagate normally.
        print(response.text)
    else:
        print(json.dumps(payload, indent=2, ensure_ascii=False))
def test_health_check():
    """Test 1: call the health endpoint and report whether it answered 200.

    Sends ``GET {BASE_URL}/api/health`` with a 5-second timeout and prints
    the response.

    Returns:
        True when the status code is 200; False for any other status code or
        when the request (or printing the response) raises.
    """
    print_test_header("测试1: 健康检查")
    try:
        resp = requests.get(f"{BASE_URL}/api/health", timeout=5)
        print_result(resp)
        healthy = resp.status_code == 200
        print("\n✅ 健康检查通过" if healthy else "\n❌ 健康检查失败")
        return healthy
    except Exception as exc:
        # Any failure (connection refused, timeout, ...) counts as a failed
        # check rather than aborting the whole run.
        print(f"\n❌ 健康检查异常: {exc!s}")
        return False
def test_validate_safe_code():
    """Test 2: the AST safety check must accept harmless pandas code.

    Posts a small pandas snippet to ``/api/dc/validate`` (5-second timeout)
    and expects HTTP 200 with a truthy ``valid`` field in the JSON body.

    Returns:
        True when the snippet is reported as valid; False when the call does
        not return 200, the snippet is rejected, or an exception is raised.
    """
    print_test_header("测试2: AST安全检查 - 正常代码")
    safe_code = """
import pandas as pd
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(df['age_group'].value_counts())
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/validate",
            json={"code": safe_code},
            timeout=5,
        )
        print_result(resp)

        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False

        verdict = resp.json()
        if not verdict.get("valid"):
            print("\n❌ 正常代码被误判为危险")
            return False

        print("\n✅ 正常代码验证通过valid=True")
        return True
    except Exception as exc:
        print(f"\n❌ 测试异常: {exc!s}")
        return False
def test_validate_dangerous_code():
    """Test 3: the AST safety check must reject dangerous code.

    Posts a snippet that imports ``os``/``sys`` and calls ``os.system`` and
    ``eval`` to ``/api/dc/validate`` (5-second timeout). The check passes
    only when the JSON body has a falsy ``valid`` field and a non-empty
    ``errors`` list.

    Returns:
        True when the snippet is rejected with at least one error; False
        when the call does not return 200, the snippet is not rejected, or
        an exception is raised.
    """
    print_test_header("测试3: AST安全检查 - 危险代码(应该被拦截)")
    dangerous_code = """
import os
import sys
os.system('echo "危险操作"')
eval('print("evil code")')
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/validate",
            json={"code": dangerous_code},
            timeout=5,
        )
        print_result(resp)

        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False

        verdict = resp.json()
        # Not intercepted if the code was declared valid, or if it was
        # declared invalid without any accompanying error message.
        if verdict.get("valid") or len(verdict.get("errors", [])) <= 0:
            print("\n❌ 危险代码未被拦截!")
            return False

        print("\n✅ 危险代码成功拦截valid=False, 有错误信息)")
        return True
    except Exception as exc:
        print(f"\n❌ 测试异常: {exc!s}")
        return False
def test_execute_simple_code():
    """Test 4: execute a simple pandas snippet against four sample rows.

    Posts the rows and a snippet that derives an ``age_group`` column to
    ``/api/dc/execute`` (10-second timeout). The check passes when the JSON
    body reports ``success`` and the first row of ``result_data`` contains
    the ``age_group`` key.

    Returns:
        True when the new column is present in the returned data; False when
        the call does not return 200, execution is reported as failed, the
        column is missing, or an exception is raised.
    """
    print_test_header("测试4: Pandas代码执行 - 简单场景")
    test_data = [
        {"patient_id": "P001", "age": 25, "gender": ""},
        {"patient_id": "P002", "age": 65, "gender": ""},
        {"patient_id": "P003", "age": 45, "gender": ""},
        {"patient_id": "P004", "age": 70, "gender": ""},
    ]
    simple_code = """
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(f"数据处理完成,共 {len(df)} 行")
print(df['age_group'].value_counts())
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/execute",
            json={"data": test_data, "code": simple_code},
            timeout=10,
        )
        print_result(resp)

        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False

        outcome = resp.json()
        if not outcome.get("success"):
            print(f"\n❌ 代码执行失败: {outcome.get('error')}")
            return False

        rows = outcome.get("result_data", [])
        print(f"\n结果数据行数: {len(rows)}")
        print(f"执行时间: {outcome.get('execution_time', 0):.3f}")

        # The snippet is expected to add the derived column to every row;
        # inspecting the first row is the check performed here.
        has_new_column = len(rows) > 0 and 'age_group' in rows[0]
        if not has_new_column:
            print("\n❌ 代码执行成功但结果不正确")
            return False

        print("\n✅ 简单代码执行成功(新增列 age_group")
        return True
    except Exception as exc:
        print(f"\n❌ 测试异常: {exc!s}")
        return False
def test_execute_medical_cleaning():
    """Test 5: execute a medical data-cleaning snippet against sample rows.

    Posts five mock patient rows (including a missing age and an age of 200)
    together with a snippet that blanks out ages above 120 and derives a
    ``hypertension`` column from ``sbp``/``dbp`` to ``/api/dc/execute``
    (10-second timeout). The check passes when the JSON body reports
    ``success`` and the first row of ``result_data`` contains the
    ``hypertension`` key. The number of hypertensive rows is printed for
    information only; it is not compared against an expected value.

    Returns:
        True when the new column is present in the returned data; False when
        the call does not return 200, execution is reported as failed, the
        column is missing, or an exception is raised.
    """
    print_test_header("测试5: Pandas代码执行 - 医疗数据清洗场景")
    # Mock medical records.
    medical_data = [
        {"patient_id": "P001", "age": 25, "gender": "", "sbp": 120, "dbp": 80},
        {"patient_id": "P002", "age": 65, "gender": "", "sbp": 150, "dbp": 95},
        {"patient_id": "P003", "age": 45, "gender": "", "sbp": 135, "dbp": 85},
        {"patient_id": "P004", "age": None, "gender": "", "sbp": 160, "dbp": 100},
        {"patient_id": "P005", "age": 200, "gender": "", "sbp": 110, "dbp": 70},
    ]
    # Cleaning code that is sent to the service for execution.
    medical_code = """
import numpy as np
# 1. 清理异常年龄值(>120视为异常
df['age'] = df['age'].apply(lambda x: np.nan if x is None or x > 120 else x)
# 2. 计算血压状态(收缩压 >= 140 或舒张压 >= 90 为高血压)
df['hypertension'] = df.apply(
lambda row: '高血压' if row['sbp'] >= 140 or row['dbp'] >= 90 else '正常',
axis=1
)
# 3. 统计结果
print(f"总样本数: {len(df)}")
print(f"年龄缺失数: {df['age'].isna().sum()}")
print(f"高血压人数: {(df['hypertension'] == '高血压').sum()}")
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/execute",
            json={"data": medical_data, "code": medical_code},
            timeout=10,
        )
        print_result(resp)

        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False

        outcome = resp.json()
        if not outcome.get("success"):
            print(f"\n❌ 代码执行失败: {outcome.get('error')}")
            return False

        rows = outcome.get("result_data", [])
        print(f"\n结果数据行数: {len(rows)}")
        print(f"执行时间: {outcome.get('execution_time', 0):.3f}")

        # The derived column must be present in the first returned row.
        if not (len(rows) > 0 and 'hypertension' in rows[0]):
            print("\n❌ 代码执行成功但结果不正确")
            return False

        # Count the rows flagged as hypertensive (reported, not asserted).
        flagged = [row for row in rows if row.get('hypertension') == '高血压']
        hypertension_count = len(flagged)
        print(f"高血压人数: {hypertension_count}")
        print("\n✅ 医疗数据清洗场景执行成功")
        return True
    except Exception as exc:
        print(f"\n❌ 测试异常: {exc!s}")
        return False
def main():
    """Run every API check in order and print a pass/fail summary.

    The checks run in a fixed order (health, AST validation of safe and
    dangerous code, simple execution, medical-cleaning execution). Each
    returns a truthy value on success; the results are tabulated and a
    total with the success percentage is printed.

    Returns:
        True when every check passed, False otherwise. Callers that ignore
        the return value behave exactly as before.
    """
    print("\n" + "🚀" * 35)
    print(" DC工具C - Python微服务API测试")
    print("🚀" * 35)

    # Run all checks; dict insertion order is the display order below.
    results = {
        "健康检查": test_health_check(),
        "AST检查-正常代码": test_validate_safe_code(),
        "AST检查-危险代码": test_validate_dangerous_code(),
        "代码执行-简单场景": test_execute_simple_code(),
        "代码执行-医疗清洗": test_execute_medical_cleaning(),
    }

    # Summary table.
    print("\n" + "=" * 70)
    print(" 测试结果汇总")
    print("=" * 70)
    for test_name, ok in results.items():
        status = "✅ 通过" if ok else "❌ 失败"
        print(f"{test_name:20s}: {status}")

    total = len(results)
    passed = sum(1 for ok in results.values() if ok)
    success_rate = (passed / total * 100) if total > 0 else 0
    print("\n" + "-" * 70)
    print(f"总计: {passed}/{total} 通过 ({success_rate:.1f}%)")
    print("-" * 70)

    all_passed = passed == total
    if all_passed:
        print("\n🎉 所有测试通过Day 1 Python服务开发完成")
    else:
        print(f"\n⚠️ 有 {total - passed} 个测试失败,请检查")
    print("\n")
    return all_passed


if __name__ == "__main__":
    # Exit with a non-zero status when any check failed so that shells and
    # CI pipelines can detect the failure; previously the script always
    # exited 0 regardless of the results.
    raise SystemExit(0 if main() else 1)