Files
AIclinicalresearch/extraction_service/test_dc_api.py
HaHafeng 19f9c5ea93 docs(deployment): Fix 8 critical deployment issues and enhance documentation
Summary of fixes:
- Fix service discovery address (change .sae domain to internal IP)
- Unify timezone configuration (Asia/Shanghai for all services)
- Enhance ECS security group configuration (Redis/Weaviate port binding)
- Add image pull strategy best practices
- Add Python service memory management guidelines
- Update Dify API Key deployment strategy (avoid deadlock)
- Add SSH tunnel for RDS database access
- Add NAT gateway cost optimization explanation

Modified files (7 docs):
- 00-部署架构总览.md (enhanced with 7 sections)
- 03-Dify-ECS部署完全指南.md (security hardening)
- 04-Python微服务-SAE容器部署指南.md (timezone + service discovery)
- 05-Node.js后端-SAE容器部署指南.md (timezone configuration)
- PostgreSQL部署策略-摸底报告.md (timezone best practice)
- 07-关键配置补充说明.md (3 new sections)
- 08-部署检查清单.md (service address fix)

New files:
- 文档修正报告-20251214.md (comprehensive fix report)
- Review documents from technical team

Impact:
- Fixed 3 P0/P1 critical issues (100% connection failure risk)
- Fixed 3 P2 important issues (stability and maintainability)
- Added 2 P3 best practices (developer convenience)

Status: All deployment documents reviewed and corrected, ready for production deployment
2025-12-14 13:25:28 +08:00

294 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
DC工具C - API测试脚本
测试项:
1. 健康检查 (GET /api/health)
2. AST安全检查 - 正常代码
3. AST安全检查 - 危险代码
4. Pandas代码执行 - 简单场景
5. Pandas代码执行 - 医疗数据清洗场景
"""
import requests
import json
from typing import Dict, Any
# Root URL of the service under test; every request in this script is built from it.
BASE_URL = "http://localhost:8000"
def print_test_header(title: str):
    """Print a banner announcing the test that is about to run.

    The banner is a blank line, a 70-character rule of ``=``, the title
    prefixed with one space, and a closing rule.

    Args:
        title: Text shown between the two rules.
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
def print_result(response: requests.Response):
    """Print the status code and body of an HTTP response.

    The body is pretty-printed as JSON (2-space indent, non-ASCII characters
    kept as-is) when it can be decoded; otherwise the raw text is printed.

    Args:
        response: The response to display. Only ``status_code``, ``json()``
            and ``text`` are used.
    """
    print(f"\n状态码: {response.status_code}")
    print("响应内容:")
    try:
        payload = response.json()
    except ValueError:
        # JSON decoding failures are ValueError subclasses. Catching only
        # those keeps SystemExit / KeyboardInterrupt from being swallowed,
        # which the previous bare ``except:`` did.
        print(response.text)
    else:
        print(json.dumps(payload, indent=2, ensure_ascii=False))
def test_health_check():
    """Test 1: call the health endpoint and report whether it answers 200.

    Sends ``GET {BASE_URL}/api/health`` with a 5-second timeout and prints
    the response.

    Returns:
        True when the status code is 200; False for any other status or
        when any exception is raised while performing the check.
    """
    print_test_header("测试1: 健康检查")
    try:
        resp = requests.get(BASE_URL + "/api/health", timeout=5)
        print_result(resp)
        healthy = resp.status_code == 200
        print("\n✅ 健康检查通过" if healthy else "\n❌ 健康检查失败")
        return healthy
    except Exception as exc:
        print(f"\n❌ 健康检查异常: {exc!s}")
        return False
def test_validate_safe_code():
    """Test 2: the validation endpoint must accept harmless pandas code.

    Posts a small pandas snippet to ``{BASE_URL}/api/dc/validate`` with a
    5-second timeout and inspects the ``valid`` field of the JSON reply.

    Returns:
        True when the status code is 200 and ``valid`` is truthy; False on
        any other outcome, including any exception raised along the way.
    """
    print_test_header("测试2: AST安全检查 - 正常代码")
    safe_code = """
import pandas as pd
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(df['age_group'].value_counts())
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/validate",
            json={"code": safe_code},
            timeout=5,
        )
        print_result(resp)

        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False

        verdict = resp.json()
        if not verdict.get("valid"):
            print("\n❌ 正常代码被误判为危险")
            return False

        print("\n✅ 正常代码验证通过valid=True")
        return True
    except Exception as exc:
        print(f"\n❌ 测试异常: {exc!s}")
        return False
def test_validate_dangerous_code():
    """Test 3: the validation endpoint must reject dangerous code.

    Posts a snippet that imports ``os``/``sys`` and calls ``os.system`` and
    ``eval`` to ``{BASE_URL}/api/dc/validate`` with a 5-second timeout.

    Returns:
        True when the status code is 200, ``valid`` is falsy and ``errors``
        is non-empty; False on any other outcome, including any exception
        raised along the way.
    """
    print_test_header("测试3: AST安全检查 - 危险代码(应该被拦截)")
    dangerous_code = """
import os
import sys
os.system('echo "危险操作"')
eval('print("evil code")')
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/validate",
            json={"code": dangerous_code},
            timeout=5,
        )
        print_result(resp)

        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False

        verdict = resp.json()
        # Rejection only counts when it comes with at least one error entry.
        if verdict.get("valid") or len(verdict.get("errors", [])) == 0:
            print("\n❌ 危险代码未被拦截!")
            return False

        print("\n✅ 危险代码成功拦截valid=False, 有错误信息)")
        return True
    except Exception as exc:
        print(f"\n❌ 测试异常: {exc!s}")
        return False
def test_execute_simple_code():
    """Test 4: execute a simple pandas snippet through the execute endpoint.

    Posts four patient records plus code that derives an ``age_group``
    column to ``{BASE_URL}/api/dc/execute`` with a 10-second timeout.

    Returns:
        True when the status code is 200, ``success`` is truthy and the
        first returned row contains ``age_group``; False on any other
        outcome, including any exception raised along the way.
    """
    print_test_header("测试4: Pandas代码执行 - 简单场景")
    test_data = [
        {"patient_id": "P001", "age": 25, "gender": ""},
        {"patient_id": "P002", "age": 65, "gender": ""},
        {"patient_id": "P003", "age": 45, "gender": ""},
        {"patient_id": "P004", "age": 70, "gender": ""},
    ]
    simple_code = """
df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
print(f"数据处理完成,共 {len(df)} 行")
print(df['age_group'].value_counts())
"""
    try:
        resp = requests.post(
            f"{BASE_URL}/api/dc/execute",
            json={"data": test_data, "code": simple_code},
            timeout=10,
        )
        print_result(resp)

        if resp.status_code != 200:
            print("\n❌ API调用失败")
            return False

        body = resp.json()
        if not body.get("success"):
            print(f"\n❌ 代码执行失败: {body.get('error')}")
            return False

        rows = body.get("result_data", [])
        print(f"\n结果数据行数: {len(rows)}")
        print(f"执行时间: {body.get('execution_time', 0):.3f}")

        # The snippet must have added the new column to the returned rows.
        if len(rows) > 0 and 'age_group' in rows[0]:
            print("\n✅ 简单代码执行成功(新增列 age_group")
            return True

        print("\n❌ 代码执行成功但结果不正确")
        return False
    except Exception as exc:
        print(f"\n❌ 测试异常: {exc!s}")
        return False
def test_execute_medical_cleaning():
    """Test 5: execute a medical data-cleaning snippet and verify its result.

    Posts five patient records (one with a missing age, one with age 200)
    plus cleaning code to ``{BASE_URL}/api/dc/execute`` with a 10-second
    timeout. The posted code blanks out ages above 120 and adds a
    ``hypertension`` column based on ``sbp >= 140 or dbp >= 90``.

    Returns:
        True when the status code is 200, ``success`` is truthy, the first
        returned row contains ``hypertension`` and the number of rows
        flagged as hypertensive matches the number expected from the input;
        False on any other outcome, including any exception raised along
        the way.
    """
    print_test_header("测试5: Pandas代码执行 - 医疗数据清洗场景")
    # Simulated medical records.
    medical_data = [
        {"patient_id": "P001", "age": 25, "gender": "", "sbp": 120, "dbp": 80},
        {"patient_id": "P002", "age": 65, "gender": "", "sbp": 150, "dbp": 95},
        {"patient_id": "P003", "age": 45, "gender": "", "sbp": 135, "dbp": 85},
        {"patient_id": "P004", "age": None, "gender": "", "sbp": 160, "dbp": 100},
        {"patient_id": "P005", "age": 200, "gender": "", "sbp": 110, "dbp": 70},
    ]
    # Cleaning code that is executed remotely against the records above.
    medical_code = """
import numpy as np
# 1. 清理异常年龄值(>120视为异常
df['age'] = df['age'].apply(lambda x: np.nan if x is None or x > 120 else x)
# 2. 计算血压状态(收缩压 >= 140 或舒张压 >= 90 为高血压)
df['hypertension'] = df.apply(
lambda row: '高血压' if row['sbp'] >= 140 or row['dbp'] >= 90 else '正常',
axis=1
)
# 3. 统计结果
print(f"总样本数: {len(df)}")
print(f"年龄缺失数: {df['age'].isna().sum()}")
print(f"高血压人数: {(df['hypertension'] == '高血压').sum()}")
"""
    # Expected number of hypertensive patients, derived locally with the same
    # thresholds the posted code uses, so the remote result can be checked.
    expected_hypertension = sum(
        1 for record in medical_data
        if record["sbp"] >= 140 or record["dbp"] >= 90
    )
    try:
        response = requests.post(
            f"{BASE_URL}/api/dc/execute",
            json={"data": medical_data, "code": medical_code},
            timeout=10,
        )
        print_result(response)

        if response.status_code != 200:
            print("\n❌ API调用失败")
            return False

        result = response.json()
        if not result.get("success"):
            print(f"\n❌ 代码执行失败: {result.get('error')}")
            return False

        result_data = result.get("result_data", [])
        print(f"\n结果数据行数: {len(result_data)}")
        print(f"执行时间: {result.get('execution_time', 0):.3f}")

        # The new column must be present in the returned rows.
        if not (len(result_data) > 0 and 'hypertension' in result_data[0]):
            print("\n❌ 代码执行成功但结果不正确")
            return False

        hypertension_count = sum(
            1 for row in result_data
            if row.get('hypertension') == '高血压'
        )
        print(f"高血压人数: {hypertension_count}")

        # Actually verify the classification instead of only printing it;
        # previously a wrong count still reported success.
        if hypertension_count != expected_hypertension:
            print(
                f"\n❌ 高血压人数不正确: 期望 {expected_hypertension}, "
                f"实际 {hypertension_count}"
            )
            return False

        print("\n✅ 医疗数据清洗场景执行成功")
        return True
    except Exception as e:
        print(f"\n❌ 测试异常: {str(e)}")
        return False
def main():
    """Run every API check in order, print a summary, and report the outcome.

    Returns:
        bool: True when every check passed, False when at least one failed.
    """
    banner = "🚀" * 35
    print("\n" + banner)
    print(" DC工具C - Python微服务API测试")
    print(banner)

    # Run all checks; the dict literal evaluates its values top to bottom,
    # so the checks execute in the listed order.
    results = {
        "健康检查": test_health_check(),
        "AST检查-正常代码": test_validate_safe_code(),
        "AST检查-危险代码": test_validate_dangerous_code(),
        "代码执行-简单场景": test_execute_simple_code(),
        "代码执行-医疗清洗": test_execute_medical_cleaning(),
    }

    # Summary table.
    print("\n" + "=" * 70)
    print(" 测试结果汇总")
    print("=" * 70)
    for test_name, ok in results.items():
        status = "✅ 通过" if ok else "❌ 失败"
        print(f"{test_name:20s}: {status}")

    total = len(results)
    passed = sum(1 for ok in results.values() if ok)
    success_rate = (passed / total * 100) if total > 0 else 0

    print("\n" + "-" * 70)
    print(f"总计: {passed}/{total} 通过 ({success_rate:.1f}%)")
    print("-" * 70)

    all_passed = passed == total
    if all_passed:
        print("\n🎉 所有测试通过Day 1 Python服务开发完成")
    else:
        print(f"\n⚠️ 有 {total - passed} 个测试失败,请检查")
    print("\n")
    return all_passed


if __name__ == "__main__":
    import sys

    # Exit non-zero when any check failed so shells and CI can detect it;
    # previously the script always exited with status 0.
    sys.exit(0 if main() else 1)