Files
AIclinicalresearch/extraction_service/test_service.py

172 lines
4.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Service test script.

Exercises the endpoints of the document-extraction microservice.
"""
import requests
import sys
from pathlib import Path
# Base URL that every request in this script is sent to (port 8000 on localhost).
BASE_URL = "http://localhost:8000"
def test_health(*, timeout: float = 10.0):
    """Check the service's ``/api/health`` endpoint and print a short report.

    Args:
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` would block indefinitely on a hung service.

    Returns:
        True when the endpoint answers 200 with the expected JSON fields,
        False on a transport error, a non-200 status, or a malformed payload.
    """
    print("\n" + "="*50)
    print("测试1: 健康检查")
    print("="*50)

    # Transport-level failures only: this is the one case where the
    # "is the service running?" hint is actually relevant.
    try:
        response = requests.get(f"{BASE_URL}/api/health", timeout=timeout)
    except requests.RequestException as e:
        print(f"❌ 连接失败: {str(e)}")
        print("提示: 请确保服务已启动 (python main.py)")
        return False

    print(f"状态码: {response.status_code}")
    if response.status_code != 200:
        print("❌ 健康检查失败")
        return False

    # Pull every field out first so a malformed payload is reported as such
    # instead of being mislabelled as a connection failure.
    try:
        data = response.json()
        service_status = data['status']
        pymupdf = data['checks']['pymupdf']
        pymupdf_available = pymupdf['available']
        pymupdf_version = pymupdf['version']
        temp_dir = data['checks']['temp_dir']['path']
    except (KeyError, TypeError, ValueError) as e:
        print(f"❌ 响应格式异常: {e!r}")
        return False

    print(f"服务状态: {service_status}")
    print(f"PyMuPDF: {pymupdf_available} (v{pymupdf_version})")
    print(f"临时目录: {temp_dir}")
    print("✅ 健康检查通过")
    return True
def test_pdf_extraction(pdf_file: str = None, *, timeout: float = 120.0):
    """Upload a PDF to ``/api/extract/pdf`` and print the extraction result.

    Args:
        pdf_file: Path of the PDF to upload. When empty or None the test is
            skipped.
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` would block indefinitely on a hung service.

    Returns:
        None when the test is skipped, True when the service answers 200 with
        the expected JSON fields, and False when the file is missing, the
        upload fails, the status is not 200, or the payload is malformed.
    """
    print("\n" + "="*50)
    print("测试2: PDF文本提取")
    print("="*50)

    if not pdf_file:
        print("跳过: 未提供测试PDF文件")
        print("使用方法: python test_service.py <pdf文件路径>")
        return None

    pdf_path = Path(pdf_file)
    if not pdf_path.exists():
        print(f"❌ 文件不存在: {pdf_file}")
        return False

    # Local file access and the HTTP round trip: both surface as
    # "request failed". (RequestException derives from OSError; it is listed
    # explicitly for readability.)
    try:
        print(f"上传文件: {pdf_path.name}")
        print(f"文件大小: {pdf_path.stat().st_size / 1024:.2f} KB")
        with open(pdf_path, 'rb') as f:
            files = {'file': (pdf_path.name, f, 'application/pdf')}
            response = requests.post(
                f"{BASE_URL}/api/extract/pdf",
                files=files,
                timeout=timeout,
            )
    except (OSError, requests.RequestException) as e:
        print(f"❌ 请求失败: {str(e)}")
        return False

    print(f"状态码: {response.status_code}")
    if response.status_code != 200:
        print(f"❌ 提取失败: {response.text}")
        return False

    # Validate the payload shape up front so a malformed response is reported
    # as such rather than as a failed request.
    try:
        data = response.json()
        method = data['method']
        page_count = data['metadata']['page_count']
        char_count = data['metadata']['char_count']
        text = data['text']
        text_length = len(text)
        preview = text[:500]
    except (KeyError, TypeError, ValueError) as e:
        print(f"❌ 响应格式异常: {e!r}")
        return False

    print("\n提取结果:")
    print(f"方法: {method}")
    print(f"页数: {page_count}")
    print(f"字符数: {char_count}")
    print(f"文本长度: {text_length} 字符")

    # Show at most the first 500 characters of the extracted text.
    print("\n文本预览:")
    print("-" * 50)
    print(preview)
    if text_length > 500:
        print("...")
    print("-" * 50)

    print("\n✅ PDF提取成功")
    return True
def test_root(*, timeout: float = 10.0):
    """Check the service's root endpoint (``/``) and print its identity.

    Args:
        timeout: Seconds to wait for the service before giving up. Without a
            timeout ``requests`` would block indefinitely on a hung service.

    Returns:
        True when the endpoint answers 200 with ``service`` and ``version``
        fields, False on a transport error, a non-200 status, or a malformed
        payload.
    """
    print("\n" + "="*50)
    print("测试0: 根路径")
    print("="*50)

    try:
        response = requests.get(f"{BASE_URL}/", timeout=timeout)
    except requests.RequestException as e:
        print(f"❌ 连接失败: {str(e)}")
        return False

    print(f"状态码: {response.status_code}")
    if response.status_code != 200:
        print("❌ 根路径异常")
        return False

    # A reachable service that returns an unexpected body is not a
    # connection failure; report it separately.
    try:
        data = response.json()
        service = data['service']
        version = data['version']
    except (KeyError, TypeError, ValueError) as e:
        print(f"❌ 响应格式异常: {e!r}")
        return False

    print(f"服务: {service}")
    print(f"版本: {version}")
    print("✅ 根路径正常")
    return True
def main():
    """Run every service test, print a summary, and report overall success.

    The optional first command-line argument is the path of a PDF to use for
    the extraction test; without it that test is skipped and is excluded from
    the pass-rate denominator.

    Returns:
        0 when every executed (non-skipped) test passed, 1 otherwise. The
        value is meant to be used as the process exit status.
    """
    print("\n" + "="*50)
    print("文档提取微服务 - 测试套件")
    print("="*50)

    # Optional PDF path from the command line.
    pdf_file = sys.argv[1] if len(sys.argv) > 1 else None

    # Each test returns True (passed), False (failed) or None (skipped).
    results = [
        ("根路径", test_root()),
        ("健康检查", test_health()),
        ("PDF提取", test_pdf_extraction(pdf_file)),
    ]

    print("\n" + "="*50)
    print("测试总结")
    print("="*50)

    for name, result in results:
        if result is True:
            status = "✅ 通过"
        elif result is False:
            status = "❌ 失败"
        else:
            status = "⏭️ 跳过"
        print(f"{name}: {status}")

    passed = sum(1 for _, r in results if r is True)
    # Skipped tests (None) do not count towards the total.
    total = sum(1 for _, r in results if r is not None)

    print(f"\n通过率: {passed}/{total}")

    if passed == total:
        print("\n🎉 所有测试通过!")
        return 0

    print("\n⚠️ 部分测试失败")
    return 1


if __name__ == "__main__":
    # Propagate the outcome so shells and CI can detect a failed run.
    sys.exit(main())