"""
Service test script.

Tests the individual features of the document-extraction microservice.
"""
|
||
|
||
import requests
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
|
||
# Base URL of the service under test; the request URLs in this script are
# built by appending an endpoint path to it.
BASE_URL = "http://localhost:8000"
|
||
|
||
|
||
def test_health():
    """Call the health-check endpoint and print what it reports.

    Sends ``GET {BASE_URL}/api/health``. On HTTP 200 the service status, the
    PyMuPDF availability/version and the temp-directory path are read from
    the JSON payload and printed.

    Returns:
        bool: True when the endpoint answered HTTP 200 and the payload could
        be printed; False for any other status code, or when the request or
        the payload handling raised an exception.
    """
    print("\n" + "="*50)
    print("测试1: 健康检查")
    print("="*50)

    try:
        # Bound the wait: without a timeout an unresponsive service would
        # block this script indefinitely instead of failing the check.
        response = requests.get(f"{BASE_URL}/api/health", timeout=10)
        print(f"状态码: {response.status_code}")

        if response.status_code != 200:
            print("❌ 健康检查失败")
            return False

        data = response.json()
        print(f"服务状态: {data['status']}")
        pymupdf = data['checks']['pymupdf']
        print(f"PyMuPDF: {pymupdf['available']} (v{pymupdf['version']})")
        print(f"临时目录: {data['checks']['temp_dir']['path']}")
        print("✅ 健康检查通过")
        return True
    except Exception as e:
        # Any failure (connection error, timeout, invalid JSON, missing key)
        # is reported as a failed check so the remaining tests still run.
        print(f"❌ 连接失败: {str(e)}")
        print("提示: 请确保服务已启动(python main.py)")
        return False
|
||
|
||
|
||
def test_pdf_extraction(pdf_file: str = None):
    """Upload a PDF to the extraction endpoint and print the result.

    Posts the file as multipart form field ``file`` to
    ``{BASE_URL}/api/extract/pdf``. On HTTP 200 the extraction method, page
    count, character count and a preview of at most 500 characters of the
    extracted text are printed.

    Args:
        pdf_file: Path of the PDF to upload. When falsy (the default), the
            test is skipped.

    Returns:
        None when skipped because no file was given; True when the service
        answered HTTP 200 and the result was printed; False when the path
        does not exist, the status code is not 200, or an exception was
        raised while uploading or reading the response.
    """
    print("\n" + "="*50)
    print("测试2: PDF文本提取")
    print("="*50)

    if not pdf_file:
        print("跳过: 未提供测试PDF文件")
        print("使用方法: python test_service.py <pdf文件路径>")
        return None

    pdf_path = Path(pdf_file)

    if not pdf_path.exists():
        print(f"❌ 文件不存在: {pdf_file}")
        return False

    try:
        print(f"上传文件: {pdf_path.name}")
        print(f"文件大小: {pdf_path.stat().st_size / 1024:.2f} KB")

        with open(pdf_path, 'rb') as f:
            files = {'file': (pdf_path.name, f, 'application/pdf')}
            # (connect, read) timeout: fail fast when the service is down,
            # but leave generous room for the extraction itself. Without a
            # timeout a stalled server would hang this script forever.
            response = requests.post(
                f"{BASE_URL}/api/extract/pdf",
                files=files,
                timeout=(10, 300),
            )

        print(f"状态码: {response.status_code}")

        if response.status_code != 200:
            print(f"❌ 提取失败: {response.text}")
            return False

        data = response.json()

        print("\n提取结果:")
        print(f"方法: {data['method']}")
        metadata = data['metadata']
        print(f"页数: {metadata['page_count']}")
        print(f"字符数: {metadata['char_count']}")
        text = data['text']
        print(f"文本长度: {len(text)} 字符")

        # Show only the first 500 characters of the extracted text.
        print("\n文本预览:")
        print("-" * 50)
        print(text[:500])
        if len(text) > 500:
            print("...")
        print("-" * 50)

        print("\n✅ PDF提取成功")
        return True

    except Exception as e:
        # Any failure (I/O error, connection error, timeout, invalid JSON,
        # missing key) is reported as a failed test instead of propagating.
        print(f"❌ 请求失败: {str(e)}")
        return False
|
||
|
||
|
||
def test_root():
    """Call the service root path and print the service name and version.

    Sends ``GET {BASE_URL}/`` and, on HTTP 200, prints the ``service`` and
    ``version`` fields of the JSON payload.

    Returns:
        bool: True when the endpoint answered HTTP 200 and the payload could
        be printed; False for any other status code, or when the request or
        the payload handling raised an exception.
    """
    print("\n" + "="*50)
    print("测试0: 根路径")
    print("="*50)

    try:
        # Bound the wait: without a timeout an unresponsive service would
        # block this script indefinitely instead of failing the check.
        response = requests.get(f"{BASE_URL}/", timeout=10)
        print(f"状态码: {response.status_code}")

        if response.status_code != 200:
            print("❌ 根路径异常")
            return False

        data = response.json()
        print(f"服务: {data['service']}")
        print(f"版本: {data['version']}")
        print("✅ 根路径正常")
        return True
    except Exception as e:
        # Any failure (connection error, timeout, invalid JSON, missing key)
        # is reported as a failed check so the remaining tests still run.
        print(f"❌ 连接失败: {str(e)}")
        return False
|
||
|
||
|
||
def main():
    """Run every check, print a summary, and return a process exit status.

    The optional first command-line argument is the path of a PDF file to
    upload; without it the PDF extraction test reports itself as skipped.
    Skipped tests (result ``None``) are excluded from the pass-rate total.

    Returns:
        int: 0 when every test that ran passed, 1 otherwise, so that shells
        and CI jobs can detect a failing run.
    """
    print("\n" + "="*50)
    print("文档提取微服务 - 测试套件")
    print("="*50)

    # PDF path from the command line, if one was given.
    pdf_file = sys.argv[1] if len(sys.argv) > 1 else None

    # Run the tests in order; each entry is (display name, result).
    results = [
        ("根路径", test_root()),
        ("健康检查", test_health()),
        ("PDF提取", test_pdf_extraction(pdf_file)),
    ]

    # Summary
    print("\n" + "="*50)
    print("测试总结")
    print("="*50)

    for name, result in results:
        if result is True:
            status = "✅ 通过"
        elif result is False:
            status = "❌ 失败"
        else:
            status = "⏭️ 跳过"
        print(f"{name}: {status}")

    passed = sum(1 for _, r in results if r is True)
    total = sum(1 for _, r in results if r is not None)

    print(f"\n通过率: {passed}/{total}")

    all_passed = passed == total
    if all_passed:
        print("\n🎉 所有测试通过!")
    else:
        print("\n⚠️ 部分测试失败")

    return 0 if all_passed else 1


if __name__ == "__main__":
    # Propagate the outcome as the process exit status.
    sys.exit(main())
|
||
|
||
|
||
|
||
|
||
|
||
|