""" 服务测试脚本 测试文档提取微服务的各项功能 """ import requests import sys from pathlib import Path BASE_URL = "http://localhost:8000" def test_health(): """测试健康检查""" print("\n" + "="*50) print("测试1: 健康检查") print("="*50) try: response = requests.get(f"{BASE_URL}/api/health") print(f"状态码: {response.status_code}") if response.status_code == 200: data = response.json() print(f"服务状态: {data['status']}") print(f"PyMuPDF: {data['checks']['pymupdf']['available']} (v{data['checks']['pymupdf']['version']})") print(f"临时目录: {data['checks']['temp_dir']['path']}") print("✅ 健康检查通过") return True else: print("❌ 健康检查失败") return False except Exception as e: print(f"❌ 连接失败: {str(e)}") print("提示: 请确保服务已启动(python main.py)") return False def test_pdf_extraction(pdf_file: str = None): """测试PDF提取""" print("\n" + "="*50) print("测试2: PDF文本提取") print("="*50) if not pdf_file: print("跳过: 未提供测试PDF文件") print("使用方法: python test_service.py ") return None pdf_path = Path(pdf_file) if not pdf_path.exists(): print(f"❌ 文件不存在: {pdf_file}") return False try: print(f"上传文件: {pdf_path.name}") print(f"文件大小: {pdf_path.stat().st_size / 1024:.2f} KB") with open(pdf_path, 'rb') as f: files = {'file': (pdf_path.name, f, 'application/pdf')} response = requests.post( f"{BASE_URL}/api/extract/pdf", files=files ) print(f"状态码: {response.status_code}") if response.status_code == 200: data = response.json() print("\n提取结果:") print(f"方法: {data['method']}") print(f"页数: {data['metadata']['page_count']}") print(f"字符数: {data['metadata']['char_count']}") print(f"文本长度: {len(data['text'])} 字符") # 显示前500字符 print("\n文本预览:") print("-" * 50) print(data['text'][:500]) if len(data['text']) > 500: print("...") print("-" * 50) print("\n✅ PDF提取成功") return True else: print(f"❌ 提取失败: {response.text}") return False except Exception as e: print(f"❌ 请求失败: {str(e)}") return False def test_root(): """测试根路径""" print("\n" + "="*50) print("测试0: 根路径") print("="*50) try: response = requests.get(f"{BASE_URL}/") print(f"状态码: {response.status_code}") if response.status_code == 200: data = response.json() print(f"服务: {data['service']}") print(f"版本: {data['version']}") print("✅ 根路径正常") return True else: print("❌ 根路径异常") return False except Exception as e: print(f"❌ 连接失败: {str(e)}") return False def main(): """主测试函数""" print("\n" + "="*50) print("文档提取微服务 - 测试套件") print("="*50) # 获取PDF文件路径(如果提供) pdf_file = sys.argv[1] if len(sys.argv) > 1 else None # 运行测试 results = [] results.append(("根路径", test_root())) results.append(("健康检查", test_health())) results.append(("PDF提取", test_pdf_extraction(pdf_file))) # 总结 print("\n" + "="*50) print("测试总结") print("="*50) for name, result in results: if result is True: status = "✅ 通过" elif result is False: status = "❌ 失败" else: status = "⏭️ 跳过" print(f"{name}: {status}") passed = sum(1 for _, r in results if r is True) total = len([r for _, r in results if r is not None]) print(f"\n通过率: {passed}/{total}") if passed == total: print("\n🎉 所有测试通过!") else: print("\n⚠️ 部分测试失败") if __name__ == "__main__": main()