feat: add extraction_service (PDF/Docx/Txt) and update .gitignore to exclude venv
This commit is contained in:
31
extraction_service/requirements.txt
Normal file
31
extraction_service/requirements.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
# FastAPI core dependencies
|
||||
fastapi==0.104.1
|
||||
uvicorn[standard]==0.24.0
|
||||
python-multipart==0.0.6
|
||||
|
||||
# PDF processing
|
||||
PyMuPDF>=1.24.0 # use a newer release that ships prebuilt wheels
|
||||
pdfplumber==0.10.3
|
||||
nougat-ocr==0.1.17 # high-quality extraction for academic PDFs (English)
|
||||
albumentations==1.3.1 # Nougat-compatible version (do not upgrade to 2.x)
|
||||
|
||||
# Docx processing (needed on Day 3)
|
||||
mammoth==1.6.0
|
||||
python-docx==1.1.0
|
||||
|
||||
# Language detection (needed on Day 2)
|
||||
langdetect==1.0.9
|
||||
|
||||
# Encoding detection (needed on Day 3)
|
||||
chardet==5.2.0
|
||||
|
||||
# Utilities
|
||||
python-dotenv==1.0.0
|
||||
pydantic>=2.10.0 # use a newer release that ships prebuilt wheels
|
||||
|
||||
# Logging
|
||||
loguru==0.7.2
|
||||
|
||||
# Testing tools
|
||||
requests==2.31.0
|
||||
|
||||
Reference in New Issue
Block a user