deploy: Complete 0126-27 deployment - database upgrade, services update, code recovery
Major Changes:
- Database: Install pg_bigm/pgvector plugins, create test database
- Python service: v1.0 -> v1.1, add pymupdf4llm/openpyxl/pypandoc
- Node.js backend: v1.3 -> v1.7, fix pino-pretty and ES Module imports
- Frontend: v1.2 -> v1.3, skip TypeScript check for deployment
- Code recovery: Restore empty files from local backup

Technical Fixes:
- Fix pino-pretty error in production (conditional loading)
- Fix ES Module import paths (add .js extensions)
- Fix OSSAdapter TypeScript errors
- Update Prisma Schema (63 models, 16 schemas)
- Update environment variables (DATABASE_URL, EXTRACTION_SERVICE_URL, OSS)
- Remove deprecated variables (REDIS_URL, DIFY_API_URL, DIFY_API_KEY)

Documentation:
- Create 0126 deployment folder with 8 documents
- Update database development standards v2.0
- Update SAE deployment status records

Deployment Status:
- PostgreSQL: ai_clinical_research_test with plugins
- Python: v1.1 @ 172.17.173.84:8000
- Backend: v1.7 @ 172.17.173.89:3001
- Frontend: v1.3 @ 172.17.173.90:80

Tested: All services running successfully on SAE
This commit is contained in:
@@ -88,6 +88,10 @@ models/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@ RUN sed -i 's|http://deb.debian.org|http://mirrors.aliyun.com|g' /etc/apt/source
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
libgomp1 \
|
||||
pandoc \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 从构建阶段复制Python包
|
||||
|
||||
@@ -76,6 +76,10 @@ __version__ = '1.0.0'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -209,6 +209,10 @@ def get_missing_summary(df: pd.DataFrame) -> dict:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -169,6 +169,10 @@ def apply_filter(
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -333,6 +333,10 @@ def get_unpivot_preview(
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# ========================================
|
||||
# 生产环境依赖 (移除Nougat和重量级依赖)
|
||||
# 生产环境依赖 (2026-01-26 更新)
|
||||
# 移除 Nougat,使用 pymupdf4llm 替代
|
||||
# ========================================
|
||||
|
||||
# Web框架
|
||||
@@ -12,13 +13,22 @@ pandas>=2.0.0
|
||||
numpy>=1.24.0
|
||||
polars>=0.19.0
|
||||
|
||||
# PDF处理 (核心轻量级库)
|
||||
PyMuPDF>=1.24.0
|
||||
pdfplumber==0.10.3
|
||||
# PDF处理 - 使用 pymupdf4llm(替代 nougat,更轻量)
|
||||
PyMuPDF>=1.24.0 # PDF 核心库(代码中 import fitz 使用)
|
||||
pymupdf4llm>=0.0.17 # PDF → Markdown
|
||||
pdfplumber==0.10.3 # 备用 PDF 处理
|
||||
|
||||
# Docx处理
|
||||
mammoth==1.6.0
|
||||
python-docx==1.1.0
|
||||
# Word处理
|
||||
mammoth==1.6.0 # Docx → Markdown
|
||||
python-docx==1.1.0 # Docx 读取
|
||||
pypandoc>=1.13 # Markdown → Docx (需要系统安装 pandoc)
|
||||
|
||||
# Excel/CSV处理
|
||||
openpyxl>=3.1.2 # Excel 读取
|
||||
tabulate>=0.9.0 # DataFrame → Markdown
|
||||
|
||||
# PPT处理
|
||||
python-pptx>=0.6.23 # PPT 读取
|
||||
|
||||
# 语言检测
|
||||
langdetect==1.0.9
|
||||
@@ -40,4 +50,6 @@ requests==2.31.0
|
||||
# 注意:生产环境已移除以下重量级依赖
|
||||
# - nougat-ocr==0.1.17 (约1.5GB)
|
||||
# - albumentations==1.3.1 (Nougat依赖)
|
||||
#
|
||||
# 已使用 pymupdf4llm 替代,功能相似但更轻量
|
||||
# ========================================
|
||||
|
||||
@@ -151,3 +151,7 @@ def extract_pdf_to_markdown(pdf_path: str) -> Dict[str, Any]:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -343,6 +343,10 @@ if __name__ == "__main__":
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -109,6 +109,10 @@ except Exception as e:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -89,6 +89,10 @@ except Exception as e:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user