feat(deploy): Complete PostgreSQL migration and Docker image build
Summary: - PostgreSQL database migration to RDS completed (90MB SQL, 11 schemas) - Frontend Nginx Docker image built and pushed to ACR (v1.0, ~50MB) - Python microservice Docker image built and pushed to ACR (v1.0, 1.12GB) - Created 3 deployment documentation files Docker Configuration Files: - frontend-v2/Dockerfile: Multi-stage build with nginx:alpine - frontend-v2/.dockerignore: Optimize build context - frontend-v2/nginx.conf: SPA routing and API proxy - frontend-v2/docker-entrypoint.sh: Dynamic env injection - extraction_service/Dockerfile: Multi-stage build with Aliyun Debian mirror - extraction_service/.dockerignore: Optimize build context - extraction_service/requirements-prod.txt: Production dependencies (removed Nougat) Deployment Documentation: - docs/05-部署文档/00-部署进度总览.md: One-stop deployment status overview - docs/05-部署文档/07-前端Nginx-SAE部署操作手册.md: Frontend deployment guide - docs/05-部署文档/08-PostgreSQL数据库部署操作手册.md: Database deployment guide - docs/00-系统总体设计/00-系统当前状态与开发指南.md: Updated with deployment status Database Migration: - RDS instance: pgm-2zex1m2y3r23hdn5 (2C4G, PostgreSQL 15.0) - Database: ai_clinical_research - Schemas: 11 business schemas migrated successfully - Data: 3 users, 2 projects, 1204 literatures verified - Backup: rds_init_20251224_154529.sql (90MB) Docker Images: - Frontend: crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.0 - Python: crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0 Key Achievements: - Resolved Docker Hub network issues (using generic tags) - Fixed 30 TypeScript compilation errors - Removed Nougat OCR to reduce image size by 1.5GB - Used Aliyun Debian mirror to resolve apt-get network issues - Implemented multi-stage builds for optimization Next Steps: - Deploy Python microservice to SAE - Build Node.js backend Docker image - Deploy Node.js backend to SAE - Deploy frontend Nginx to SAE - End-to-end verification testing 
Status: Docker images ready, SAE deployment pending
This commit is contained in:
@@ -242,6 +242,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -38,5 +38,7 @@ WHERE table_schema = 'dc_schema'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -78,3 +78,5 @@ ORDER BY ordinal_position;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -91,3 +91,5 @@ runMigration()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -25,3 +25,5 @@ COMMENT ON COLUMN "dc_schema"."dc_tool_c_sessions"."column_mapping" IS '列名
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -49,6 +49,8 @@ COMMENT ON COLUMN dc_schema.dc_tool_c_sessions.expires_at IS '过期时间(创
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -199,6 +199,8 @@ function extractCodeBlocks(obj, blocks = []) {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -218,6 +218,8 @@ checkDCTables();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -170,6 +170,8 @@ createAiHistoryTable()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -157,6 +157,8 @@ createToolCTable()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -154,6 +154,8 @@ createToolCTable()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -289,3 +289,5 @@ export function getBatchItems<T>(
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -322,6 +322,8 @@ runTests().catch((error) => {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -263,6 +263,8 @@ runTest()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -301,6 +301,8 @@ Content-Type: application/json
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -380,6 +380,8 @@ export class ExcelExporter {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -237,6 +237,8 @@ export const conflictDetectionService = new ConflictDetectionService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -265,6 +265,8 @@ export const templateService = new TemplateService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -187,6 +187,8 @@ curl -X POST http://localhost:3000/api/v1/dc/tool-c/test/execute \
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -242,5 +242,7 @@ export const streamAIController = new StreamAIController();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -390,3 +390,5 @@ SET session_replication_role = 'origin';
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -92,3 +92,5 @@ WHERE key = 'verify_test';
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -235,3 +235,5 @@ verifyDatabase()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
2
backend/src/types/global.d.ts
vendored
2
backend/src/types/global.d.ts
vendored
@@ -25,3 +25,5 @@ export {}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -45,6 +45,8 @@ Write-Host "✅ 完成!" -ForegroundColor Green
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -333,5 +333,7 @@ runAdvancedTests().catch(error => {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -398,6 +398,8 @@ runAllTests()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -356,6 +356,8 @@ runAllTests()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -143,3 +143,5 @@ Set-Location ..
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
# AIclinicalresearch 系统当前状态与开发指南
|
||||
|
||||
> **文档版本:** v2.0
|
||||
> **文档版本:** v2.1
|
||||
> **创建日期:** 2025-11-28
|
||||
> **维护者:** 开发团队
|
||||
> **最后更新:** 2025-12-22
|
||||
> **重大进展:** 🏆 **DC Tool C Postgres-Only异步架构改造完成** - 性能提升99%,异步任务处理标准建立
|
||||
> **最后更新:** 2025-12-24
|
||||
> **重大进展:** 🚀 **阿里云生产环境部署启动** - PostgreSQL数据迁移完成、前端Nginx与Python微服务镜像已推送ACR
|
||||
> **文档目的:** 快速了解系统当前状态,为新AI助手提供上下文
|
||||
|
||||
---
|
||||
@@ -98,11 +98,13 @@
|
||||
|
||||
**云原生部署**:
|
||||
- 阿里云 SAE (Serverless 应用引擎)
|
||||
- RDS PostgreSQL 15 + OSS (对象存储) + Redis (可选,Dify需要)
|
||||
- RDS PostgreSQL 15 + OSS (对象存储) + NAT网关
|
||||
- ACR (容器镜像服务 - 个人版免费)
|
||||
- **部署状态**:🚀 **进行中**(PostgreSQL✅、前端镜像✅、Python镜像✅)
|
||||
|
||||
---
|
||||
|
||||
## 🚀 当前开发状态(2025-12-10)
|
||||
## 🚀 当前开发状态(2025-12-24)
|
||||
|
||||
### ✅ 已完成模块
|
||||
|
||||
@@ -254,6 +256,137 @@
|
||||
|
||||
---
|
||||
|
||||
## 🚀 阿里云生产环境部署状态(2025-12-24)
|
||||
|
||||
### ✅ 已完成部署
|
||||
|
||||
#### 1. 基础设施层
|
||||
- ✅ **VPC网络**:`vpc-2ze055cptkew9c38w4r06`(172.17.0.0/16)
|
||||
- ✅ **NAT网关**:`ngw-2zeec9ulzgw7ywvx1pst6`(公网IP: 182.92.176.14)
|
||||
- ✅ **安全组**:`sg-2zedk6fi8sgmmcwdu7tu`
|
||||
- ✅ **交换机**:2个(可用区F + 可用区A)
|
||||
- ✅ **SAE命名空间**:`cn-beijing:test-airesearch`
|
||||
|
||||
#### 2. 数据存储层
|
||||
- ✅ **RDS PostgreSQL 15**
|
||||
- 实例ID: `pgm-2zex1m2y3r23hdn5`
|
||||
- 规格: 2核4GB
|
||||
- 内网地址: `pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com:5432`
|
||||
- 数据库: `ai_clinical_research`
|
||||
- **数据迁移**: ✅ 完成(90MB SQL文件,约12秒导入)
|
||||
- **Schema验证**: ✅ 11个Schema全部迁移成功
|
||||
- **数据验证**: ✅ 用户3条、项目2条、文献1204条
|
||||
- **部署时间**: 2025-12-24
|
||||
|
||||
- ✅ **OSS对象存储**
|
||||
- Bucket: `ai-clinical-research`
|
||||
- 存储类型: 标准存储(同城冗余)
|
||||
- 内网域名: `ai-clinical-research.oss-cn-beijing-internal.aliyuncs.com`
|
||||
- RAM用户: `oss-bucket-put-object@1991407246109125.onaliyun.com`
|
||||
- AccessKey: 已配置(不公开)
|
||||
|
||||
#### 3. 容器镜像服务(ACR)
|
||||
- ✅ **命名空间**: `ai-clinical`
|
||||
- ✅ **Registry**: `crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com`
|
||||
- ✅ **已推送镜像**:
|
||||
- **前端Nginx**: `ai-clinical_frontend-nginx:v1.0`(约50MB)
|
||||
- 构建时间: 2025-12-24
|
||||
- 基础镜像: `nginx:alpine`
|
||||
- 功能: React SPA + Nginx反向代理 + 动态环境变量
|
||||
- 配置文件: `frontend-v2/Dockerfile`, `nginx.conf`, `.dockerignore`
|
||||
|
||||
- **Python微服务**: `python-extraction:v1.0`(1.12GB)
|
||||
- 构建时间: 2025-12-24
|
||||
- 基础镜像: `python:3-slim`
|
||||
- 功能: PDF提取(PyMuPDF)+ 数据清洗(pandas/numpy/polars)
|
||||
- 特性: 移除Nougat(减小1.5GB)、使用阿里云Debian源
|
||||
- 配置文件: `extraction_service/Dockerfile`, `requirements-prod.txt`, `.dockerignore`
|
||||
|
||||
### 🚧 进行中
|
||||
|
||||
#### 4. SAE应用部署
|
||||
- ⏳ **Python微服务**: 镜像已推送,待部署到SAE
|
||||
- 目标规格: 1核2GB
|
||||
- 端口: 8000
|
||||
- 健康检查: `/api/health`
|
||||
|
||||
- ⏳ **Node.js后端**: Docker镜像待构建
|
||||
- 目标规格: 2核4GB
|
||||
- 端口: 3001
|
||||
- 依赖: RDS PostgreSQL
|
||||
|
||||
- ⏳ **前端Nginx**: 镜像已推送,待部署到SAE
|
||||
- 目标规格: 1核2GB
|
||||
- 端口: 80
|
||||
- 需配置: 后端API内网地址
|
||||
|
||||
### 📋 待完成
|
||||
|
||||
- [ ] Python微服务部署到SAE
|
||||
- [ ] Node.js后端Docker镜像构建
|
||||
- [ ] Node.js后端部署到SAE
|
||||
- [ ] 前端Nginx部署到SAE
|
||||
- [ ] 配置服务间内网通信
|
||||
- [ ] 全链路验证测试
|
||||
- [ ] Dify AI服务部署(可选)
|
||||
|
||||
### 📊 部署文档
|
||||
|
||||
**部署进度总览**:
|
||||
- [00-部署进度总览.md](../05-部署文档/00-部署进度总览.md) - 🎯 **一站式部署状态查看**
|
||||
|
||||
**操作手册**:
|
||||
- [07-前端Nginx-SAE部署操作手册.md](../05-部署文档/07-前端Nginx-SAE部署操作手册.md)
|
||||
- [08-PostgreSQL数据库部署操作手册.md](../05-部署文档/08-PostgreSQL数据库部署操作手册.md)
|
||||
- Python微服务SAE部署操作手册(待创建)
|
||||
|
||||
**技术指南**:
|
||||
- [01-快速部署SOP-零基础版.md](../05-部署文档/01-快速部署SOP-零基础版.md) - 完整部署流程
|
||||
- [04-Python微服务-SAE容器部署指南.md](../05-部署文档/04-Python微服务-SAE容器部署指南.md)
|
||||
- [06-前端Nginx-SAE容器部署指南.md](../05-部署文档/06-前端Nginx-SAE容器部署指南.md)
|
||||
|
||||
### 🎯 部署关键成就
|
||||
|
||||
1. **PostgreSQL数据迁移** ✅
|
||||
- 采用`pg_dump`全量导出/导入方案
|
||||
- 11个Schema完整迁移
|
||||
- 数据一致性验证通过
|
||||
- 安全加固(外网访问已关闭)
|
||||
|
||||
2. **前端Nginx镜像优化** ✅
|
||||
- 解决Docker Hub网络问题(使用通用标签)
|
||||
- 修复30个TypeScript编译错误
|
||||
- 多阶段构建优化
|
||||
- 健康检查通过
|
||||
|
||||
3. **Python微服务镜像优化** ✅
|
||||
- 移除Nougat OCR(减小1.5GB)
|
||||
- 使用阿里云Debian镜像源(解决apt-get网络问题)
|
||||
- 保留数据清洗功能(pandas/numpy/polars)
|
||||
- 运行时依赖优化(libgl1、libglib2.0)
|
||||
|
||||
4. **镜像配置文件Git管理** ✅
|
||||
- Dockerfile: ✅ 已提交Git(构建蓝图)
|
||||
- .dockerignore: ✅ 已提交Git(优化构建)
|
||||
- 依赖文件: ✅ 已提交Git(可复现)
|
||||
- 敏感信息: ❌ 禁止提交(.env等)
|
||||
|
||||
### 💰 当前运行成本估算
|
||||
|
||||
| 服务 | 规格 | 月成本 | 状态 |
|
||||
|------|------|-------|------|
|
||||
| RDS PostgreSQL | 2核4GB | ¥260 | ✅ 运行中 |
|
||||
| OSS存储 | 10GB | ¥2 | ✅ 运行中 |
|
||||
| NAT网关 | 小型 | ¥60 | ✅ 运行中 |
|
||||
| EIP流量 | 5Mbps | ¥40 | ✅ 运行中 |
|
||||
| ACR镜像仓库 | 个人版 | ¥0(免费) | ✅ 运行中 |
|
||||
| SAE - Python | 1核2GB×1 | ¥60 | ⏳ 待部署 |
|
||||
| SAE - Node.js | 2核4GB×1 | ¥120 | ⏳ 待部署 |
|
||||
| SAE - Frontend | 1核2GB×1 | ¥60 | ⏳ 待部署 |
|
||||
| **总计** | - | **¥602/月** | 部署中 |
|
||||
|
||||
---
|
||||
|
||||
## 📁 项目结构概览
|
||||
|
||||
```
|
||||
@@ -336,33 +469,68 @@ AIclinicalresearch/
|
||||
| **2025-11-18~21** | Week 3~4 | ✅ ASL标题摘要初筛MVP |
|
||||
| **2025-11-22~23** | ASL Day 2-5 | ✅ ASL全文复筛后端完成 |
|
||||
| **2025-11-26~27** | DC Day 2-3 | ✅ DC工具B健康检查+模板管理 |
|
||||
| **2025-11-28** | 当前 | 🚧 ASL全文复筛前端 + DC工具B开发 |
|
||||
| **2025-11-28** | DC Day 4-8 | ✅ DC Tool C MVP + UX重大改进完成 |
|
||||
| **2025-12-13** | 架构优化 | ✅ Postgres-Only架构改造完成 |
|
||||
| **2025-12-24** | **部署启动** 🚀 | ✅ PostgreSQL数据迁移 + 前端/Python镜像推送ACR |
|
||||
| **当前** | 部署进行中 | 🚧 SAE应用部署 + Node.js后端构建 |
|
||||
|
||||
---
|
||||
|
||||
## 🎯 下一步计划
|
||||
|
||||
### 短期(1-2周)
|
||||
1. **ASL全文复筛前端**(Day 6-8)
|
||||
### 🔥 最高优先级(1-2天)- 部署到生产环境
|
||||
1. **Python微服务部署到SAE**
|
||||
- 创建SAE应用
|
||||
- 配置环境变量
|
||||
- 健康检查验证
|
||||
- 获取内网地址
|
||||
|
||||
2. **Node.js后端Docker镜像构建**
|
||||
- 创建Dockerfile
|
||||
- 配置数据库连接(RDS内网地址)
|
||||
- 构建并推送到ACR
|
||||
|
||||
3. **Node.js后端部署到SAE**
|
||||
- 创建SAE应用(2核4GB)
|
||||
- 配置环境变量(DATABASE_URL、OSS等)
|
||||
- 配置Python微服务内网地址
|
||||
- 健康检查验证
|
||||
|
||||
4. **前端Nginx部署到SAE**
|
||||
- 创建SAE应用(1核2GB)
|
||||
- 配置后端API内网地址
|
||||
- 公网域名绑定
|
||||
- SSL证书配置
|
||||
|
||||
5. **全链路验证测试**
|
||||
- 前端→Node.js后端→Python微服务→RDS
|
||||
- ASL文献筛选完整流程
|
||||
- DC数据清洗完整流程
|
||||
- 性能和稳定性测试
|
||||
|
||||
### 短期(1-2周)- 功能完善
|
||||
6. **ASL全文复筛前端**(Day 6-8)
|
||||
- 4个核心页面:设置、进度、工作台、结果
|
||||
- PDF上传和预览功能
|
||||
- 双模型判断对比UI
|
||||
- 实时进度监控
|
||||
|
||||
2. **DC工具B完成**(Day 4-7)
|
||||
- ExtractionService实现
|
||||
- 批量提取API
|
||||
- 前端集成和测试
|
||||
7. **DC工具B前端开发**
|
||||
- 健康检查界面
|
||||
- 模板管理界面
|
||||
- 批量提取界面
|
||||
- 冲突解决界面
|
||||
|
||||
### 中期(1-2月)
|
||||
3. DC模块完整实现(工具A、工具C、Portal)
|
||||
4. ASL模块优化(Prompt优化、并发处理)
|
||||
5. LLM网关统一抽取
|
||||
### 中期(1-2月)- 模块完善
|
||||
8. DC模块完整实现(工具A、Portal优化)
|
||||
9. ASL模块优化(Prompt优化、并发处理)
|
||||
10. 监控和告警系统配置
|
||||
11. CI/CD流程建立
|
||||
|
||||
### 长期(3月+)
|
||||
6. SSA模块(智能统计分析)
|
||||
7. ST模块(统计分析工具)
|
||||
8. RVW模块(稿件审查系统)
|
||||
### 长期(3月+)- 新模块开发
|
||||
12. SSA模块(智能统计分析)
|
||||
13. ST模块(统计分析工具)
|
||||
14. RVW模块(稿件审查系统)
|
||||
|
||||
---
|
||||
|
||||
@@ -380,6 +548,12 @@ AIclinicalresearch/
|
||||
- [DC模块README](../03-业务模块/DC-数据清洗整理/README.md)
|
||||
- [DC Day3完成总结](../03-业务模块/DC-数据清洗整理/06-开发记录/Day3完成总结.md)
|
||||
|
||||
### 🚀 部署文档(新增)
|
||||
- ⭐ [00-部署进度总览.md](../05-部署文档/00-部署进度总览.md) - **一站式部署状态查看**
|
||||
- [01-快速部署SOP-零基础版.md](../05-部署文档/01-快速部署SOP-零基础版.md) - 完整部署流程
|
||||
- [07-前端Nginx-SAE部署操作手册.md](../05-部署文档/07-前端Nginx-SAE部署操作手册.md)
|
||||
- [08-PostgreSQL数据库部署操作手册.md](../05-部署文档/08-PostgreSQL数据库部署操作手册.md)
|
||||
|
||||
### 🏗️ 架构设计
|
||||
- [平台基础设施规划](../09-架构实施/04-平台基础设施规划.md)
|
||||
- [云原生部署架构指南](../09-架构实施/03-云原生部署架构指南.md)
|
||||
@@ -447,6 +621,13 @@ npm run dev # http://localhost:3000
|
||||
- 🚧 **开发中**:ASL(80%)、DC(Tool C 98%,Tool B后端100%,Tool B前端0%)
|
||||
- 📋 **未开始**:SSA、ST、RVW
|
||||
|
||||
### 部署完成度
|
||||
- ✅ **基础设施**:VPC(100%)、NAT网关(100%)、安全组(100%)
|
||||
- ✅ **数据存储**:RDS PostgreSQL(100%)、OSS(100%)
|
||||
- ✅ **容器镜像**:前端Nginx(100%)、Python微服务(100%)
|
||||
- 🚧 **SAE应用**:Python微服务(待部署)、Node.js后端(镜像待构建)、前端Nginx(待部署)
|
||||
- 📋 **验证测试**:全链路测试(待进行)
|
||||
|
||||
### 测试覆盖率
|
||||
- **平台基础层**:100%(8/8模块全部通过)
|
||||
- **AIA模块**:手动测试通过
|
||||
@@ -574,9 +755,9 @@ if (items.length >= 50) {
|
||||
|
||||
---
|
||||
|
||||
**文档版本**:v1.8
|
||||
**最后更新**:2025-12-13
|
||||
**下次更新**:Phase 8 全面测试完成 或 Phase 9 SAE 部署完成
|
||||
**文档版本**:v2.1
|
||||
**最后更新**:2025-12-24
|
||||
**下次更新**:SAE应用部署完成 或 全链路验证测试完成
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -585,3 +585,5 @@ async saveProcessedData(recordId, newData) {
|
||||
**最后更新**: 2025-12-22
|
||||
**文档状态**: ✅ 已完成
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -772,3 +772,5 @@ export const AsyncProgressBar: React.FC<AsyncProgressBarProps> = ({
|
||||
**最后更新**: 2025-12-22
|
||||
**文档状态**: ✅ 初始版本
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1262,6 +1262,8 @@ interface FulltextScreeningResult {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -376,6 +376,8 @@ GET /api/v1/asl/fulltext-screening/tasks/:taskId/export
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -319,6 +319,8 @@ Linter错误:0个
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -478,6 +478,8 @@ Failed to open file '\\tmp\\extraction_service\\temp_10000_test.pdf'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -544,6 +544,8 @@ df['creatinine'] = pd.to_numeric(df['creatinine'], errors='coerce')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -385,3 +385,5 @@ npm run dev
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -959,6 +959,8 @@ export const aiController = new AIController();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1293,6 +1293,8 @@ npm install react-markdown
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -204,3 +204,5 @@ FMA___基线 | FMA___1个月 | FMA___2个月
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -362,3 +362,5 @@ formula = "FMA总分(0-100) / 100"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -196,3 +196,5 @@ async handleFillnaMice(request, reply) {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -168,3 +168,5 @@ method: 'mean' | 'median' | 'mode' | 'constant' | 'ffill' | 'bfill'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -315,6 +315,8 @@ Changes:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -387,6 +387,8 @@ cd path; command
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -616,6 +616,8 @@ import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -621,5 +621,7 @@ Content-Length: 45234
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -272,6 +272,8 @@ Response:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -425,6 +425,8 @@ Response:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -419,6 +419,8 @@ import { ChatContainer } from '@/shared/components/Chat';
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -329,6 +329,8 @@ const initialMessages = defaultMessages.length > 0 ? defaultMessages : [{
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -369,6 +369,8 @@ python main.py
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -617,6 +617,8 @@ http://localhost:5173/data-cleaning/tool-c
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -227,6 +227,8 @@ Day 5 (6-8小时):
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -405,6 +405,8 @@ Docs: docs/03-业务模块/DC-数据清洗整理/06-开发记录/DC模块重建
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -380,6 +380,8 @@ const mockAssets: Asset[] = [
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -364,6 +364,8 @@ frontend-v2/src/modules/dc/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -324,6 +324,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -278,6 +278,8 @@ ConflictDetectionService // 冲突检测(字段级对比)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -327,6 +327,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -290,6 +290,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -354,6 +354,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -442,6 +442,8 @@ Tool B后端代码**100%复用**了平台通用能力层,无任何重复开发
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -288,6 +288,8 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -219,6 +219,8 @@ $ node scripts/check-dc-tables.mjs
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -452,6 +452,8 @@ ${fields.map((f, i) => `${i + 1}. ${f.name}:${f.desc}`).join('\n')}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
699
docs/05-部署文档/00-部署进度总览.md
Normal file
699
docs/05-部署文档/00-部署进度总览.md
Normal file
@@ -0,0 +1,699 @@
|
||||
# 🚀 AI临床研究平台 - 部署进度总览
|
||||
|
||||
> **文档用途**:统一展示项目部署状态、资源信息、操作手册索引
|
||||
> **更新时间**:2025-12-24
|
||||
> **维护人员**:开发团队
|
||||
|
||||
---
|
||||
|
||||
## 📊 一、部署进度一览表
|
||||
|
||||
| 服务名称 | 部署状态 | 镜像版本 | 部署位置 | 完成时间 | 操作文档 |
|
||||
|---------|---------|---------|---------|---------|---------|
|
||||
| **PostgreSQL数据库** | ✅ 已完成 | PostgreSQL 15 | RDS | 2025-12-24 | [08-PostgreSQL数据库部署操作手册.md](./08-PostgreSQL数据库部署操作手册.md) |
|
||||
| **前端Nginx服务** | ✅ 镜像已推送 | v1.0 | SAE(待部署) | 2025-12-24 | [07-前端Nginx-SAE部署操作手册.md](./07-前端Nginx-SAE部署操作手册.md) |
|
||||
| **Python微服务** | ✅ 镜像已推送 | v1.0 | SAE(待部署) | 2025-12-24 | 待创建 |
|
||||
| **Node.js后端** | ⏳ 待开始 | - | - | - | - |
|
||||
| **Dify AI服务** | ⏳ 待开始 | - | - | - | - |
|
||||
|
||||
**图例说明**:
|
||||
- ✅ 已完成:服务已成功部署并验证
|
||||
- 🔄 进行中:正在部署或配置
|
||||
- ⏳ 待开始:尚未开始
|
||||
|
||||
---
|
||||
|
||||
## 🔑 二、阿里云资源速查表
|
||||
|
||||
### 2.1 ACR容器镜像仓库
|
||||
|
||||
**基本信息**:
|
||||
- **命名空间**:`ai-clinical`
|
||||
- **Registry地址(公网)**:`crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com`
|
||||
- **Registry地址(VPC)**:`crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com`
|
||||
- **用户名**:`gofeng117@163.com`
|
||||
- **密码**:已配置(不公开,请从团队密钥管理处获取)
|
||||
|
||||
**镜像仓库列表**:
|
||||
|
||||
| 仓库名称 | 最新版本 | 镜像大小 | 公网地址 | VPC地址 |
|
||||
|---------|---------|---------|---------|---------|
|
||||
| **python-extraction** | v1.0 | 1.12GB | `crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0` | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0` |
|
||||
| **ai-clinical_frontend-nginx** | v1.0 | ~50MB | `crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.0` | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.0` |
|
||||
| **nodejs-backend** | - | - | `crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/nodejs-backend` | `crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/nodejs-backend` |
|
||||
|
||||
---
|
||||
|
||||
### 2.2 VPC网络与NAT网关
|
||||
|
||||
**VPC信息**:
|
||||
- **VPC ID**:`vpc-2ze055cptkew9c38w4r06`
|
||||
- **VPC名称**:`ai-clinical-vpc`
|
||||
- **网段**:`172.17.0.0/16`
|
||||
- **地域**:华北2(北京)
|
||||
- **交换机1**:`vsw-2zevacop039bxrmj6yc0c`(可用区F)
|
||||
- **交换机2**:`vsw-2zehoeyw9ldncymcyvfwq`(可用区A)
|
||||
- **安全组ID**:`sg-2zedk6fi8sgmmcwdu7tu`
|
||||
|
||||
**NAT网关信息**:
|
||||
- **NAT网关名称**:`NAT_airesearch`
|
||||
- **NAT网关ID**:`ngw-2zeec9ulzgw7ywvx1pst6`
|
||||
- **公网IP(EIP)**:`182.92.176.14`
|
||||
- **SNAT表ID**:`stb-2zesszmzx1qpwf1cb2bry`
|
||||
- **SNAT条目**:覆盖所有交换机,允许VPC内资源访问公网
|
||||
|
||||
---
|
||||
|
||||
### 2.3 RDS PostgreSQL数据库
|
||||
|
||||
**实例信息**:
|
||||
- **实例ID**:`pgm-2zex1m2y3r23hdn5`
|
||||
- **规格**:2核4GB(pg.n2.2c.1m)
|
||||
- **存储空间**:100GB(SSD云盘)
|
||||
- **PostgreSQL版本**:15.0
|
||||
- **内网地址**:`pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com:5432`
|
||||
- **外网地址**:`pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com`(⚠️ 已关闭)
|
||||
- **最大连接数**:400
|
||||
- **时区**:Asia/Shanghai
|
||||
- **白名单**:172.17.0.0/16(VPC网段)
|
||||
|
||||
**数据库配置**:
|
||||
- **数据库名**:`ai_clinical_research`
|
||||
- **用户名**:`airesearch`
|
||||
- **密码**:已配置(不公开,请从团队密钥管理处获取)
|
||||
- **字符集**:UTF8
|
||||
- **排序规则**:en_US.utf8
|
||||
|
||||
**Schema架构**(共11个Schema:10个业务Schema + 1个PostgreSQL系统Schema):
|
||||
| Schema名称 | 功能模块 | 表数量 | 说明 |
|
||||
|-----------|---------|-------|------|
|
||||
| `platform_schema` | 平台核心 | 8 | 用户、权限、任务队列(pg-boss) |
|
||||
| `asl_schema` | 系统文献筛查 | 6 | 项目、文献、筛查记录 |
|
||||
| `aia_schema` | AI智能摘要 | 5 | AI项目、摘要任务 |
|
||||
| `dc_schema` | 数据清洗 | 6 | 工具A/B/C数据处理 |
|
||||
| `pkb_schema` | 个人知识库 | 5 | 文献管理、笔记 |
|
||||
| `admin_schema` | 系统管理 | 0 | 预留 |
|
||||
| `rvw_schema` | 稿件审查 | 0 | 预留 |
|
||||
| `ssa_schema` | 智能统计分析 | 0 | 预留 |
|
||||
| `st_schema` | 统计工具 | 0 | 预留 |
|
||||
| `common_schema` | 公共模块 | 0 | 预留 |
|
||||
| `information_schema` | 系统元数据 | - | PostgreSQL标准 |
|
||||
|
||||
**关键数据量**(截至2025-12-24):
|
||||
- 用户:3条
|
||||
- AI项目:2条
|
||||
- 文献:1204条
|
||||
- 筛查记录:783条
|
||||
- pg-boss任务:350条
|
||||
|
||||
**连接字符串格式**:
|
||||
```
|
||||
postgresql://airesearch:<URL编码后的密码>@pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com:5432/ai_clinical_research
|
||||
```
|
||||
⚠️ 注意:密码中的 `@` 需要URL编码为 `%40`
|
||||
|
||||
---
|
||||
|
||||
### 2.4 SAE Serverless应用
|
||||
|
||||
**命名空间**:
|
||||
- **命名空间ID**:`cn-beijing:test-airesearch`
|
||||
- **地域**:华北2(北京)
|
||||
|
||||
**已部署/待部署应用列表**:
|
||||
|
||||
| 应用名称 | 状态 | 规格 | 实例数 | 端口 | 内网地址 | 公网地址 |
|
||||
|---------|------|------|-------|------|---------|---------|
|
||||
| **python-extraction** | 镜像已推送 | 1核2GB | 1 | 8000 | 待部署后填写 | 待部署后填写 |
|
||||
| **nodejs-backend** | 待构建 | 2核4GB | 1 | 3001 | 待部署后填写 | 待部署后填写 |
|
||||
| **frontend-nginx** | 镜像已推送 | 1核2GB | 1 | 80 | 待部署后填写 | 待部署后填写 |
|
||||
|
||||
---
|
||||
|
||||
### 2.5 OSS对象存储
|
||||
|
||||
**Bucket信息**:
|
||||
- **Bucket名称**:`ai-clinical-research`
|
||||
- **地域**:华北2(北京)
|
||||
- **存储类型**:标准存储
|
||||
- **访问控制**:私有
|
||||
- **存储冗余类型**:同城冗余存储
|
||||
- **内网Endpoint**:`oss-cn-beijing-internal.aliyuncs.com`
|
||||
- **公网Endpoint**:`oss-cn-beijing.aliyuncs.com`
|
||||
- **Bucket域名(内网)**:`ai-clinical-research.oss-cn-beijing-internal.aliyuncs.com`
|
||||
- **创建时间**:2025-12-16 20:22
|
||||
|
||||
**RAM用户访问凭证**(用于OSS写入):
|
||||
- **RAM用户名**:`oss-bucket-put-object@1991407246109125.onaliyun.com`
|
||||
- **AccessKeyId**:已配置(不公开,通过环境变量注入)
|
||||
- **AccessKeySecret**:已配置(不公开,通过环境变量注入)
|
||||
- **UID**:`203256565888301026`
|
||||
- **创建时间**:2025-12-16 20:31:41
|
||||
|
||||
⚠️ **安全警告**:AccessKey是敏感信息,请勿提交到公开Git仓库!
|
||||
|
||||
**存储用途**:
|
||||
- `/uploads/pdfs/` - PDF文件上传
|
||||
- `/uploads/docx/` - Word文档上传
|
||||
- `/uploads/txt/` - 文本文件上传
|
||||
- `/exports/` - 导出文件临时存储
|
||||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
### 2.6 镜像配置文件存储位置
|
||||
|
||||
**前端Nginx服务**:
|
||||
```
|
||||
AIclinicalresearch/frontend-v2/
|
||||
├── Dockerfile ✅ 已提交Git(多阶段构建)
|
||||
├── .dockerignore ✅ 已提交Git(优化构建上下文)
|
||||
├── nginx.conf ✅ 已提交Git(Nginx配置)
|
||||
└── docker-entrypoint.sh ✅ 已提交Git(启动脚本)
|
||||
|
||||
镜像存储位置:
|
||||
公网:crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.0
|
||||
VPC:crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.0
|
||||
```
|
||||
|
||||
**Python微服务**:
|
||||
```
|
||||
AIclinicalresearch/extraction_service/
|
||||
├── Dockerfile ✅ 已提交Git(多阶段构建 + 阿里云源)
|
||||
├── .dockerignore ✅ 已提交Git(优化构建上下文)
|
||||
└── requirements-prod.txt ✅ 已提交Git(精简依赖列表)
|
||||
|
||||
镜像存储位置:
|
||||
公网:crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0
|
||||
VPC:crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0
|
||||
```
|
||||
|
||||
**Node.js后端**(待构建):
|
||||
```
|
||||
AIclinicalresearch/backend/
|
||||
├── Dockerfile ⏳ 待创建
|
||||
├── .dockerignore ⏳ 待创建
|
||||
└── (配置文件) ⏳ 待确认
|
||||
|
||||
镜像存储位置(预留):
|
||||
公网:crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/nodejs-backend:v1.0
|
||||
VPC:crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/nodejs-backend:v1.0
|
||||
```
|
||||
|
||||
**Git提交规范**:
|
||||
| 文件类型 | 是否提交Git | 说明 |
|
||||
|---------|------------|------|
|
||||
| ✅ Dockerfile | 必须提交 | 镜像构建蓝图,团队协作必需 |
|
||||
| ✅ .dockerignore | 必须提交 | 优化构建性能,避免传输无用文件 |
|
||||
| ✅ requirements*.txt / package.json | 必须提交 | 依赖清单,可复现构建 |
|
||||
| ✅ nginx.conf / 配置文件 | 必须提交 | 服务配置 |
|
||||
| ❌ .env | 禁止提交 | 敏感信息(密码、API密钥) |
|
||||
| ❌ Docker镜像文件(.tar) | 禁止提交 | 镜像文件很大,存储在ACR云端 |
|
||||
|
||||
---
|
||||
|
||||
## 📦 三、已部署服务详情
|
||||
|
||||
### 3.1 PostgreSQL数据库
|
||||
|
||||
**部署概要**:
|
||||
- **部署时间**:2025-12-24
|
||||
- **部署方式**:`pg_dump` 全量导出 → RDS导入
|
||||
- **数据迁移时长**:约12秒(90MB SQL文件)
|
||||
- **验证状态**:✅ Schema完整、数据一致、关系正确
|
||||
|
||||
**关键配置**:
|
||||
```bash
|
||||
# 本地PostgreSQL Docker容器
|
||||
docker run --name ai-clinical-postgres \
|
||||
-e POSTGRES_PASSWORD=postgres \
|
||||
-e POSTGRES_DB=ai_clinical_research \
|
||||
-p 5432:5432 \
|
||||
-d postgres:15-alpine
|
||||
```
|
||||
|
||||
**备份信息**:
|
||||
- **本地备份文件**:`AIclinicalresearch/rds_init_20251224_154529.sql`(90MB)
|
||||
- **备份内容**:完整Schema + 全量数据
|
||||
- **RDS自动备份**:每日凌晨2点(保留7天)
|
||||
|
||||
**操作文档**:
|
||||
- [08-PostgreSQL数据库部署操作手册.md](./08-PostgreSQL数据库部署操作手册.md) - 完整部署流程
|
||||
- [PostgreSQL部署策略-摸底报告.md](./PostgreSQL部署策略-摸底报告.md) - 数据库分析报告
|
||||
|
||||
**重要提示**:
|
||||
- ⚠️ **安全**:外网访问已关闭,生产环境禁止长期开启
|
||||
- ⚠️ **连接**:需通过VPC内网或SSH隧道连接
|
||||
- ⚠️ **密码**:连接字符串(如 `DATABASE_URL`)中密码里的 `@` 需URL编码为 `%40`;`PGPASSWORD` 等独立变量使用原始密码,无需编码
|
||||
|
||||
---
|
||||
|
||||
### 3.2 前端Nginx服务
|
||||
|
||||
**镜像信息**:
|
||||
- **仓库名称**:`ai-clinical_frontend-nginx`
|
||||
- **镜像版本**:`v1.0`
|
||||
- **镜像大小**:约50MB
|
||||
- **基础镜像**:`nginx:alpine`
|
||||
- **构建时间**:2025-12-24
|
||||
|
||||
**镜像功能**:
|
||||
- ✅ 托管React单页应用(SPA)
|
||||
- ✅ Nginx反向代理API请求
|
||||
- ✅ 支持动态环境变量注入
|
||||
- ✅ Gzip压缩优化
|
||||
- ✅ 健康检查端点
|
||||
|
||||
**构建产物**:
|
||||
- **Dockerfile**:`AIclinicalresearch/frontend-v2/Dockerfile`
|
||||
- **Nginx配置**:`AIclinicalresearch/frontend-v2/nginx.conf`
|
||||
- **.dockerignore**:`AIclinicalresearch/frontend-v2/.dockerignore`
|
||||
|
||||
**SAE配置建议**:
|
||||
- **CPU/内存**:1核 / 2GB
|
||||
- **实例数**:1
|
||||
- **端口**:80
|
||||
- **健康检查**:`GET /` 返回200
|
||||
- **环境变量**:
|
||||
```bash
|
||||
VITE_API_BASE_URL=http://<nodejs-backend的SAE内网地址>:3001
|
||||
```
|
||||
⚠️ 注意:内网地址需在SAE部署后获取
|
||||
|
||||
**操作文档**:
|
||||
- [07-前端Nginx-SAE部署操作手册.md](./07-前端Nginx-SAE部署操作手册.md) - 完整SAE部署步骤
|
||||
- [06-前端Nginx-SAE容器部署指南.md](./06-前端Nginx-SAE容器部署指南.md) - 技术架构详解
|
||||
|
||||
**部署状态**:
|
||||
- ✅ Docker镜像构建成功
|
||||
- ✅ 镜像已推送至ACR
|
||||
- ⏳ 待部署到SAE
|
||||
|
||||
---
|
||||
|
||||
### 3.3 Python微服务
|
||||
|
||||
**镜像信息**:
|
||||
- **仓库名称**:`python-extraction`
|
||||
- **镜像版本**:`v1.0`
|
||||
- **镜像大小**:1.12GB
|
||||
- **基础镜像**:`python:3-slim`
|
||||
- **构建时间**:2025-12-24
|
||||
|
||||
**功能模块**:
|
||||
- ✅ **PDF文本提取**:PyMuPDF 1.26.7
|
||||
- ✅ **Docx文档提取**:Mammoth + python-docx
|
||||
- ✅ **数据清洗(DC工具)**:pandas 2.0+ / numpy 1.24+ / polars 0.19+
|
||||
- ✅ **语言检测**:langdetect
|
||||
- ❌ **Nougat OCR**:已移除(减小镜像1.5GB)
|
||||
|
||||
**依赖优化**:
|
||||
```txt
|
||||
# 精简版依赖(requirements-prod.txt)
|
||||
- 移除:nougat-ocr (1.5GB)
|
||||
- 移除:albumentations (Nougat依赖)
|
||||
- 保留:pandas、numpy、polars(DC工具必需)
|
||||
- 保留:PyMuPDF、pdfplumber(PDF提取)
|
||||
```
|
||||
|
||||
**健康检查**:
|
||||
- **端点**:`GET /api/health`
|
||||
- **响应示例**:
|
||||
```json
|
||||
{
|
||||
"status": "healthy",
|
||||
"checks": {
|
||||
"pymupdf": {"available": true, "version": "1.26.7"},
|
||||
"nougat": {"available": false, "error": "Nougat未安装"},
|
||||
"temp_dir": {"path": "/tmp/extraction_service", "writable": true}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**SAE配置建议**:
|
||||
- **CPU/内存**:1核 / 2GB
|
||||
- **实例数**:1
|
||||
- **端口**:8000
|
||||
- **启动命令**:默认(Dockerfile中的CMD)
|
||||
- **健康检查**:`GET /api/health` 返回200
|
||||
- **环境变量**:
|
||||
```bash
|
||||
LOG_LEVEL=INFO
|
||||
TEMP_DIR=/tmp/extraction_service
|
||||
```
|
||||
⚠️ 生产环境建议根据实际负载调整实例数
|
||||
|
||||
**构建产物**:
|
||||
- **Dockerfile**:`AIclinicalresearch/extraction_service/Dockerfile`
|
||||
- **依赖文件**:`AIclinicalresearch/extraction_service/requirements-prod.txt`
|
||||
- **.dockerignore**:`AIclinicalresearch/extraction_service/.dockerignore`
|
||||
|
||||
**关键技术方案**:
|
||||
- ✅ 使用阿里云Debian镜像源(解决apt-get网络问题)
|
||||
- ✅ 多阶段构建(优化镜像大小)
|
||||
- ✅ 运行时依赖安装(libgl1、libglib2.0)
|
||||
|
||||
**操作文档**:
|
||||
- [04-Python微服务-SAE容器部署指南.md](./04-Python微服务-SAE容器部署指南.md) - 技术架构详解
|
||||
- 待创建:Python微服务SAE部署操作手册
|
||||
|
||||
**部署状态**:
|
||||
- ✅ Docker镜像构建成功(本地测试通过)
|
||||
- ✅ 镜像已推送至ACR
|
||||
- ⏳ 待部署到SAE
|
||||
|
||||
---
|
||||
|
||||
## ⚡ 四、快速命令参考
|
||||
|
||||
### 4.1 Docker镜像管理
|
||||
|
||||
**登录ACR**:
|
||||
```bash
|
||||
# 使用公网地址登录
|
||||
echo "$ACR_PASSWORD" | docker login --username=gofeng117@163.com \
|
||||
--password-stdin \
|
||||
crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com
|
||||
```
|
||||
|
||||
**拉取镜像**:
|
||||
```bash
|
||||
# 前端Nginx
|
||||
docker pull crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.0
|
||||
|
||||
# Python微服务
|
||||
docker pull crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0
|
||||
```
|
||||
|
||||
**本地测试**:
|
||||
```bash
|
||||
# 前端Nginx(端口3000)
|
||||
docker run -d -p 3000:80 \
|
||||
-e VITE_API_BASE_URL=http://localhost:3001 \
|
||||
--name frontend-test \
|
||||
ai-clinical_frontend-nginx:v1.0
|
||||
|
||||
# Python微服务(端口8000)
|
||||
docker run -d -p 8000:8000 \
|
||||
--name python-test \
|
||||
python-extraction:v1.0
|
||||
|
||||
# 查看日志
|
||||
docker logs -f frontend-test
|
||||
docker logs -f python-test
|
||||
|
||||
# 健康检查
|
||||
curl http://localhost:3000 # 前端
|
||||
curl http://localhost:8000/api/health # Python微服务
|
||||
```
|
||||
|
||||
**构建新版本**:
|
||||
```bash
|
||||
# 前端Nginx
|
||||
cd AIclinicalresearch/frontend-v2
|
||||
docker build -t ai-clinical_frontend-nginx:v1.1 .
|
||||
docker tag ai-clinical_frontend-nginx:v1.1 \
|
||||
crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.1
|
||||
docker push crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/ai-clinical_frontend-nginx:v1.1
|
||||
|
||||
# Python微服务
|
||||
cd AIclinicalresearch/extraction_service
|
||||
docker build -t python-extraction:v1.1 -f Dockerfile .
|
||||
docker tag python-extraction:v1.1 \
|
||||
crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.1
|
||||
docker push crpi-cd5ij4pjt65mweeo.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.2 数据库连接与管理
|
||||
|
||||
**通过本地Docker容器连接RDS**:
|
||||
```bash
|
||||
# 启动本地PostgreSQL容器(用于psql客户端)
|
||||
docker run --name ai-clinical-postgres \
|
||||
-e POSTGRES_PASSWORD=postgres \
|
||||
-p 5432:5432 \
|
||||
-d postgres:15-alpine
|
||||
|
||||
# 连接到RDS(需临时开启外网或使用VPC)
|
||||
docker exec -e PGPASSWORD="$RDS_PASSWORD" \
|
||||
ai-clinical-postgres psql \
|
||||
-h pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com \
|
||||
-p 5432 \
|
||||
-U airesearch \
|
||||
-d ai_clinical_research
|
||||
|
||||
# 查看Schema列表
|
||||
docker exec -e PGPASSWORD="$RDS_PASSWORD" \
|
||||
ai-clinical-postgres psql \
|
||||
-h pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com \
|
||||
-p 5432 -U airesearch -d ai_clinical_research \
|
||||
-c "SELECT nspname FROM pg_namespace WHERE nspname LIKE '%_schema' ORDER BY nspname;"
|
||||
|
||||
# 查看表数量统计
|
||||
docker exec -e PGPASSWORD="$RDS_PASSWORD" \
|
||||
ai-clinical-postgres psql \
|
||||
-h pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com \
|
||||
-p 5432 -U airesearch -d ai_clinical_research \
|
||||
-c "SELECT schemaname, COUNT(*) FROM pg_tables WHERE schemaname NOT IN ('pg_catalog', 'information_schema') GROUP BY schemaname;"
|
||||
```
|
||||
|
||||
**数据库备份**:
|
||||
```bash
|
||||
# 导出完整数据库
|
||||
docker exec ai-clinical-postgres pg_dump \
|
||||
-U postgres \
|
||||
-d ai_clinical_research \
|
||||
--format=plain \
|
||||
--no-owner \
|
||||
--no-acl \
|
||||
--encoding=UTF8 \
|
||||
> "backup_$(date +%Y%m%d_%H%M%S).sql"
|
||||
|
||||
# 导入到RDS(需临时开启外网)
|
||||
cat backup_20241224_154529.sql | \
|
||||
docker exec -i -e PGPASSWORD="$RDS_PASSWORD" \
|
||||
ai-clinical-postgres psql \
|
||||
-h pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com \
|
||||
-p 5432 -U airesearch -d ai_clinical_research
|
||||
```
|
||||
|
||||
**Node.js应用连接字符串**:
|
||||
```bash
|
||||
# .env 文件配置
|
||||
DATABASE_URL="postgresql://airesearch:<URL编码后的数据库密码>@pgm-2zex1m2y3r23hdn5.pg.rds.aliyuncs.com:5432/ai_clinical_research"
|
||||
|
||||
# 注意:@ 符号需要URL编码为 %40
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 4.3 SAE常用操作
|
||||
|
||||
**通过阿里云CLI部署**(可选):
|
||||
```bash
|
||||
# 安装阿里云CLI
|
||||
# https://help.aliyun.com/document_detail/121541.html
|
||||
|
||||
# 配置访问凭证
|
||||
aliyun configure
|
||||
|
||||
# 部署应用(示例)
|
||||
aliyun sae DeployApplication \
|
||||
--AppId xxx \
|
||||
--ImageUrl crpi-cd5ij4pjt65mweeo-vpc.cn-beijing.personal.cr.aliyuncs.com/ai-clinical/python-extraction:v1.0 \
|
||||
--Namespace cn-beijing:xxx
|
||||
```
|
||||
|
||||
**通过Web控制台部署**(推荐):
|
||||
1. 登录阿里云SAE控制台
|
||||
2. 选择应用 → 部署应用
|
||||
3. 选择镜像版本
|
||||
4. 配置环境变量
|
||||
5. 确认部署
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ 五、注意事项与最佳实践
|
||||
|
||||
### 5.1 安全规范 🔒
|
||||
|
||||
**数据库安全**:
|
||||
- ❌ **禁止**:RDS外网地址长期开启
|
||||
- ✅ **推荐**:使用VPC内网或SSH隧道连接
|
||||
- ✅ **白名单**:仅允许必要的IP访问
|
||||
- ✅ **密码强度**:使用复杂密码(大小写+数字+特殊字符)
|
||||
|
||||
**镜像仓库安全**:
|
||||
- ❌ **禁止**:在代码中硬编码ACR密码
|
||||
- ✅ **推荐**:定期轮换Registry密码(每季度)
|
||||
- ✅ **访问控制**:仅团队成员拥有推送权限
|
||||
|
||||
**环境变量管理**:
|
||||
- ❌ **禁止**:在Git中提交 `.env` 文件
|
||||
- ✅ **推荐**:使用SAE的环境变量配置功能
|
||||
- ✅ **敏感信息**:数据库密码、API密钥等使用环境变量
|
||||
|
||||
**SSL/TLS**:
|
||||
- ✅ 生产环境必须启用HTTPS
|
||||
- ✅ 使用阿里云免费SSL证书或Let's Encrypt
|
||||
|
||||
---
|
||||
|
||||
### 5.2 部署顺序 📋
|
||||
|
||||
**推荐的部署顺序**:
|
||||
1. ✅ **基础设施**:VPC、NAT网关、安全组
|
||||
2. ✅ **数据库**:RDS PostgreSQL(数据迁移+验证)
|
||||
3. ✅ **对象存储**:OSS Bucket创建+权限配置
|
||||
4. 🔄 **后端服务**:
|
||||
- Python微服务(文档提取+数据清洗)
|
||||
- Node.js后端(API服务器)
|
||||
5. 🔄 **前端服务**:Nginx静态资源托管
|
||||
6. 🔄 **AI服务**:Dify部署
|
||||
7. 🔄 **验证测试**:全链路功能验证
|
||||
|
||||
**依赖关系**:
|
||||
```
|
||||
RDS PostgreSQL (必需)
|
||||
↓
|
||||
Node.js 后端 (依赖数据库)
|
||||
↓
|
||||
Python 微服务 (可独立,但后端会调用)
|
||||
↓
|
||||
前端 Nginx (依赖后端API)
|
||||
↓
|
||||
Dify AI (可独立)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### 5.3 回滚策略 🔄
|
||||
|
||||
**镜像版本管理**:
|
||||
- ACR保留所有历史版本(v1.0, v1.1, v1.2...)
|
||||
- 使用语义化版本号:`major.minor.patch`
|
||||
- 生产环境使用固定版本号,禁止使用 `:latest`
|
||||
|
||||
**SAE应用回滚**:
|
||||
- SAE支持一键回滚到上一版本
|
||||
- 回滚时长:约1-2分钟
|
||||
- 建议:部署前先在测试环境验证
|
||||
|
||||
**数据库回滚**:
|
||||
- RDS自动备份:每日凌晨2点(保留7天)
|
||||
- 手动备份:重大变更前务必手动备份
|
||||
- 回滚时长:取决于数据量(约10分钟/10GB)
|
||||
|
||||
---
|
||||
|
||||
### 5.4 监控与日志 📊
|
||||
|
||||
**应用监控**(建议配置):
|
||||
- **健康检查**:每个服务配置健康检查端点
|
||||
- **日志收集**:SAE自动收集stdout/stderr日志
|
||||
- **告警规则**:CPU>80%、内存>80%、健康检查失败
|
||||
|
||||
**日志查看**:
|
||||
```bash
|
||||
# SAE控制台 → 应用详情 → 日志查询
|
||||
# 或使用阿里云CLI
|
||||
aliyun sae DescribeApplicationInstances --AppId xxx
|
||||
```
|
||||
|
||||
**性能指标**:
|
||||
- **响应时间**:API平均响应<500ms
|
||||
- **错误率**:<1%
|
||||
- **可用性**:>99.9%
|
||||
|
||||
---
|
||||
|
||||
### 5.5 成本优化 💰
|
||||
|
||||
**SAE资源配置**:
|
||||
- **开发环境**:0.5核 / 1GB(约¥30/月)
|
||||
- **生产环境**:1核 / 2GB(约¥60/月)
|
||||
- **弹性伸缩**:根据流量自动扩缩容
|
||||
|
||||
**RDS成本**:
|
||||
- **按量付费**:测试阶段使用
|
||||
- **包年包月**:生产环境更优惠(约¥800/月)
|
||||
|
||||
**OSS成本**:
|
||||
- **存储费用**:约¥0.12/GB/月
|
||||
- **流量费用**:内网流量免费,外网流量¥0.5/GB
|
||||
|
||||
---
|
||||
|
||||
## 📝 六、待办事项清单
|
||||
|
||||
### 高优先级 🔴
|
||||
- [ ] **Python微服务**:部署到SAE并验证
|
||||
- [ ] **Python微服务**:创建SAE部署操作手册
|
||||
- [ ] **Node.js后端**:Docker镜像构建
|
||||
- [ ] **Node.js后端**:部署到SAE
|
||||
|
||||
### 中优先级 🟡
|
||||
- [ ] **前端Nginx**:部署到SAE并配置域名
|
||||
- [ ] **内网通信**:配置前端→后端→Python微服务的内网调用
|
||||
- [ ] **SSL证书**:申请并配置HTTPS
|
||||
- [ ] **监控告警**:配置SAE健康检查和告警规则
|
||||
|
||||
### 低优先级 🟢
|
||||
- [ ] **Dify AI**:评估部署方案
|
||||
- [ ] **负载测试**:压力测试各服务性能
|
||||
- [ ] **文档完善**:补充故障排查手册
|
||||
- [ ] **CI/CD**:配置自动化部署流程
|
||||
|
||||
---
|
||||
|
||||
## 📚 七、相关文档索引
|
||||
|
||||
### 部署指南
|
||||
- [01-快速部署SOP-零基础版.md](./01-快速部署SOP-零基础版.md) - **完整部署流程总纲**
|
||||
- [02-SAE部署完全指南(产品经理版).md](./02-SAE部署完全指南(产品经理版).md) - SAE基础知识
|
||||
|
||||
### 服务部署手册
|
||||
- [07-前端Nginx-SAE部署操作手册.md](./07-前端Nginx-SAE部署操作手册.md) - 前端Nginx部署步骤
|
||||
- [08-PostgreSQL数据库部署操作手册.md](./08-PostgreSQL数据库部署操作手册.md) - PostgreSQL部署步骤
|
||||
- 待创建:Python微服务SAE部署操作手册
|
||||
- 待创建:Node.js后端SAE部署操作手册
|
||||
|
||||
### 技术架构文档
|
||||
- [00-部署架构总览.md](./00-部署架构总览.md) - 架构设计与技术选型
|
||||
- [04-Python微服务-SAE容器部署指南.md](./04-Python微服务-SAE容器部署指南.md) - Python服务技术详解
|
||||
- [06-前端Nginx-SAE容器部署指南.md](./06-前端Nginx-SAE容器部署指南.md) - 前端Nginx技术详解
|
||||
|
||||
### 分析报告
|
||||
- [PostgreSQL部署策略-摸底报告.md](./PostgreSQL部署策略-摸底报告.md) - 数据库结构分析
|
||||
|
||||
---
|
||||
|
||||
## 🔄 八、更新日志
|
||||
|
||||
### 2025-12-24
|
||||
- ✅ PostgreSQL数据库部署完成
|
||||
- ✅ 前端Nginx Docker镜像构建并推送至ACR
|
||||
- ✅ Python微服务Docker镜像构建并推送至ACR
|
||||
- ✅ 创建部署进度总览文档
|
||||
|
||||
---
|
||||
|
||||
## 📞 九、联系与支持
|
||||
|
||||
**技术支持**:
|
||||
- 开发团队内部文档
|
||||
- 阿里云工单:https://workorder.console.aliyun.com/
|
||||
|
||||
**紧急联系**:
|
||||
- 数据库问题:先检查 [08-PostgreSQL数据库部署操作手册.md](./08-PostgreSQL数据库部署操作手册.md)
|
||||
- SAE部署问题:先检查 [01-快速部署SOP-零基础版.md](./01-快速部署SOP-零基础版.md)
|
||||
- 镜像构建问题:查看本文档"快速命令参考"章节
|
||||
|
||||
---
|
||||
|
||||
> **提示**:本文档会随着部署进度持续更新,请定期查看最新版本!
|
||||
> **最后更新**:2025-12-24
|
||||
|
||||
@@ -862,3 +862,5 @@ ACR镜像仓库:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
1353
docs/05-部署文档/07-前端Nginx-SAE部署操作手册.md
Normal file
1353
docs/05-部署文档/07-前端Nginx-SAE部署操作手册.md
Normal file
File diff suppressed because it is too large
Load Diff
1169
docs/05-部署文档/08-PostgreSQL数据库部署操作手册.md
Normal file
1169
docs/05-部署文档/08-PostgreSQL数据库部署操作手册.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -473,3 +473,5 @@ NAT网关成本¥100/月,对初创团队是一笔开销
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -378,3 +378,5 @@ curl http://你的SAE地址:3001/health
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -710,3 +710,5 @@ const job = await queue.getJob(jobId);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -477,3 +477,5 @@ processLiteraturesInBackground(task.id, projectId, testLiteratures);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -954,3 +954,5 @@ ROI = (¥22,556 - ¥144) / ¥144 × 100% = 15,564%
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1011,3 +1011,5 @@ Redis 实例:¥500/月
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -466,6 +466,8 @@ import { ChatContainer } from '@/shared/components/Chat';
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
48
extraction_service/.dockerignore
Normal file
48
extraction_service/.dockerignore
Normal file
@@ -0,0 +1,48 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
venv/
|
||||
env/
|
||||
ENV/
|
||||
.venv
|
||||
|
||||
# 测试
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
*.log
|
||||
|
||||
# IDE
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# 文档
|
||||
*.md
|
||||
docs/
|
||||
|
||||
# Git
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# 环境变量
|
||||
.env
|
||||
.env.local
|
||||
|
||||
# 临时文件
|
||||
*.tmp
|
||||
temp/
|
||||
tmp/
|
||||
uploads/
|
||||
|
||||
# 模型缓存 (避免打包Nougat模型)
|
||||
.cache/
|
||||
models/
|
||||
*.pth
|
||||
*.pt
|
||||
*.onnx
|
||||
|
||||
56
extraction_service/Dockerfile
Normal file
56
extraction_service/Dockerfile
Normal file
@@ -0,0 +1,56 @@
|
||||
# ========================================
|
||||
# 多阶段构建:Python微服务
|
||||
# ========================================
|
||||
|
||||
# -------------------- 阶段1: 构建阶段 --------------------
|
||||
FROM python:3-slim AS builder
|
||||
|
||||
# 设置工作目录
|
||||
WORKDIR /app
|
||||
|
||||
# 替换为阿里云镜像源(为将来可能的构建依赖做准备)
|
||||
RUN sed -i 's|http://deb.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources \
|
||||
&& sed -i 's|http://security.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources
|
||||
|
||||
# 复制依赖文件
|
||||
COPY requirements-prod.txt .
|
||||
|
||||
# 安装Python依赖到临时目录(使用预编译wheel,无需编译依赖)
|
||||
RUN pip install --no-cache-dir --user -r requirements-prod.txt
|
||||
|
||||
# -------------------- 阶段2: 运行阶段 --------------------
|
||||
FROM python:3-slim
|
||||
|
||||
# 设置工作目录
|
||||
WORKDIR /app
|
||||
|
||||
# 替换为阿里云镜像源并安装运行时依赖
|
||||
RUN sed -i 's|http://deb.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources \
|
||||
&& sed -i 's|http://security.debian.org|http://mirrors.aliyun.com|g' /etc/apt/sources.list.d/debian.sources \
|
||||
&& apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
libgomp1 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# 从构建阶段复制Python包
|
||||
COPY --from=builder /root/.local /root/.local
|
||||
|
||||
# 复制应用代码
|
||||
COPY . .
|
||||
|
||||
# 设置Python路径
|
||||
ENV PATH=/root/.local/bin:$PATH
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# 暴露端口
|
||||
EXPOSE 8000
|
||||
|
||||
# 健康检查
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
||||
CMD python -c "import requests; requests.get('http://localhost:8000/api/health', timeout=5)"
|
||||
|
||||
# 启动命令
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
|
||||
|
||||
@@ -31,4 +31,6 @@ __version__ = '1.0.0'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -164,4 +164,6 @@ def get_missing_summary(df: pd.DataFrame) -> dict:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -124,4 +124,6 @@ def apply_filter(
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -289,3 +289,5 @@ def get_unpivot_preview(
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
43
extraction_service/requirements-prod.txt
Normal file
43
extraction_service/requirements-prod.txt
Normal file
@@ -0,0 +1,43 @@
|
||||
# ========================================
|
||||
# 生产环境依赖 (移除Nougat和重量级依赖)
|
||||
# ========================================
|
||||
|
||||
# Web框架
|
||||
fastapi==0.104.1
|
||||
uvicorn[standard]==0.24.0
|
||||
python-multipart==0.0.6
|
||||
|
||||
# 数据处理 (DC工具必需)
|
||||
pandas>=2.0.0
|
||||
numpy>=1.24.0
|
||||
polars>=0.19.0
|
||||
|
||||
# PDF处理 (核心轻量级库)
|
||||
PyMuPDF>=1.24.0
|
||||
pdfplumber==0.10.3
|
||||
|
||||
# Docx处理
|
||||
mammoth==1.6.0
|
||||
python-docx==1.1.0
|
||||
|
||||
# 语言检测
|
||||
langdetect==1.0.9
|
||||
|
||||
# 编码检测
|
||||
chardet==5.2.0
|
||||
|
||||
# 工具
|
||||
python-dotenv==1.0.0
|
||||
pydantic>=2.10.0
|
||||
|
||||
# 日志
|
||||
loguru==0.7.2
|
||||
|
||||
# HTTP客户端(运行时必需:Dockerfile 的 HEALTHCHECK 通过 requests 探测 /api/health)
|
||||
requests==2.31.0
|
||||
|
||||
# ========================================
|
||||
# 注意:生产环境已移除以下重量级依赖
|
||||
# - nougat-ocr==0.1.17 (约1.5GB)
|
||||
# - albumentations==1.3.1 (Nougat依赖)
|
||||
# ========================================
|
||||
@@ -296,6 +296,8 @@ if __name__ == "__main__":
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -62,6 +62,8 @@ except Exception as e:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -42,6 +42,8 @@ except Exception as e:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
68
frontend-v2/.dockerignore
Normal file
68
frontend-v2/.dockerignore
Normal file
@@ -0,0 +1,68 @@
|
||||
# Node.js
|
||||
node_modules
|
||||
npm-debug.log
|
||||
yarn-error.log
|
||||
.npm
|
||||
.yarn
|
||||
|
||||
# 开发文件
|
||||
.env
|
||||
.env.*
|
||||
*.local
|
||||
|
||||
# 构建产物(Dockerfile 中会重新生成)
|
||||
dist
|
||||
|
||||
# 测试文件
|
||||
test
|
||||
tests
|
||||
*.test.ts
|
||||
*.test.tsx
|
||||
*.spec.ts
|
||||
*.spec.tsx
|
||||
coverage
|
||||
.nyc_output
|
||||
|
||||
# 文档和临时文件
|
||||
docs
|
||||
*.md
|
||||
!README.md
|
||||
.vscode
|
||||
.idea
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
.gitattributes
|
||||
|
||||
# CI/CD
|
||||
.github
|
||||
.gitlab-ci.yml
|
||||
.travis.yml
|
||||
|
||||
# 日志
|
||||
*.log
|
||||
logs
|
||||
|
||||
# 临时文件
|
||||
temp
|
||||
tmp
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# 编辑器配置
|
||||
.editorconfig
|
||||
.prettierrc
|
||||
.eslintrc*
|
||||
|
||||
# TypeScript 配置(保留 tsconfig.json,其他忽略)
|
||||
tsconfig.tsbuildinfo
|
||||
|
||||
# Vite
|
||||
.vite
|
||||
vite.config.*.timestamp-*
|
||||
|
||||
|
||||
64
frontend-v2/Dockerfile
Normal file
64
frontend-v2/Dockerfile
Normal file
@@ -0,0 +1,64 @@
|
||||
# ==================== 阶段 1: 构建阶段 ====================
|
||||
# ⚠️ 使用 Node Alpine 最新版(包含 Node 22+)
|
||||
FROM node:alpine AS builder
|
||||
|
||||
# 设置工作目录
|
||||
WORKDIR /app
|
||||
|
||||
# 1. 复制依赖文件
|
||||
COPY package*.json ./
|
||||
|
||||
# 2. 安装依赖
|
||||
# 使用国内镜像加速(可选,如果网络慢可以取消注释)
|
||||
# RUN npm config set registry https://registry.npmmirror.com
|
||||
RUN npm ci --only=production=false
|
||||
|
||||
# 3. 复制源代码
|
||||
COPY . .
|
||||
|
||||
# 4. 构建生产版本
|
||||
# ⚠️ 注意:如果需要在构建时注入环境变量,在这里设置 ARG
|
||||
# ARG VITE_API_BASE_URL
|
||||
# ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
|
||||
RUN npm run build
|
||||
|
||||
# 验证构建产物
|
||||
RUN ls -la /app/dist/
|
||||
|
||||
# ==================== 阶段 2: 运行阶段 ====================
|
||||
FROM nginx:alpine
|
||||
|
||||
# 安装必要工具(包括时区数据)
|
||||
RUN apk add --no-cache \
|
||||
bash \
|
||||
gettext \
|
||||
curl \
|
||||
tzdata
|
||||
|
||||
# 设置容器时区为上海(否则日志时间会比北京时间慢 8 小时)
|
||||
RUN cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
|
||||
echo "Asia/Shanghai" > /etc/timezone
|
||||
|
||||
# 创建 Nginx 配置目录
|
||||
RUN mkdir -p /etc/nginx/templates
|
||||
|
||||
# 1. 复制 Nginx 配置模板(支持环境变量替换)
|
||||
COPY nginx.conf /etc/nginx/templates/nginx.conf.template
|
||||
|
||||
# 2. 复制构建产物到 Nginx 默认目录
|
||||
COPY --from=builder /app/dist /usr/share/nginx/html
|
||||
|
||||
# 3. 复制启动脚本
|
||||
COPY docker-entrypoint.sh /docker-entrypoint.sh
|
||||
RUN chmod +x /docker-entrypoint.sh
|
||||
|
||||
# 健康检查
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \
|
||||
CMD curl -f http://localhost/health || exit 1
|
||||
|
||||
# 暴露端口
|
||||
EXPOSE 80
|
||||
|
||||
# 启动命令
|
||||
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
|
||||
35
frontend-v2/docker-entrypoint.sh
Normal file
35
frontend-v2/docker-entrypoint.sh
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
# ⚠️ 关键:不给默认值,强制在 SAE 控制台配置
|
||||
# 如果未配置,报错退出(避免使用错误的后端地址)
|
||||
if [ -z "$BACKEND_SERVICE_HOST" ]; then
|
||||
echo "❌ ERROR: BACKEND_SERVICE_HOST environment variable is required!"
|
||||
echo "Please configure it in SAE console with backend internal IP (e.g., 172.17.x.x)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$BACKEND_SERVICE_PORT" ]; then
|
||||
echo "⚠️ WARNING: BACKEND_SERVICE_PORT not set, using default: 3001"
|
||||
export BACKEND_SERVICE_PORT=3001
|
||||
fi
|
||||
|
||||
echo "============================================"
|
||||
echo "Starting Frontend Nginx Service"
|
||||
echo "Backend Service: ${BACKEND_SERVICE_HOST}:${BACKEND_SERVICE_PORT}"
|
||||
echo "Container Timezone: $(cat /etc/timezone)"
|
||||
echo "Current Time: $(date)"
|
||||
echo "============================================"
|
||||
|
||||
# 使用 envsubst 替换 Nginx 配置中的环境变量
|
||||
envsubst '${BACKEND_SERVICE_HOST} ${BACKEND_SERVICE_PORT}' \
|
||||
< /etc/nginx/templates/nginx.conf.template \
|
||||
> /etc/nginx/nginx.conf
|
||||
|
||||
# 验证 Nginx 配置
|
||||
nginx -t
|
||||
|
||||
# 启动 Nginx
|
||||
exec nginx -g 'daemon off;'
|
||||
|
||||
|
||||
191
frontend-v2/nginx.conf
Normal file
191
frontend-v2/nginx.conf
Normal file
@@ -0,0 +1,191 @@
|
||||
# Nginx 配置文件 - AI临床研究平台前端服务
|
||||
# 用途:托管 React SPA + 反向代理后端 API
|
||||
|
||||
user nginx;
|
||||
worker_processes auto; # 自动根据 CPU 核心数调整
|
||||
|
||||
# ⚠️ 日志输出到 stderr(SAE 会自动收集)
|
||||
error_log /dev/stderr warn;
|
||||
pid /var/run/nginx.pid;
|
||||
|
||||
events {
|
||||
worker_connections 1024; # 每个 worker 进程的最大连接数
|
||||
use epoll; # Linux 下使用 epoll(高性能)
|
||||
}
|
||||
|
||||
http {
|
||||
include /etc/nginx/mime.types;
|
||||
default_type application/octet-stream;
|
||||
|
||||
# 日志格式
|
||||
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
'$status $body_bytes_sent "$http_referer" '
|
||||
'"$http_user_agent" "$http_x_forwarded_for"';
|
||||
|
||||
# ⚠️ 日志输出到 stdout(SAE 会自动收集,避免磁盘写满)
|
||||
access_log /dev/stdout main;
|
||||
|
||||
# 性能优化
|
||||
sendfile on;
|
||||
tcp_nopush on;
|
||||
tcp_nodelay on;
|
||||
keepalive_timeout 65;
|
||||
types_hash_max_size 2048;
|
||||
|
||||
# Gzip 压缩(减少传输大小)
|
||||
gzip on;
|
||||
gzip_vary on;
|
||||
gzip_proxied any;
|
||||
gzip_comp_level 6;
|
||||
gzip_types text/plain text/css text/xml text/javascript
|
||||
application/json application/javascript application/xml+rss
|
||||
application/rss+xml font/truetype font/opentype
|
||||
application/vnd.ms-fontobject image/svg+xml;
|
||||
gzip_disable "msie6";
|
||||
|
||||
# 上游后端服务(Backend Service)
|
||||
upstream backend {
|
||||
# ⚠️ 重要:这里的地址在部署时需要替换为真实的后端内网地址
|
||||
# SAE 部署时,通过环境变量注入:容器启动时由 docker-entrypoint.sh 使用 envsubst 替换
|
||||
server ${BACKEND_SERVICE_HOST}:${BACKEND_SERVICE_PORT} fail_timeout=30s max_fails=3;
|
||||
|
||||
# 如果有多个后端实例(负载均衡)
|
||||
# server 172.17.x.x:3001 weight=1;
|
||||
# server 172.17.x.y:3001 weight=1;
|
||||
|
||||
keepalive 32; # 保持连接池
|
||||
}
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
server_name _; # 接受所有域名
|
||||
|
||||
# 根目录(React 构建产物)
|
||||
root /usr/share/nginx/html;
|
||||
index index.html;
|
||||
|
||||
# 字符集
|
||||
charset utf-8;
|
||||
|
||||
# ==================== 静态资源处理 ====================
|
||||
|
||||
# 主页面(index.html)- 不缓存
|
||||
location = / {
|
||||
try_files /index.html =404;
|
||||
add_header Cache-Control "no-cache, no-store, must-revalidate";
|
||||
add_header Pragma "no-cache";
|
||||
add_header Expires "0";
|
||||
}
|
||||
|
||||
location = /index.html {
|
||||
add_header Cache-Control "no-cache, no-store, must-revalidate";
|
||||
add_header Pragma "no-cache";
|
||||
add_header Expires "0";
|
||||
}
|
||||
|
||||
# JS/CSS 文件 - 强缓存(文件名带 hash)
|
||||
location ~* \.(js|css)$ {
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
access_log off;
|
||||
}
|
||||
|
||||
# 图片/字体文件 - 强缓存
|
||||
location ~* \.(png|jpg|jpeg|gif|ico|svg|webp|woff|woff2|ttf|eot)$ {
|
||||
expires 1y;
|
||||
add_header Cache-Control "public, immutable";
|
||||
access_log off;
|
||||
}
|
||||
|
||||
# ==================== API 反向代理 ====================
|
||||
|
||||
# 后端 API 代理(关键配置)
|
||||
location /api/ {
|
||||
# 代理到后端服务
|
||||
proxy_pass http://backend;
|
||||
|
||||
# 保留原始请求信息
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# 超时配置(AI 对话、文件处理可能耗时较长)
|
||||
proxy_connect_timeout 300s;
|
||||
proxy_send_timeout 300s;
|
||||
proxy_read_timeout 300s;
|
||||
|
||||
# 缓冲配置
|
||||
proxy_buffering off; # 关闭缓冲(实时流式响应)
|
||||
proxy_request_buffering off; # 支持大文件上传
|
||||
|
||||
# WebSocket 支持(如果后续需要)
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
# 错误处理
|
||||
proxy_next_upstream error timeout invalid_header http_500 http_502 http_503;
|
||||
proxy_next_upstream_tries 2;
|
||||
}
|
||||
|
||||
# ==================== SPA 路由支持 ====================
|
||||
|
||||
# React Router 路由回退
|
||||
# 所有非文件请求都返回 index.html(SPA 的核心)
|
||||
location / {
|
||||
try_files $uri $uri/ /index.html;
|
||||
|
||||
# 禁用缓存(用户刷新时总是获取最新页面)
|
||||
add_header Cache-Control "no-cache, no-store, must-revalidate";
|
||||
}
|
||||
|
||||
# ==================== 安全加固 ====================
|
||||
|
||||
# 隐藏 Nginx 版本号
|
||||
server_tokens off;
|
||||
|
||||
# 禁止访问隐藏文件
|
||||
location ~ /\. {
|
||||
deny all;
|
||||
access_log off;
|
||||
log_not_found off;
|
||||
}
|
||||
|
||||
# 禁止访问特定文件
|
||||
location ~* \.(bak|config|sql|fla|psd|ini|log|sh|inc|swp|dist)$ {
|
||||
deny all;
|
||||
}
|
||||
|
||||
# ==================== 健康检查 ====================
|
||||
|
||||
# SAE 健康检查端点
|
||||
location /health {
|
||||
access_log off;
|
||||
return 200 "healthy\n";
|
||||
add_header Content-Type text/plain;
|
||||
}
|
||||
|
||||
# Nginx 状态页(用于监控)
|
||||
location /nginx_status {
|
||||
stub_status on;
|
||||
access_log off;
|
||||
# 仅允许内网访问
|
||||
allow 10.0.0.0/8;
|
||||
allow 172.17.0.0/16;
|
||||
allow 192.168.0.0/16;
|
||||
deny all;
|
||||
}
|
||||
|
||||
# ==================== 错误页面 ====================
|
||||
|
||||
error_page 404 /index.html; # SPA 路由回退
|
||||
error_page 500 502 503 504 /50x.html;
|
||||
|
||||
location = /50x.html {
|
||||
root /usr/share/nginx/html;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ const ModuleErrorFallback = ({
|
||||
const navigate = useNavigate()
|
||||
|
||||
// 是否显示详细错误信息(开发环境显示,生产环境隐藏)
|
||||
const isDevelopment = import.meta.env?.DEV ?? false
|
||||
const isDevelopment = import.meta.env.DEV
|
||||
|
||||
/**
|
||||
* 处理重试
|
||||
|
||||
@@ -10,7 +10,7 @@ ReactDOM.createRoot(document.getElementById('root')!).render(
|
||||
<ConfigProvider
|
||||
locale={zhCN}
|
||||
theme={{
|
||||
cssVar: true, // ⭐ 启用 CSS 变量(Ant Design 6.0 新特性)
|
||||
cssVar: { prefix: 'ant' }, // ⭐ 启用 CSS 变量(Ant Design 6.0 新特性)
|
||||
token: {
|
||||
colorPrimary: '#10b981', // emerald-500(工具C主题色)
|
||||
borderRadius: 8,
|
||||
|
||||
@@ -12,7 +12,7 @@ import {
|
||||
import type { ConclusionType } from '../types';
|
||||
|
||||
interface ConclusionTagProps {
|
||||
conclusion: ConclusionType;
|
||||
conclusion: ConclusionType | 'pending';
|
||||
showIcon?: boolean;
|
||||
size?: 'small' | 'middle' | 'large';
|
||||
}
|
||||
@@ -42,6 +42,12 @@ const ConclusionTag: React.FC<ConclusionTagProps> = ({
|
||||
icon: <QuestionCircleOutlined />,
|
||||
text: '不确定',
|
||||
};
|
||||
case 'pending':
|
||||
return {
|
||||
color: 'processing',
|
||||
icon: <QuestionCircleOutlined />,
|
||||
text: '待处理',
|
||||
};
|
||||
default:
|
||||
return {
|
||||
color: 'default',
|
||||
|
||||
@@ -531,6 +531,8 @@ export default FulltextDetailDrawer;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
* 4. 人工复核
|
||||
*/
|
||||
|
||||
import { useState } from 'react';
|
||||
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
|
||||
import { message } from 'antd';
|
||||
import { aslApi } from '../api';
|
||||
@@ -131,5 +130,6 @@ export function useFulltextResults({
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -35,10 +35,10 @@ export function useFulltextTask({
|
||||
enabled: enabled && !!taskId,
|
||||
refetchInterval: refetchInterval !== undefined
|
||||
? refetchInterval
|
||||
: ((data) => {
|
||||
: ((data: any) => {
|
||||
// 默认行为:任务进行中时每2秒轮询,否则停止
|
||||
if (!data?.data) return false;
|
||||
const status = (data.data as any).status;
|
||||
const status = data.data.status;
|
||||
return status === 'processing' || status === 'pending' ? 2000 : false;
|
||||
}),
|
||||
retry: 1,
|
||||
@@ -94,5 +94,6 @@ export function useFulltextTask({
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -33,12 +33,11 @@ export function useScreeningResults({
|
||||
queryFn: () => aslApi.getScreeningResultsList(projectId, { page, pageSize, filter }),
|
||||
enabled: enabled && !!projectId,
|
||||
staleTime: 1000 * 30, // 30秒内认为数据是新鲜的
|
||||
keepPreviousData: true, // 切换页面时保留上一页数据,避免闪烁
|
||||
});
|
||||
|
||||
const results = data?.data?.items || [];
|
||||
const total = data?.data?.total || 0;
|
||||
const totalPages = data?.data?.totalPages || 0;
|
||||
const results = (data as any)?.data?.items || [];
|
||||
const total = (data as any)?.data?.total || 0;
|
||||
const totalPages = (data as any)?.data?.totalPages || 0;
|
||||
|
||||
// 人工复核Mutation
|
||||
const reviewMutation = useMutation({
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
import { useState } from 'react';
|
||||
import { useParams, useSearchParams } from 'react-router-dom';
|
||||
import { useParams } from 'react-router-dom';
|
||||
import {
|
||||
Card,
|
||||
Statistic,
|
||||
@@ -59,8 +59,6 @@ interface FulltextResultItem {
|
||||
|
||||
const FulltextResults = () => {
|
||||
const { taskId } = useParams<{ taskId: string }>();
|
||||
const [searchParams] = useSearchParams();
|
||||
const projectId = searchParams.get('projectId') || '';
|
||||
|
||||
const [activeTab, setActiveTab] = useState<'all' | 'included' | 'excluded' | 'pending'>('all');
|
||||
const [selectedRowKeys, setSelectedRowKeys] = useState<string[]>([]);
|
||||
@@ -108,7 +106,7 @@ const FulltextResults = () => {
|
||||
const results = resultsData?.items || [];
|
||||
|
||||
// 导出Excel
|
||||
const handleExport = async (filter: 'all' | 'included' | 'excluded' | 'pending') => {
|
||||
const handleExport = async (_filter: 'all' | 'included' | 'excluded' | 'pending') => {
|
||||
try {
|
||||
message.loading({ content: '正在生成Excel...', key: 'export' });
|
||||
|
||||
@@ -432,7 +430,7 @@ const FulltextResults = () => {
|
||||
expandable={{
|
||||
expandedRowRender,
|
||||
expandedRowKeys,
|
||||
onExpand: (expanded, record) => toggleRowExpanded(record.resultId),
|
||||
onExpand: (_expanded, record) => toggleRowExpanded(record.resultId),
|
||||
expandIcon: () => null,
|
||||
}}
|
||||
pagination={{
|
||||
@@ -485,5 +483,6 @@ export default FulltextResults;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user