feat(aia): Protocol Agent MVP complete with one-click generation and Word export
- Add one-click research protocol generation with streaming output
- Implement Word document export via Pandoc integration
- Add dynamic dual-panel layout with resizable split pane
- Implement collapsible content for StatePanel stages
- Add conversation history management with title auto-update
- Fix scroll behavior, markdown rendering, and UI layout issues
- Simplify conversation creation logic for reliability
This commit is contained in:
@@ -90,5 +90,6 @@ models/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
- 健康检查
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||
from fastapi import FastAPI, File, UploadFile, HTTPException, Response
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
@@ -63,6 +63,8 @@ from services.dc_executor import validate_code, execute_pandas_code
|
||||
# 新增:统一文档处理器(RAG 引擎使用)
|
||||
from services.document_processor import DocumentProcessor, convert_to_markdown
|
||||
from services.pdf_markdown_processor import PdfMarkdownProcessor, extract_pdf_to_markdown
|
||||
# 新增:文档导出服务(Markdown → Word)
|
||||
from services.doc_export_service import check_pandoc_available, convert_markdown_to_docx, create_protocol_docx
|
||||
|
||||
# 兼容:nougat 相关(已废弃,保留空实现避免报错)
|
||||
def check_nougat_available():
    """Compatibility stub for the deprecated nougat integration.

    Kept as an empty implementation so existing references do not fail.

    Returns:
        Always False.
    """
    return False
|
||||
@@ -243,6 +245,19 @@ class FillnaMiceRequest(BaseModel):
|
||||
random_state: int = 42
|
||||
|
||||
|
||||
class MarkdownToDocxRequest(BaseModel):
    """Request model for the Markdown-to-Word conversion endpoint."""

    # Markdown source text to convert (required).
    content: str
    # Whether to apply the Word template; enabled by default.
    use_template: bool = True
    # Document title.
    title: str = "临床研究方案"
|
||||
|
||||
|
||||
class ProtocolToDocxRequest(BaseModel):
    """Request model for exporting a sectioned research protocol to Word."""

    # Section contents as a string-to-string mapping (required).
    sections: Dict[str, str]
    # Document title.
    title: str = "临床研究方案"
|
||||
|
||||
|
||||
# ==================== API路由 ====================
|
||||
|
||||
@app.get("/")
|
||||
@@ -2106,6 +2121,160 @@ async def operation_fillna_mice(request: FillnaMiceRequest):
|
||||
}, status_code=400)
|
||||
|
||||
|
||||
# ==================== Word 导出 API ====================
|
||||
|
||||
@app.get("/api/pandoc/status")
async def pandoc_status():
    """
    Report whether Pandoc is available.

    Returns:
        JSON body of the form
        {
            "available": bool,
            "version": str,
            "message": str
        }
        If the check itself raises, a payload with "available" set to False
        and the error text in "message" is returned instead.
    """
    try:
        status = check_pandoc_available()
        logger.info(f"Pandoc 状态检查: {status}")
        return JSONResponse(content=status)
    except Exception as exc:
        # Never propagate: the endpoint answers with an "unavailable" payload.
        logger.error(f"Pandoc 状态检查失败: {str(exc)}")
        fallback = {
            "available": False,
            "version": None,
            "message": f"检查失败: {str(exc)}"
        }
        return JSONResponse(content=fallback)
|
||||
|
||||
|
||||
@app.post("/api/convert/docx")
async def convert_to_docx(request: MarkdownToDocxRequest):
    """
    Convert Markdown text into a Word (.docx) document.

    Args:
        request: MarkdownToDocxRequest
            - content: Markdown source text
            - use_template: whether to apply the Word template (default True)
            - title: document title (not read by this handler)

    Returns:
        Response whose body is the .docx binary, served as an attachment
        named "document.docx" with media type
        application/vnd.openxmlformats-officedocument.wordprocessingml.document.

    Raises:
        HTTPException: status 500 when the conversion reports failure or
            any other error occurs while producing the file.
    """
    try:
        logger.info(f"开始转换 Markdown → Word, 内容长度: {len(request.content)} 字符")

        # Run the conversion.
        result = convert_markdown_to_docx(
            markdown_text=request.content,
            use_template=request.use_template
        )

        if not result["success"]:
            logger.error(f"转换失败: {result.get('error', 'Unknown error')}")
            raise HTTPException(
                status_code=500,
                detail=result.get("error", "转换失败")
            )

        # Read the generated file into memory.
        output_path = result["output_path"]
        try:
            with open(output_path, 'rb') as f:
                content = f.read()
        finally:
            # Remove the temporary file even when reading fails, so a failed
            # request does not leave the generated document on disk.
            try:
                os.remove(output_path)
            except OSError as e:
                # Best effort only: a cleanup failure must not fail the request.
                logger.warning(f"清理临时文件失败: {e}")

        logger.info(f"Markdown → Word 转换成功, 文件大小: {len(content)} bytes")

        # Send the document back as a download.
        return Response(
            content=content,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={
                "Content-Disposition": 'attachment; filename="document.docx"'
            }
        )

    except HTTPException:
        # Already a well-formed HTTP error; pass it through unchanged.
        raise
    except Exception as e:
        logger.error(f"Markdown → Word 转换失败: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"转换失败: {str(e)}"
        )
|
||||
|
||||
|
||||
@app.post("/api/protocol/export/docx")
async def export_protocol_to_docx(request: ProtocolToDocxRequest):
    """
    Export a sectioned research protocol as a Word (.docx) document.

    Args:
        request: ProtocolToDocxRequest
            - sections: mapping of section key to section content
            - title: document title

    Returns:
        Response whose body is the .docx binary, served as an attachment
        named "research_protocol.docx".

    Raises:
        HTTPException: status 500 when the export reports failure or any
            other error occurs while producing the file.
    """
    try:
        logger.info(f"开始导出研究方案, 章节数: {len(request.sections)}")

        # Run the export.
        result = create_protocol_docx(
            sections=request.sections,
            title=request.title
        )

        if not result["success"]:
            logger.error(f"导出失败: {result.get('error', 'Unknown error')}")
            raise HTTPException(
                status_code=500,
                detail=result.get("error", "导出失败")
            )

        # Read the generated file into memory.
        output_path = result["output_path"]
        try:
            with open(output_path, 'rb') as f:
                content = f.read()
        finally:
            # Remove the temporary file even when reading fails, so a failed
            # request does not leave the generated document on disk.
            try:
                os.remove(output_path)
            except OSError as e:
                # Best effort only: a cleanup failure must not fail the request.
                logger.warning(f"清理临时文件失败: {e}")

        logger.info(f"研究方案导出成功, 文件大小: {len(content)} bytes")

        # Send the document back as a download.
        return Response(
            content=content,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={
                "Content-Disposition": 'attachment; filename="research_protocol.docx"'
            }
        )

    except HTTPException:
        # Already a well-formed HTTP error; pass it through unchanged.
        raise
    except Exception as e:
        logger.error(f"研究方案导出失败: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"导出失败: {str(e)}"
        )
|
||||
|
||||
|
||||
# ==================== 启动配置 ====================
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -78,5 +78,6 @@ __version__ = '1.0.0'
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -211,5 +211,6 @@ def get_missing_summary(df: pd.DataFrame) -> dict:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -171,5 +171,6 @@ def apply_filter(
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -335,5 +335,6 @@ def get_unpivot_preview(
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ pdfplumber==0.10.3 # 备用 PDF 处理
|
||||
# Word处理
|
||||
mammoth==1.6.0 # Docx → Markdown
|
||||
python-docx==1.1.0 # Docx 读取
|
||||
pypandoc>=1.13 # Markdown → Docx (需要系统安装 pandoc)
|
||||
|
||||
# Excel/CSV处理
|
||||
pandas>=2.0.0 # 表格处理
|
||||
|
||||
218
extraction_service/services/doc_export_service.py
Normal file
218
extraction_service/services/doc_export_service.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""
|
||||
文档导出服务 - Markdown 转 Word
|
||||
|
||||
功能:
|
||||
- Markdown → Docx 转换(使用 Pandoc)
|
||||
- 支持自定义 Word 模板(Reference Doc)
|
||||
- 保证输出格式符合伦理委员会要求
|
||||
"""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from loguru import logger
|
||||
|
||||
# 尝试导入 pypandoc
|
||||
try:
|
||||
import pypandoc
|
||||
PANDOC_AVAILABLE = True
|
||||
except ImportError:
|
||||
PANDOC_AVAILABLE = False
|
||||
logger.warning("pypandoc 未安装,Word 导出功能不可用")
|
||||
|
||||
|
||||
# 模板目录
|
||||
ASSETS_DIR = Path(__file__).parent / "assets"
|
||||
DEFAULT_TEMPLATE = ASSETS_DIR / "protocol_template.docx"
|
||||
|
||||
|
||||
def check_pandoc_available() -> dict:
    """
    Check whether Pandoc can be used.

    Returns:
        {
            "available": bool,
            "version": str,   # None when Pandoc is unavailable
            "message": str
        }
    """
    if PANDOC_AVAILABLE:
        try:
            pandoc_version = pypandoc.get_pandoc_version()
        except OSError as e:
            # pypandoc imported, but asking for the Pandoc version failed.
            return {
                "available": False,
                "version": None,
                "message": f"Pandoc 未安装或不在 PATH 中: {str(e)}。请安装 Pandoc: https://pandoc.org/installing.html"
            }
        return {
            "available": True,
            "version": pandoc_version,
            "message": f"Pandoc {pandoc_version} 已就绪"
        }

    # The pypandoc import at module load failed.
    return {
        "available": False,
        "version": None,
        "message": "pypandoc 未安装,请运行: pip install pypandoc"
    }
|
||||
|
||||
|
||||
def convert_markdown_to_docx(
    markdown_text: str,
    output_path: Optional[str] = None,
    use_template: bool = True,
    template_path: Optional[str] = None
) -> dict:
    """
    Convert Markdown text into a Word document.

    Args:
        markdown_text: Markdown-formatted text.
        output_path: Destination file path. When omitted, a temporary
            ".docx" file is created and its path is returned.
        use_template: Whether to apply a reference document (template).
        template_path: Custom template path. Used only when use_template is
            True and the path exists; otherwise the default template is used
            if present, else Pandoc's default styling.

    Returns:
        {
            "success": bool,
            "output_path": str,   # path of the generated file, None on failure
            "file_size": int,     # size in bytes, 0 on failure
            "message": str,
            "error": str          # present only on failure
        }
    """
    # Bail out early when Pandoc cannot be used.
    pandoc_status = check_pandoc_available()
    if not pandoc_status["available"]:
        return {
            "success": False,
            "output_path": None,
            "file_size": 0,
            "message": pandoc_status["message"],
            "error": "Pandoc 不可用"
        }

    # True only when this call created the output file itself, so the
    # failure path never deletes a file the caller supplied.
    created_temp_file = False

    try:
        # Decide where the document is written.
        if output_path is None:
            fd, output_path = tempfile.mkstemp(suffix='.docx')
            created_temp_file = True
            os.close(fd)

        # Build the extra Pandoc arguments.
        extra_args = []

        if use_template:
            if template_path and Path(template_path).exists():
                extra_args.append(f'--reference-doc={template_path}')
            elif DEFAULT_TEMPLATE.exists():
                extra_args.append(f'--reference-doc={DEFAULT_TEMPLATE}')
                logger.info(f"使用默认模板: {DEFAULT_TEMPLATE}")
            else:
                logger.warning("未找到 Word 模板,将使用 Pandoc 默认样式")

        # A table of contents (--toc / --toc-depth) is deliberately not added:
        # 1. Pandoc titles it "Table of Contents", which does not suit
        #    Chinese documents.
        # 2. The protocol body already has a chapter structure.
        # Users who need one can insert it manually in Word.

        logger.info(f"开始转换 Markdown → Docx, 文本长度: {len(markdown_text)} 字符")

        # Run the conversion.
        pypandoc.convert_text(
            markdown_text,
            'docx',
            format='markdown',
            outputfile=output_path,
            extra_args=extra_args
        )

        file_size = os.path.getsize(output_path)

        logger.info(f"转换成功: {output_path}, 大小: {file_size} bytes")

        return {
            "success": True,
            "output_path": output_path,
            "file_size": file_size,
            "message": f"成功生成 Word 文档 ({file_size} bytes)"
        }

    except Exception as e:
        logger.error(f"Markdown → Docx 转换失败: {str(e)}")

        # The failure result reports no output path, so a temp file created
        # above would otherwise be orphaned on disk.
        if created_temp_file:
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                logger.warning(f"清理临时文件失败: {cleanup_error}")

        return {
            "success": False,
            "output_path": None,
            "file_size": 0,
            "message": "转换失败",
            "error": str(e)
        }
|
||||
|
||||
|
||||
def create_protocol_docx(
    sections: dict,
    output_path: Optional[str] = None,
    title: str = "临床研究方案"
) -> dict:
    """
    Build a research-protocol Word document from per-section content.

    The sections are assembled into one Markdown document (a level-1 title
    followed by a level-2 heading per section) and handed to
    convert_markdown_to_docx.

    Args:
        sections: Mapping of section key to section content, e.g.
            {
                "title": "...",
                "background": "...",
                "objectives": "...",
                ...
            }
            Only the keys listed in the heading table below are used;
            sections whose content is missing or empty are skipped.
        output_path: Destination file path.
        title: Document title.

    Returns:
        The result dictionary returned by convert_markdown_to_docx.
    """
    # Fixed section order and the heading printed for each section key.
    ordered_headings = (
        ("title", "1. 研究题目"),
        ("background", "2. 研究背景与立题依据"),
        ("objectives", "3. 研究目的"),
        ("design", "4. 研究设计"),
        ("subjects", "5. 研究对象(纳入/排除标准)"),
        ("sample_size", "6. 样本量估算"),
        ("implementation", "7. 研究实施步骤与技术路线"),
        ("endpoints", "8. 观察指标"),
        ("data_management", "9. 数据管理与质量控制"),
        ("safety", "10. 安全性评价"),
        ("statistics", "11. 统计分析计划"),
        ("ethics", "12. 伦理与知情同意"),
        ("timeline", "13. 研究时间表"),
        ("references", "14. 参考文献"),
    )

    # Pair each heading with its text, keeping only non-empty sections.
    heading_texts = (
        (heading, sections.get(key, "")) for key, heading in ordered_headings
    )
    chunks = [f"# {title}\n\n"]
    chunks.extend(
        f"## {heading}\n\n{text}\n\n" for heading, text in heading_texts if text
    )

    return convert_markdown_to_docx("".join(chunks), output_path)
|
||||
|
||||
|
||||
# 导出函数
|
||||
__all__ = [
|
||||
"check_pandoc_available",
|
||||
"convert_markdown_to_docx",
|
||||
"create_protocol_docx",
|
||||
]
|
||||
|
||||
@@ -150,3 +150,4 @@ def extract_pdf_to_markdown(pdf_path: str) -> Dict[str, Any]:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -345,5 +345,6 @@ if __name__ == "__main__":
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -111,5 +111,6 @@ except Exception as e:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -91,5 +91,6 @@ except Exception as e:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user