feat(aia): Protocol Agent MVP complete with one-click generation and Word export

- Add one-click research protocol generation with streaming output

- Implement Word document export via Pandoc integration

- Add dynamic dual-panel layout with resizable split pane

- Implement collapsible content for StatePanel stages

- Add conversation history management with title auto-update

- Fix scroll behavior, markdown rendering, and UI layout issues

- Simplify conversation creation logic for reliability
This commit is contained in:
2026-01-25 19:16:36 +08:00
parent 4d7d97ca19
commit 303dd78c54
332 changed files with 6204 additions and 617 deletions

View File

@@ -90,5 +90,6 @@ models/

View File

@@ -9,7 +9,7 @@
- 健康检查
"""
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi import FastAPI, File, UploadFile, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
@@ -63,6 +63,8 @@ from services.dc_executor import validate_code, execute_pandas_code
# 新增统一文档处理器RAG 引擎使用)
from services.document_processor import DocumentProcessor, convert_to_markdown
from services.pdf_markdown_processor import PdfMarkdownProcessor, extract_pdf_to_markdown
# 新增文档导出服务Markdown → Word
from services.doc_export_service import check_pandoc_available, convert_markdown_to_docx, create_protocol_docx
# 兼容nougat 相关(已废弃,保留空实现避免报错)
def check_nougat_available():
    """Report whether the Nougat backend is available.

    Nougat support is deprecated; this stub is kept only so existing
    callers keep working, and it always answers ``False``.
    """
    return False
@@ -243,6 +245,19 @@ class FillnaMiceRequest(BaseModel):
random_state: int = 42
class MarkdownToDocxRequest(BaseModel):
    """Request body for the Markdown-to-Word conversion endpoint."""
    content: str  # Markdown source text to convert
    use_template: bool = True  # whether to apply a Word reference template
    # Document title.
    # NOTE(review): the /api/convert/docx handler visible in this file does
    # not read this field — confirm whether it should affect the output.
    title: str = "临床研究方案"
class ProtocolToDocxRequest(BaseModel):
    """Request body for exporting a sectioned research protocol to Word."""
    sections: Dict[str, str]  # section text, keyed by section identifier
    title: str = "临床研究方案"  # document title (used as the top-level heading)
# ==================== API路由 ====================
@app.get("/")
@@ -2106,6 +2121,160 @@ async def operation_fillna_mice(request: FillnaMiceRequest):
}, status_code=400)
# ==================== Word 导出 API ====================
@app.get("/api/pandoc/status")
async def pandoc_status():
    """Report whether Pandoc can be used by this service.

    Returns:
        A JSON object with the keys ``available`` (bool), ``version``
        (str or null) and ``message`` (str). If the check itself raises,
        a payload with ``available`` set to false is returned instead of
        an error response.
    """
    try:
        status = check_pandoc_available()
        logger.info(f"Pandoc 状态检查: {status}")
        return JSONResponse(content=status)
    except Exception as exc:
        # Any failure is reported as "not available" so the caller always
        # receives the same payload shape.
        logger.error(f"Pandoc 状态检查失败: {exc}")
        fallback = {
            "available": False,
            "version": None,
            "message": f"检查失败: {exc}",
        }
        return JSONResponse(content=fallback)
@app.post("/api/convert/docx")
async def convert_to_docx(request: MarkdownToDocxRequest):
    """Convert Markdown text to a Word (.docx) document.

    Args:
        request: MarkdownToDocxRequest
            - content: Markdown text to convert
            - use_template: whether to apply the Word template (default True)
            - title: document title (not used by this handler)

    Returns:
        The generated document as binary data with media type
        application/vnd.openxmlformats-officedocument.wordprocessingml.document.

    Raises:
        HTTPException: status 500 when the conversion reports failure or
            any unexpected error occurs.
    """
    # NOTE(review): the conversion below is a synchronous call inside an
    # ``async def`` handler; presumably it blocks the event loop while
    # Pandoc runs — confirm whether that is acceptable.
    try:
        logger.info(f"开始转换 Markdown → Word, 内容长度: {len(request.content)} 字符")

        # Run the conversion
        result = convert_markdown_to_docx(
            markdown_text=request.content,
            use_template=request.use_template
        )

        if not result["success"]:
            logger.error(f"转换失败: {result.get('error', 'Unknown error')}")
            raise HTTPException(
                status_code=500,
                detail=result.get("error", "转换失败")
            )

        # Read the generated file. The cleanup sits in ``finally`` so the
        # temporary file is removed even when reading it fails.
        output_path = result["output_path"]
        try:
            with open(output_path, 'rb') as f:
                content = f.read()
        finally:
            try:
                os.remove(output_path)
            except OSError as e:
                # Best-effort cleanup: a leftover file must not fail the request
                logger.warning(f"清理临时文件失败: {e}")

        logger.info(f"Markdown → Word 转换成功, 文件大小: {len(content)} bytes")

        # Return the document as a download
        return Response(
            content=content,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={
                "Content-Disposition": 'attachment; filename="document.docx"'
            }
        )
    except HTTPException:
        # Already a well-formed HTTP error; pass it through unchanged
        raise
    except Exception as e:
        logger.error(f"Markdown → Word 转换失败: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"转换失败: {str(e)}"
        )
@app.post("/api/protocol/export/docx")
async def export_protocol_to_docx(request: ProtocolToDocxRequest):
    """Export a sectioned research protocol as a Word (.docx) document.

    Args:
        request: ProtocolToDocxRequest
            - sections: mapping of section key to section text
            - title: document title

    Returns:
        The generated document as binary data with media type
        application/vnd.openxmlformats-officedocument.wordprocessingml.document.

    Raises:
        HTTPException: status 500 when the export reports failure or any
            unexpected error occurs.
    """
    # NOTE(review): the export below is a synchronous call inside an
    # ``async def`` handler; presumably it blocks the event loop while
    # Pandoc runs — confirm whether that is acceptable.
    try:
        logger.info(f"开始导出研究方案, 章节数: {len(request.sections)}")

        # Run the export
        result = create_protocol_docx(
            sections=request.sections,
            title=request.title
        )

        if not result["success"]:
            logger.error(f"导出失败: {result.get('error', 'Unknown error')}")
            raise HTTPException(
                status_code=500,
                detail=result.get("error", "导出失败")
            )

        # Read the generated file. The cleanup sits in ``finally`` so the
        # temporary file is removed even when reading it fails.
        output_path = result["output_path"]
        try:
            with open(output_path, 'rb') as f:
                content = f.read()
        finally:
            try:
                os.remove(output_path)
            except OSError as e:
                # Best-effort cleanup: a leftover file must not fail the request
                logger.warning(f"清理临时文件失败: {e}")

        logger.info(f"研究方案导出成功, 文件大小: {len(content)} bytes")

        # Return the document as a download
        return Response(
            content=content,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={
                "Content-Disposition": 'attachment; filename="research_protocol.docx"'
            }
        )
    except HTTPException:
        # Already a well-formed HTTP error; pass it through unchanged
        raise
    except Exception as e:
        logger.error(f"研究方案导出失败: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"导出失败: {str(e)}"
        )
# ==================== 启动配置 ====================
if __name__ == "__main__":

View File

@@ -78,5 +78,6 @@ __version__ = '1.0.0'

View File

@@ -211,5 +211,6 @@ def get_missing_summary(df: pd.DataFrame) -> dict:

View File

@@ -171,5 +171,6 @@ def apply_filter(

View File

@@ -335,5 +335,6 @@ def get_unpivot_preview(

View File

@@ -10,6 +10,7 @@ pdfplumber==0.10.3 # 备用 PDF 处理
# Word处理
mammoth==1.6.0 # Docx → Markdown
python-docx==1.1.0 # Docx 读取
pypandoc>=1.13 # Markdown → Docx (需要系统安装 pandoc)
# Excel/CSV处理
pandas>=2.0.0 # 表格处理

View File

@@ -0,0 +1,218 @@
"""
文档导出服务 - Markdown 转 Word
功能:
- Markdown → Docx 转换(使用 Pandoc
- 支持自定义 Word 模板Reference Doc
- 保证输出格式符合伦理委员会要求
"""
import os
import tempfile
from pathlib import Path
from typing import Optional
from loguru import logger
# Try to import pypandoc; it is an optional dependency, so a missing
# package only disables the export feature instead of failing the import
# of this module.
try:
    import pypandoc
    PANDOC_AVAILABLE = True
except ImportError:
    PANDOC_AVAILABLE = False
    logger.warning("pypandoc 未安装Word 导出功能不可用")

# Template locations: an "assets" directory next to this module, holding
# the default Word reference document.
ASSETS_DIR = Path(__file__).parent / "assets"
DEFAULT_TEMPLATE = ASSETS_DIR / "protocol_template.docx"
def check_pandoc_available() -> dict:
    """Check whether Pandoc can be used for conversions.

    Returns:
        A dict with the keys:
            "available": bool, True when a Pandoc version could be read
            "version": the version reported by pypandoc, or None
            "message": str, human-readable status text
    """
    if not PANDOC_AVAILABLE:
        return {
            "available": False,
            "version": None,
            "message": "pypandoc 未安装,请运行: pip install pypandoc"
        }

    try:
        version = pypandoc.get_pandoc_version()
    except OSError as e:
        # pypandoc is importable but no pandoc executable could be located
        return {
            "available": False,
            "version": None,
            "message": f"Pandoc 未安装或不在 PATH 中: {str(e)}。请安装 Pandoc: https://pandoc.org/installing.html"
        }
    except RuntimeError as e:
        # The executable was found but the version query failed; report it
        # through the same status dict instead of letting it propagate.
        return {
            "available": False,
            "version": None,
            "message": f"无法获取 Pandoc 版本: {str(e)}"
        }

    return {
        "available": True,
        "version": version,
        "message": f"Pandoc {version} 已就绪"
    }
def convert_markdown_to_docx(
    markdown_text: str,
    output_path: Optional[str] = None,
    use_template: bool = True,
    template_path: Optional[str] = None
) -> dict:
    """Convert Markdown text to a Word document using Pandoc.

    Args:
        markdown_text: Markdown-formatted text.
        output_path: Destination file path. When omitted, a temporary
            ``.docx`` file is created and the caller is responsible for
            deleting it after a successful conversion.
        use_template: Whether to pass a Word reference document to Pandoc.
        template_path: Custom reference document. If it is missing, the
            default template is used when that exists.

    Returns:
        A dict with the keys:
            "success": bool
            "output_path": path of the generated file, None on failure
            "file_size": size in bytes, 0 on failure
            "message": str
            "error": str, present only on failure
    """
    # Bail out early when Pandoc cannot be used
    pandoc_status = check_pandoc_available()
    if not pandoc_status["available"]:
        return {
            "success": False,
            "output_path": None,
            "file_size": 0,
            "message": pandoc_status["message"],
            "error": "Pandoc 不可用"
        }

    # Tracks whether this call created the output file, so a failed
    # conversion does not leave an orphaned temporary file behind.
    temp_created = False
    try:
        # Decide where to write
        if output_path is None:
            fd, output_path = tempfile.mkstemp(suffix='.docx')
            temp_created = True
            # Only the path is needed; Pandoc writes the file itself
            os.close(fd)

        # Build the Pandoc arguments
        extra_args = []

        if use_template:
            if template_path and Path(template_path).exists():
                extra_args.append(f'--reference-doc={template_path}')
            else:
                if template_path:
                    # Make the fallback visible instead of ignoring it silently
                    logger.warning(f"指定的模板不存在: {template_path}")
                if DEFAULT_TEMPLATE.exists():
                    extra_args.append(f'--reference-doc={DEFAULT_TEMPLATE}')
                    logger.info(f"使用默认模板: {DEFAULT_TEMPLATE}")
                else:
                    logger.warning("未找到 Word 模板,将使用 Pandoc 默认样式")

        # A table of contents is deliberately not requested:
        # 1. Pandoc titles it "Table of Contents", which does not suit
        #    Chinese documents.
        # 2. The protocol body already carries its own section structure.
        # Users who need one can insert it manually in Word.

        logger.info(f"开始转换 Markdown → Docx, 文本长度: {len(markdown_text)} 字符")

        # Run the conversion
        pypandoc.convert_text(
            markdown_text,
            'docx',
            format='markdown',
            outputfile=output_path,
            extra_args=extra_args
        )

        file_size = os.path.getsize(output_path)
        logger.info(f"转换成功: {output_path}, 大小: {file_size} bytes")

        return {
            "success": True,
            "output_path": output_path,
            "file_size": file_size,
            "message": f"成功生成 Word 文档 ({file_size} bytes)"
        }
    except Exception as e:
        logger.error(f"Markdown → Docx 转换失败: {str(e)}")
        if temp_created:
            # The failure result carries no path, so the caller cannot
            # clean up; remove the temporary file here.
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                logger.warning(f"清理临时文件失败: {cleanup_error}")
        return {
            "success": False,
            "output_path": None,
            "file_size": 0,
            "message": "转换失败",
            "error": str(e)
        }
def create_protocol_docx(
    sections: dict,
    output_path: Optional[str] = None,
    title: str = "临床研究方案"
) -> dict:
    """Build a research-protocol Word document from per-section content.

    The sections are assembled into one Markdown document, with ``title``
    as the level-1 heading and each non-empty section under a fixed,
    numbered level-2 heading, and then converted with
    ``convert_markdown_to_docx``.

    Args:
        sections: Mapping of section key to section text, e.g.
            {
                "title": "...",
                "background": "...",
                "objectives": "...",
                ...
            }
            Keys not listed in the section configuration below are ignored.
        output_path: Destination file path, passed through to the converter.
        title: Document title.

    Returns:
        The result dict produced by ``convert_markdown_to_docx``.
    """
    # Section configuration: output order and heading text.
    # NOTE(review): the numbers are part of the heading text, so skipping
    # an empty section leaves a gap in the numbering — confirm intended.
    section_config = [
        ("title", "1. 研究题目"),
        ("background", "2. 研究背景与立题依据"),
        ("objectives", "3. 研究目的"),
        ("design", "4. 研究设计"),
        ("subjects", "5. 研究对象(纳入/排除标准)"),
        ("sample_size", "6. 样本量估算"),
        ("implementation", "7. 研究实施步骤与技术路线"),
        ("endpoints", "8. 观察指标"),
        ("data_management", "9. 数据管理与质量控制"),
        ("safety", "10. 安全性评价"),
        ("statistics", "11. 统计分析计划"),
        ("ethics", "12. 伦理与知情同意"),
        ("timeline", "13. 研究时间表"),
        ("references", "14. 参考文献"),
    ]

    # Assemble the Markdown document
    markdown_parts = [f"# {title}\n\n"]
    for key, heading in section_config:
        content = sections.get(key, "")
        # Skip missing, empty and whitespace-only sections so the document
        # never contains a heading with nothing under it.
        if not content or not content.strip():
            continue
        markdown_parts.append(f"## {heading}\n\n{content}\n\n")

    markdown_text = "".join(markdown_parts)
    return convert_markdown_to_docx(markdown_text, output_path)
# Public API of this module
__all__ = [
    "check_pandoc_available",
    "convert_markdown_to_docx",
    "create_protocol_docx",
]

View File

@@ -150,3 +150,4 @@ def extract_pdf_to_markdown(pdf_path: str) -> Dict[str, Any]:

View File

@@ -345,5 +345,6 @@ if __name__ == "__main__":

View File

@@ -111,5 +111,6 @@ except Exception as e:

View File

@@ -91,5 +91,6 @@ except Exception as e: