- Add one-click research protocol generation with streaming output - Implement Word document export via Pandoc integration - Add dynamic dual-panel layout with resizable split pane - Implement collapsible content for StatePanel stages - Add conversation history management with title auto-update - Fix scroll behavior, markdown rendering, and UI layout issues - Simplify conversation creation logic for reliability
219 lines
6.3 KiB
Python
219 lines
6.3 KiB
Python
"""
|
||
文档导出服务 - Markdown 转 Word
|
||
|
||
功能:
|
||
- Markdown → Docx 转换(使用 Pandoc)
|
||
- 支持自定义 Word 模板(Reference Doc)
|
||
- 保证输出格式符合伦理委员会要求
|
||
"""
|
||
|
||
import os
|
||
import tempfile
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
from loguru import logger
|
||
|
||
# pypandoc is an optional dependency: try to import it and record the outcome
# in PANDOC_AVAILABLE so the rest of the module can check before using it.
try:
    import pypandoc
except ImportError:
    PANDOC_AVAILABLE = False
    logger.warning("pypandoc 未安装,Word 导出功能不可用")
else:
    PANDOC_AVAILABLE = True
|
||
|
||
|
||
# Template directory: an "assets" folder that sits next to this module.
ASSETS_DIR = Path(__file__).parent.joinpath("assets")
# Word reference document used when the caller does not supply a usable one.
DEFAULT_TEMPLATE = ASSETS_DIR.joinpath("protocol_template.docx")
|
||
|
||
|
||
def check_pandoc_available() -> dict:
    """
    Report whether Pandoc can be used for conversion.

    Returns:
        A dict with three keys:
            "available": True only when pypandoc was imported and
                ``pypandoc.get_pandoc_version()`` succeeded.
            "version": the value returned by ``get_pandoc_version()``,
                or None when unavailable.
            "message": human-readable status text.
    """
    # Shared skeleton for both failure cases; "message" is filled in below so
    # the key order stays available / version / message.
    status = {"available": False, "version": None}

    # The pypandoc import at module load time failed.
    if not PANDOC_AVAILABLE:
        status["message"] = "pypandoc 未安装,请运行: pip install pypandoc"
        return status

    try:
        pandoc_version = pypandoc.get_pandoc_version()
    except OSError as exc:
        # pypandoc is importable, but querying the Pandoc version failed.
        status["message"] = f"Pandoc 未安装或不在 PATH 中: {str(exc)}。请安装 Pandoc: https://pandoc.org/installing.html"
        return status

    return {
        "available": True,
        "version": pandoc_version,
        "message": f"Pandoc {pandoc_version} 已就绪",
    }
|
||
|
||
|
||
def _build_reference_doc_args(use_template: bool, template_path: Optional[str]) -> list:
    """Return the Pandoc ``--reference-doc`` argument list for the template to use, if any."""
    if not use_template:
        return []

    if template_path:
        if Path(template_path).exists():
            return [f'--reference-doc={template_path}']
        # A custom template was requested but is missing: say so instead of
        # silently switching to another template.
        logger.warning(f"自定义模板不存在: {template_path},将尝试使用默认模板")

    if DEFAULT_TEMPLATE.exists():
        logger.info(f"使用默认模板: {DEFAULT_TEMPLATE}")
        return [f'--reference-doc={DEFAULT_TEMPLATE}']

    logger.warning("未找到 Word 模板,将使用 Pandoc 默认样式")
    return []


def convert_markdown_to_docx(
    markdown_text: str,
    output_path: Optional[str] = None,
    use_template: bool = True,
    template_path: Optional[str] = None
) -> dict:
    """
    Convert Markdown text into a Word (.docx) document.

    Args:
        markdown_text: Text in Markdown format.
        output_path: Destination file path. When omitted, a temporary
            ``.docx`` file is created; on success the caller owns that file,
            on failure it is removed again.
        use_template: Whether to pass a reference document to Pandoc.
        template_path: Custom reference document. If it is not given or does
            not exist, the bundled default template is used when present.

    Returns:
        {
            "success": bool,
            "output_path": str or None,  # path of the generated file
            "file_size": int,            # size in bytes, 0 on failure
            "message": str,
            "error": str                 # present only on failure
        }
        No exception is propagated: any failure during conversion is logged
        and reported through the returned dict.
    """
    # Bail out early when Pandoc cannot be used.
    pandoc_status = check_pandoc_available()
    if not pandoc_status["available"]:
        return {
            "success": False,
            "output_path": None,
            "file_size": 0,
            "message": pandoc_status["message"],
            "error": "Pandoc 不可用"
        }

    # Remember whether this call created the output file itself, so a failed
    # conversion does not leave an orphaned temp file behind.
    created_temp_file = False

    try:
        if output_path is None:
            fd, output_path = tempfile.mkstemp(suffix='.docx')
            created_temp_file = True
            # Only the path is needed; Pandoc writes the file itself.
            os.close(fd)

        extra_args = _build_reference_doc_args(use_template, template_path)

        # No table of contents is requested on purpose (--toc / --toc-depth):
        #   1. Pandoc's TOC heading is "Table of Contents", which does not
        #      fit a Chinese document.
        #   2. The protocol body already has a chapter structure.
        # Users can insert a TOC manually in Word if they need one.

        logger.info(f"开始转换 Markdown → Docx, 文本长度: {len(markdown_text)} 字符")

        pypandoc.convert_text(
            markdown_text,
            'docx',
            format='markdown',
            outputfile=output_path,
            extra_args=extra_args
        )

        file_size = os.path.getsize(output_path)

        logger.info(f"转换成功: {output_path}, 大小: {file_size} bytes")

        return {
            "success": True,
            "output_path": output_path,
            "file_size": file_size,
            "message": f"成功生成 Word 文档 ({file_size} bytes)"
        }

    except Exception as e:
        # Boundary handler: callers rely on a result dict, not an exception.
        logger.error(f"Markdown → Docx 转换失败: {str(e)}")

        if created_temp_file:
            # Best-effort cleanup; a cleanup failure must not mask the
            # original conversion error reported below.
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                logger.warning(f"临时文件清理失败: {output_path} ({cleanup_error})")

        return {
            "success": False,
            "output_path": None,
            "file_size": 0,
            "message": "转换失败",
            "error": str(e)
        }
|
||
|
||
|
||
def create_protocol_docx(
    sections: dict,
    output_path: Optional[str] = None,
    title: str = "临床研究方案"
) -> dict:
    """
    Build a research-protocol Word document from per-section content.

    Sections are emitted in a fixed order under numbered level-2 headings,
    below a level-1 heading holding ``title``. Keys in ``sections`` that are
    not in the fixed list are ignored. Sections that are missing, empty, or
    contain only whitespace are skipped so that no heading without a body
    ends up in the document.

    Args:
        sections: Mapping of section key to Markdown content, e.g.
            {
                "title": "...",
                "background": "...",
                "objectives": "...",
                ...
            }
            ``None`` is treated as an empty mapping.
        output_path: Output file path, forwarded to
            ``convert_markdown_to_docx``.
        title: Document title.

    Returns:
        The result dict returned by ``convert_markdown_to_docx``.
    """
    # Section configuration: (key in ``sections``, heading text), in output order.
    section_config = [
        ("title", "1. 研究题目"),
        ("background", "2. 研究背景与立题依据"),
        ("objectives", "3. 研究目的"),
        ("design", "4. 研究设计"),
        ("subjects", "5. 研究对象(纳入/排除标准)"),
        ("sample_size", "6. 样本量估算"),
        ("implementation", "7. 研究实施步骤与技术路线"),
        ("endpoints", "8. 观察指标"),
        ("data_management", "9. 数据管理与质量控制"),
        ("safety", "10. 安全性评价"),
        ("statistics", "11. 统计分析计划"),
        ("ethics", "12. 伦理与知情同意"),
        ("timeline", "13. 研究时间表"),
        ("references", "14. 参考文献"),
    ]

    # Tolerate a missing mapping instead of failing on ``None.get``.
    sections = sections or {}

    # Assemble the Markdown document, starting with the title heading.
    markdown_parts = [f"# {title}\n\n"]

    for key, heading in section_config:
        content = sections.get(key, "")
        # Whitespace-only text counts as absent; the content itself is
        # inserted unmodified when it is kept.
        has_content = content.strip() if isinstance(content, str) else content
        if not has_content:
            continue
        markdown_parts.append(f"## {heading}\n\n{content}\n\n")

    markdown_text = "".join(markdown_parts)

    return convert_markdown_to_docx(markdown_text, output_path)
|
||
|
||
|
||
# Names exported by ``from <module> import *``.
__all__ = ["check_pandoc_available", "convert_markdown_to_docx", "create_protocol_docx"]
|
||
|