Files
AIclinicalresearch/extraction_service/services/file_utils.py

89 lines
1.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
文件工具函数
"""
import os
from pathlib import Path
from loguru import logger
def detect_file_type(filename: str) -> str:
    """
    Detect the file type from the extension of a file name.

    The extension is the text after the last '.' in ``filename``, compared
    case-insensitively. A name that contains no '.' has no extension and is
    rejected, instead of the whole name being mistaken for an extension
    (e.g. a file literally named "pdf").

    Args:
        filename: File name to inspect.

    Returns:
        File type: 'pdf' | 'docx' | 'txt'

    Raises:
        ValueError: Unsupported file format, including names that have no
            extension at all.
    """
    _, dot, ext = filename.lower().rpartition('.')
    if not dot:
        # rpartition puts the whole string in the last slot when the
        # separator is missing; treat that case as "no extension".
        ext = ''

    if ext in ('pdf', 'docx', 'txt'):
        return ext
    raise ValueError(f"不支持的文件格式: .{ext}")
def cleanup_temp_file(file_path: Path | str) -> None:
"""
清理临时文件
Args:
file_path: 文件路径
"""
try:
if isinstance(file_path, str):
file_path = Path(file_path)
if file_path.exists():
file_path.unlink()
logger.debug(f"清理临时文件: {file_path}")
except Exception as e:
logger.warning(f"清理临时文件失败: {str(e)}")
def get_file_size_mb(file_path: Path | str) -> float:
"""
获取文件大小MB
Args:
file_path: 文件路径
Returns:
文件大小MB
"""
if isinstance(file_path, str):
file_path = Path(file_path)
if file_path.exists():
return file_path.stat().st_size / (1024 * 1024)
return 0.0
def validate_file_size(file_size: int, max_size: int = 52428800) -> bool:
    """
    Check whether a file size is within the allowed limit.

    Args:
        file_size: File size in bytes.
        max_size: Maximum allowed size in bytes; the default 52428800 is
            50 MB (50 * 1024 * 1024). The limit itself is accepted.

    Returns:
        True when ``file_size`` does not exceed ``max_size``, else False.
    """
    within_limit = file_size <= max_size
    return within_limit