feat(asl): Week 2 Day 2 - Excel import with template download and intelligent dedup

Features:
- feat: Excel template generation and download (with examples)
- feat: Excel file parsing in memory (cloud-native, no disk write)
- feat: Field validation (title + abstract required)
- feat: Smart deduplication (DOI priority + Title fallback)
- feat: Literature preview table with statistics
- feat: Complete submission flow (create project + import literatures)

Components:
- feat: Create excelUtils.ts with full Excel processing toolkit
- feat: Enhance TitleScreeningSettings page with upload/preview/submit
- feat: Update API interface signatures and export unified aslApi object

Dependencies:
- chore: Add xlsx library for Excel file processing

Ref: Week 2 Frontend Development - Day 2
Scope: ASL Module MVP - Title Abstract Screening
Cloud-Native: Memory parsing, no file persistence
This commit is contained in:
2025-11-19 10:24:47 +08:00
parent 3634933ece
commit 2e8699c217
178 changed files with 1937 additions and 108 deletions

View File

@@ -142,3 +142,4 @@ export const PermissionProvider = ({ children }: PermissionProviderProps) => {

View File

@@ -17,3 +17,4 @@ export { VERSION_LEVEL, checkVersionLevel } from './types'

View File

@@ -89,3 +89,4 @@ export const checkVersionLevel = (

View File

@@ -46,3 +46,4 @@ export type { UserInfo, UserVersion, PermissionContextType } from './types'

View File

@@ -156,3 +156,4 @@ export default PermissionDenied

View File

@@ -145,3 +145,4 @@ export default RouteGuard

View File

@@ -15,3 +15,4 @@ export { default as PermissionDenied } from './PermissionDenied'

View File

@@ -20,3 +20,4 @@ export default AIAModule

View File

@@ -101,15 +101,13 @@ export async function deleteProject(
/**
* 批量导入文献JSON格式
*/
export async function importLiteratures(
projectId: string,
data: ImportLiteraturesRequest
): Promise<ApiResponse<{
imported: number;
duplicates: number;
failed: number;
export async function importLiteratures(data: {
projectId: string;
literatures: Array<Omit<Literature, 'id' | 'projectId' | 'createdAt'>>;
}): Promise<ApiResponse<{
importedCount: number;
}>> {
return request(`/projects/${projectId}/literatures/import-json`, {
return request('/literatures/import', {
method: 'POST',
body: JSON.stringify(data),
});
@@ -265,3 +263,39 @@ export async function healthCheck(): Promise<ApiResponse<{
return request('/health');
}
// ==================== Unified API export object ====================
/**
* Aggregated ASL API object.
*
* Collects the request helpers referenced below (declared elsewhere in this
* module) under one export so callers can write `aslApi.createProject(...)`,
* `aslApi.importLiteratures(...)`, and so on.
*/
export const aslApi = {
// Project management
createProject,
listProjects,
getProject,
updateProject,
deleteProject,
// Literature management
importLiteratures,
listLiteratures,
deleteLiterature,
// Screening tasks
startScreening,
getTaskProgress,
// Screening results
getScreeningResults,
updateScreeningResult,
batchUpdateScreeningResults,
// Export
exportScreeningResults,
// Statistics
getProjectStatistics,
// Health check
healthCheck,
};

View File

@@ -150,3 +150,4 @@ const ASLLayout = () => {
export default ASLLayout;

View File

@@ -2,11 +2,12 @@
* 标题摘要初筛 - 设置与启动页面
*
* 功能:
* 1. Excel文献导入上传 + 模板下载
* 2. PICOS标准配置
* 3. 纳入/排除标准配置
* 4. 筛选风格选择
* 5. 启动AI筛选
* 1. Excel文献导入上传 + 解析 + 去重
* 2. Excel模板下载
* 3. PICOS标准配置
* 4. 纳入/排除标准配置
* 5. 筛选风格选择
* 6. 启动AI筛选
*/
import { useState } from 'react';
@@ -25,13 +26,26 @@ import {
Divider,
Row,
Col,
Table,
Statistic,
Tag,
} from 'antd';
import type { UploadFile } from 'antd';
import {
InboxOutlined,
QuestionCircleOutlined,
DownloadOutlined,
PlayCircleOutlined,
CheckCircleOutlined,
ExclamationCircleOutlined,
} from '@ant-design/icons';
import {
downloadExcelTemplate,
processExcelFile,
LiteratureData,
ParseStatistics
} from '../utils/excelUtils';
import { aslApi } from '../api';
const { TextArea } = Input;
const { Dragger } = Upload;
@@ -39,69 +53,267 @@ const { Dragger } = Upload;
const TitleScreeningSettings = () => {
const navigate = useNavigate();
const [form] = Form.useForm();
const [fileList, setFileList] = useState<any[]>([]);
const [literatureCount, setLiteratureCount] = useState(0);
// Excel上传相关状态
const [fileList, setFileList] = useState<UploadFile[]>([]);
const [literatures, setLiteratures] = useState<LiteratureData[]>([]);
const [parseStats, setParseStats] = useState<ParseStatistics | null>(null);
const [isUploading, setIsUploading] = useState(false);
const [canStart, setCanStart] = useState(false);
// 提交相关状态
const [isSubmitting, setIsSubmitting] = useState(false);
// 处理Excel上传
const handleFileUpload = async (file: File) => {
/**
* 下载Excel模板
*/
const handleDownloadTemplate = () => {
try {
// TODO: Week 2 Day 2 实现Excel解析
// 这里只是占位逻辑
setFileList([file]);
setLiteratureCount(100); // 模拟导入100篇文献
setCanStart(true);
message.success(`成功导入 100 篇文献`);
downloadExcelTemplate();
message.success('Excel模板下载成功');
} catch (error) {
message.error('模板下载失败');
}
};
/**
* 处理Excel文件上传
*/
const handleFileUpload = async (file: File): Promise<boolean> => {
try {
setIsUploading(true);
message.loading({ content: '正在解析Excel文件...', key: 'parsing' });
// 解析Excel文件
const { valid, statistics } = await processExcelFile(file);
setLiteratures(valid);
setParseStats(statistics);
setFileList([{
uid: String(Date.now()),
name: file.name,
status: 'done',
originFileObj: file as any,
}]);
// 检查是否可以启动筛选
const formValid = await form.validateFields()
.then(() => true)
.catch(() => false);
setCanStart(formValid && valid.length > 0);
message.success({
content: `Excel解析成功${statistics.total} 条,有效 ${statistics.afterDedup}`,
key: 'parsing',
duration: 3,
});
// 如果有错误,显示警告
if (statistics.invalid > 0) {
message.warning(`${statistics.invalid} 条数据验证失败,已自动过滤`, 3);
}
if (statistics.duplicates > 0) {
message.info(`检测到 ${statistics.duplicates} 条重复数据,已自动去重`, 3);
}
return false; // 阻止自动上传
} catch (error) {
message.error('文献导入失败');
message.error({
content: `文件解析失败: ${(error as Error).message}`,
key: 'parsing',
});
return false;
} finally {
setIsUploading(false);
}
};
// 下载Excel模板
const handleDownloadTemplate = () => {
// TODO: Week 2 Day 2 实现模板下载
message.info('Excel模板下载功能开发中...');
/**
* 移除上传的文件
*/
const handleFileRemove = () => {
setFileList([]);
setLiteratures([]);
setParseStats(null);
setCanStart(false);
};
// 启动筛选
const handleStartScreening = async () => {
/**
* 提交表单并启动筛选
*/
const handleSubmit = async () => {
try {
setIsSubmitting(true);
// 1. 验证表单
const values = await form.validateFields();
// TODO: Week 2 调用后端API启动筛选
console.log('启动筛选:', values);
// 2. 检查是否已导入文献
if (literatures.length === 0) {
message.warning('请先导入文献');
return;
}
message.success('AI筛选已启动正在处理中...');
message.loading({ content: '正在创建项目...', key: 'submit' });
// 3. 创建项目
const createProjectResponse = await aslApi.createProject({
projectName: `标题摘要初筛 - ${new Date().toLocaleDateString()}`,
picoCriteria: values.picoCriteria,
inclusionCriteria: values.inclusionCriteria,
exclusionCriteria: values.exclusionCriteria,
screeningConfig: {
style: values.screeningConfig.style,
models: ['DeepSeek-V3', 'Qwen-Max'], // 默认双模型
},
});
if (!createProjectResponse.success || !createProjectResponse.data) {
throw new Error('项目创建失败');
}
const projectId = createProjectResponse.data.id;
message.loading({ content: '正在导入文献...', key: 'submit' });
// 4. 导入文献
const importResponse = await aslApi.importLiteratures({
projectId,
literatures: literatures.map(lit => ({
title: lit.title,
abstract: lit.abstract,
pmid: lit.pmid,
authors: lit.authors,
journal: lit.journal,
publicationYear: lit.publicationYear,
doi: lit.doi,
})),
});
if (!importResponse.success) {
throw new Error('文献导入失败');
}
// 5. TODO: 启动筛选任务Week 2 Day 3-4实现
// await aslApi.startScreening(projectId);
message.success({
content: '项目创建成功!正在跳转到审核工作台...',
key: 'submit',
duration: 2,
});
// 6. 跳转到审核工作台
setTimeout(() => {
navigate('/literature/screening/title/workbench', {
state: { projectId },
});
}, 1000);
// 跳转到审核工作台
navigate('/literature/screening/title/workbench');
} catch (error) {
message.error('请完整填写筛选标准');
message.error({
content: `操作失败: ${(error as Error).message}`,
key: 'submit',
});
} finally {
setIsSubmitting(false);
}
};
/**
* 文献预览表格列定义
*/
const literatureColumns = [
{
title: '#',
dataIndex: 'tempId',
key: 'index',
width: 60,
render: (_: any, __: any, index: number) => index + 1,
},
{
title: '标题',
dataIndex: 'title',
key: 'title',
width: '35%',
ellipsis: { showTitle: false },
render: (text: string) => (
<Tooltip title={text} placement="topLeft">
<span>{text}</span>
</Tooltip>
),
},
{
title: '摘要',
dataIndex: 'abstract',
key: 'abstract',
width: '30%',
ellipsis: { showTitle: false },
render: (text: string) => (
<Tooltip title={text} placement="topLeft">
<span>{text.substring(0, 100)}...</span>
</Tooltip>
),
},
{
title: 'PMID',
dataIndex: 'pmid',
key: 'pmid',
width: 100,
render: (text: string) => text || '-',
},
{
title: '年份',
dataIndex: 'publicationYear',
key: 'year',
width: 80,
render: (year: number) => year || '-',
},
{
title: '作者',
dataIndex: 'authors',
key: 'authors',
ellipsis: { showTitle: false },
render: (text: string) => (
<Tooltip title={text}>
<span>{text || '-'}</span>
</Tooltip>
),
},
];
return (
<div className="p-6 max-w-7xl mx-auto">
{/* 页面标题 */}
<div className="mb-6">
<h1 className="text-2xl font-bold mb-2"> - </h1>
<p className="text-gray-500">
PICOS标准/AI筛选
AI智能初筛流程
</p>
</div>
<Form
form={form}
layout="vertical"
onFinish={handleSubmit}
initialValues={{
screeningStyle: 'standard',
picoCriteria: {
P: '',
I: '',
C: '',
O: '',
S: '',
},
inclusionCriteria: '',
exclusionCriteria: '',
screeningConfig: {
style: 'standard',
},
}}
>
{/* 步骤1: 配置筛选标准 */}
<Card title="步骤1: 配置筛选标准" className="mb-6">
{/* PICOS标准 */}
<Alert
message="PICOS标准"
description="系统评价研究问题的标准化框架,请详细填写每个维度"
@@ -117,7 +329,7 @@ const TitleScreeningSettings = () => {
label={
<span className="text-base font-semibold">
P - (Population)
<Tooltip title="研究对象的特征,如年龄、性别、疾病类型等。可以包含主要人群和亚组人群。">
<Tooltip title="研究对象的特征,如年龄、性别、疾病类型等">
<QuestionCircleOutlined className="ml-2 text-gray-400" />
</Tooltip>
</span>
@@ -127,7 +339,7 @@ const TitleScreeningSettings = () => {
>
<TextArea
rows={10}
placeholder="例如:&#10;Patients with non-cardioembolic ischemic stroke (NCIS) 非心源性缺血性卒中、亚洲人群&#10;亚组人群:&#10;1. NIHSS评分亚组卒中人群mild/moderate stroke&#10;2. 不同TOAST分型different TOAST subtypesexcluding cardioembolic stroke&#10;3. 高危TIA人群high-risk TIA population&#10;..."
placeholder="例如:&#10;Patients with non-cardioembolic ischemic stroke (NCIS) 非心源性缺血性卒中、亚洲人群&#10;亚组人群:&#10;1. NIHSS评分亚组卒中人群mild/moderate stroke&#10;..."
className="font-mono text-sm"
/>
</Form.Item>
@@ -213,7 +425,7 @@ const TitleScreeningSettings = () => {
<Divider />
{/* 纳入标准 & 排除标准 - 并排显示 */}
{/* 纳入标准 & 排除标准 */}
<Row gutter={16}>
<Col span={12}>
<Form.Item
@@ -230,7 +442,7 @@ const TitleScreeningSettings = () => {
>
<TextArea
rows={10}
placeholder="详细的纳入标准,例如:&#10;1. 非心源性缺血性卒中、亚洲患者&#10;2. 包含二级预防相关研究&#10;3. 涉及抗血小板或抗凝药物&#10;4. 研究类型SR、RCT、RWE、OBS&#10;5. 近五年2020年之后的文献&#10;..."
placeholder="详细的纳入标准,例如:&#10;1. 非心源性缺血性卒中、亚洲患者&#10;2. 包含二级预防相关研究&#10;3. 涉及抗血小板或抗凝药物&#10;..."
className="font-mono text-sm"
/>
</Form.Item>
@@ -251,7 +463,7 @@ const TitleScreeningSettings = () => {
>
<TextArea
rows={10}
placeholder="详细的排除标准,例如:&#10;1. 心源性卒中患者、非亚洲人群&#10;2. 急性期治疗研究(无二级预防关键词)&#10;3. 病例报告、会议摘要&#10;4. 非中英文文献&#10;5. 混合人群研究&#10;..."
placeholder="详细的排除标准,例如:&#10;1. 心源性卒中患者、非亚洲人群&#10;2. 急性期治疗研究(无二级预防关键词)&#10;3. 病例报告、会议摘要&#10;..."
className="font-mono text-sm"
/>
</Form.Item>
@@ -262,86 +474,194 @@ const TitleScreeningSettings = () => {
{/* 筛选风格 */}
<Form.Item
label={<span className="text-base font-semibold"></span>}
name="screeningStyle"
extra="提示:初筛推荐宽松模式,精筛推荐严格模式"
label={
<span className="text-base font-semibold">
<Tooltip title="选择AI筛选的严格程度">
<QuestionCircleOutlined className="ml-2 text-gray-400" />
</Tooltip>
</span>
}
name={['screeningConfig', 'style']}
>
<Radio.Group size="large">
<Space direction="vertical" className="w-full">
<Radio value="lenient">
🔓 -
</Radio>
<Radio value="standard">
-
</Radio>
<Radio value="strict">
🔒 -
</Radio>
</Space>
<Radio.Group>
<Radio.Button value="lenient">
<Space>
🔓
<Tooltip title="初筛推荐,宁可多纳入不错过">
<QuestionCircleOutlined />
</Tooltip>
</Space>
</Radio.Button>
<Radio.Button value="standard">
</Radio.Button>
<Radio.Button value="strict">
<Space>
🔒
<Tooltip title="精筛推荐,保证质量">
<QuestionCircleOutlined />
</Tooltip>
</Space>
</Radio.Button>
</Radio.Group>
</Form.Item>
</Card>
{/* 步骤2: 导入文献 */}
<Card title="步骤2: 导入文献" className="mb-6">
<div className="text-center">
<Dragger
accept=".xlsx,.xls"
maxCount={1}
fileList={fileList}
beforeUpload={handleFileUpload}
onRemove={() => {
setFileList([]);
setLiteratureCount(0);
setCanStart(false);
}}
>
<p className="ant-upload-drag-icon">
<InboxOutlined style={{ fontSize: 48, color: '#1890ff' }} />
</p>
<p className="ant-upload-text">Excel文件到此区域</p>
<p className="ant-upload-hint">
.xlsx .xls Title Abstract
</p>
</Dragger>
<Alert
message="支持Excel格式"
description="请上传包含文献标题和摘要的Excel文件。Title和Abstract为必填字段其他字段可选。系统会自动根据DOI和Title去重。"
type="info"
showIcon
className="mb-4"
/>
<div className="mt-4">
<Button
icon={<DownloadOutlined />}
onClick={handleDownloadTemplate}
<div className="flex items-start justify-between space-x-4 mb-6">
<div className="flex-1">
<Dragger
name="file"
multiple={false}
accept=".xlsx,.xls"
fileList={fileList}
beforeUpload={handleFileUpload as any}
onRemove={handleFileRemove}
disabled={isUploading}
>
Excel模板
</Button>
<p className="ant-upload-drag-icon">
<InboxOutlined />
</p>
<p className="ant-upload-text">Excel文件到此区域</p>
<p className="ant-upload-hint">
.xlsx .xls
</p>
</Dragger>
</div>
{literatureCount > 0 && (
<Alert
message={`已导入 ${literatureCount} 篇文献`}
type="success"
showIcon
className="mt-4"
/>
)}
<Button
icon={<DownloadOutlined />}
size="large"
onClick={handleDownloadTemplate}
className="flex-shrink-0"
>
Excel模板
</Button>
</div>
{/* 解析统计信息 */}
{parseStats && (
<div className="bg-gray-50 p-4 rounded-lg mb-4">
<Row gutter={16}>
<Col span={6}>
<Statistic
title="总数"
value={parseStats.total}
suffix="篇"
prefix={<CheckCircleOutlined style={{ color: '#52c41a' }} />}
/>
</Col>
<Col span={6}>
<Statistic
title="有效"
value={parseStats.afterDedup}
suffix="篇"
valueStyle={{ color: '#3f8600' }}
prefix={<CheckCircleOutlined />}
/>
</Col>
<Col span={6}>
<Statistic
title="重复"
value={parseStats.duplicates}
suffix="篇"
valueStyle={{ color: '#faad14' }}
/>
</Col>
<Col span={6}>
<Statistic
title="无效"
value={parseStats.invalid}
suffix="篇"
valueStyle={{ color: '#cf1322' }}
prefix={parseStats.invalid > 0 ? <ExclamationCircleOutlined /> : undefined}
/>
</Col>
</Row>
{parseStats.invalid > 0 && parseStats.errors.length > 0 && (
<Alert
message="部分数据验证失败"
description={
<div className="text-xs">
{parseStats.errors.map((err, idx) => (
<div key={idx}> {err}</div>
))}
{parseStats.errors.length >= 5 && (
<div className="mt-1 text-gray-400">...</div>
)}
</div>
}
type="warning"
showIcon
className="mt-4"
/>
)}
</div>
)}
{/* 文献预览表格 */}
{literatures.length > 0 && (
<div className="mt-4">
<div className="flex items-center justify-between mb-3">
<h4 className="text-base font-semibold">
<Tag color="blue" className="ml-2"> {literatures.length} </Tag>
</h4>
</div>
<Table
columns={literatureColumns}
dataSource={literatures}
rowKey="tempId"
pagination={{
pageSize: 50,
showSizeChanger: false,
showTotal: (total) => `${total} 篇文献`,
}}
size="small"
scroll={{ x: 'max-content' }}
/>
</div>
)}
</Card>
{/* 步骤3: 启动AI初筛 */}
<Card title="步骤3: 启动AI初筛">
<Button
type="primary"
size="large"
icon={<PlayCircleOutlined />}
htmlType="submit"
disabled={!canStart}
loading={isSubmitting}
className="w-full"
>
{isSubmitting ? '正在创建项目并导入文献...' : '开始AI标题摘要初筛'}
</Button>
{!canStart && literatures.length === 0 && (
<Alert
message="请先完成以上步骤"
description="填写PICOS标准、纳入/排除标准,并导入文献后,即可开始筛选"
type="warning"
showIcon
className="mt-4"
/>
)}
</Card>
</Form>
{/* 启动按钮 */}
<div className="text-center">
<Button
type="primary"
size="large"
icon={<PlayCircleOutlined />}
onClick={handleStartScreening}
disabled={!canStart}
className="px-12"
>
{canStart ? '开始AI筛选' : '请先导入文献'}
</Button>
</div>
</div>
);
};
export default TitleScreeningSettings;

View File

@@ -221,3 +221,4 @@ export interface ExclusionReasons {
other: number;
}

View File

@@ -0,0 +1,276 @@
/**
* Excel工具函数
* 包含模板生成、文件解析、去重等功能
*/
import * as XLSX from 'xlsx';
/**
* Literature record shape used temporarily while parsing an Excel upload.
*/
export interface LiteratureData {
// Client-side identifier; parseExcelFile assigns `temp-<timestamp>-<row index>`.
tempId?: string;
// Required; validateLiterature rejects empty values and values under 10 characters.
title: string;
// Required; validateLiterature rejects empty values and values under 50 characters.
abstract: string;
// Optional PubMed ID, kept as text.
pmid?: string;
// Optional author list as a single string.
authors?: string;
// Optional journal name.
journal?: string;
// Optional publication year; parseExcelFile stores the integer it could parse, otherwise undefined.
publicationYear?: number;
// Optional DOI; deduplicateLiteratures prefers it over the title as the dedup key.
doi?: string;
}
/**
* Summary statistics produced by processExcelFile for one uploaded file.
*/
export interface ParseStatistics {
total: number;           // Number of records parsed from the sheet
afterDedup: number;      // Number of valid records left after de-duplication
duplicates: number;      // Number of valid records dropped as duplicates
invalid: number;         // Number of records that failed validation
errors: string[];        // Formatted validation messages (processExcelFile keeps at most the first five)
}
/**
 * Build the literature-import template workbook and trigger its download.
 *
 * The workbook has two sheets: a data sheet pre-filled with two real example
 * records plus one placeholder row that asks the user to replace it, and an
 * instruction sheet describing every column and whether it is required.
 * The file is written through `XLSX.writeFile` under the name `文献导入模板.xlsx`.
 */
export function downloadExcelTemplate(): void {
  // Builders keep the key order fixed, which is what determines column order in the sheet.
  const sampleRow = (
    title: string,
    abstract: string,
    pmid: string,
    authors: string,
    journal: string,
    year: number | string,
    doi: string,
  ) => ({
    'Title': title,
    'Abstract': abstract,
    'PMID': pmid,
    'Authors': authors,
    'Journal': journal,
    'Year': year,
    'DOI': doi,
  });
  const fieldNote = (field: string, required: string, description: string) => ({
    '字段名': field,
    '是否必填': required,
    '说明': description,
  });
  const columnWidths = (widths: number[]) => widths.map((wch) => ({ wch }));

  const workbook = XLSX.utils.book_new();

  // Sheet 1: sample data (two real examples + one placeholder row).
  const dataSheet = XLSX.utils.json_to_sheet([
    sampleRow(
      'Effect of Empagliflozin on Cardiovascular Outcomes in Type 2 Diabetes',
      'Background: The effects of empagliflozin, a sodium-glucose cotransporter 2 inhibitor, in addition to standard care, on cardiovascular morbidity and mortality in patients with type 2 diabetes at high cardiovascular risk are not known. Methods: We randomly assigned patients...',
      '26378978',
      'Zinman B, Wanner C, Lachin JM, et al',
      'N Engl J Med',
      2015,
      '10.1056/NEJMoa1504720',
    ),
    sampleRow(
      'Dapagliflozin and Cardiovascular Outcomes in Type 2 Diabetes',
      'Background: Additional therapeutic interventions are needed to reduce the risk of cardiovascular events in patients with type 2 diabetes mellitus. Methods: We randomly assigned patients with type 2 diabetes...',
      '30415602',
      'Wiviott SD, Raz I, Bonaca MP, et al',
      'N Engl J Med',
      2019,
      '10.1056/NEJMoa1812389',
    ),
    sampleRow(
      '请删除此示例行,并填写您自己的文献数据',
      '摘要至少需要50个字符。Title和Abstract是必填字段其他字段可选。系统会自动根据DOI和Title去重。',
      '',
      '',
      '',
      '',
      '',
    ),
  ]);
  // Widths follow the column order: Title, Abstract, PMID, Authors, Journal, Year, DOI.
  dataSheet['!cols'] = columnWidths([60, 80, 12, 40, 30, 8, 25]);
  XLSX.utils.book_append_sheet(workbook, dataSheet, '文献列表');

  // Sheet 2: per-field instructions.
  const instructionSheet = XLSX.utils.json_to_sheet([
    fieldNote('Title', '✅ 是', '文献标题至少10个字符'),
    fieldNote('Abstract', '✅ 是', '文献摘要至少50个字符'),
    fieldNote('PMID', '❌ 否', 'PubMed ID'),
    fieldNote('Authors', '❌ 否', '作者列表'),
    fieldNote('Journal', '❌ 否', '期刊名称'),
    fieldNote('Year', '❌ 否', '发表年份'),
    fieldNote('DOI', '❌ 否', 'DOI编号用于去重'),
  ]);
  instructionSheet['!cols'] = columnWidths([15, 12, 50]);
  XLSX.utils.book_append_sheet(workbook, instructionSheet, '字段说明');

  // Serialise the workbook and hand it to the browser as a download.
  XLSX.writeFile(workbook, '文献导入模板.xlsx');
}
/**
 * Parse an Excel file in memory (nothing is written to disk) and map the rows of
 * its first worksheet to `LiteratureData` records.
 *
 * Column headers are looked up under their English names (capitalised or
 * lower-case) and their Chinese names. Text cells are stringified and trimmed;
 * optional fields that end up empty become `undefined`. No validation happens here.
 *
 * @param file - The Excel file picked by the user.
 * @returns A promise resolving to one record per row returned by `sheet_to_json`.
 *          It rejects with an `Error` when the file cannot be read or parsed.
 */
export async function parseExcelFile(file: File): Promise<LiteratureData[]> {
  return new Promise((resolve, reject) => {
    const fileReader = new FileReader();

    fileReader.onerror = () => {
      reject(new Error('文件读取失败'));
    };

    fileReader.onload = (loadEvent) => {
      try {
        const bytes = loadEvent.target?.result as ArrayBuffer;
        const book = XLSX.read(bytes, { type: 'array' });

        // Only the first worksheet is read.
        const sheet = book.Sheets[book.SheetNames[0]];
        const rows = XLSX.utils.sheet_to_json<any>(sheet);

        // First truthy candidate cell, stringified and trimmed ('' when none is set).
        const firstText = (...cells: any[]): string =>
          String(cells.find(Boolean) || '').trim();
        // Same as firstText, but an empty result is reported as undefined.
        const optionalText = (...cells: any[]): string | undefined =>
          firstText(...cells) || undefined;
        // Integer year, or undefined when the cell is empty or not numeric.
        const toYear = (cell: any): number | undefined => {
          if (!cell) {
            return undefined;
          }
          const parsedYear = parseInt(String(cell));
          return isNaN(parsedYear) ? undefined : parsedYear;
        };

        // Field mapping (English and Chinese headers are both accepted).
        const records: LiteratureData[] = rows.map((row, rowIndex) => ({
          tempId: `temp-${Date.now()}-${rowIndex}`,
          title: firstText(row.Title, row.title, row['标题']),
          abstract: firstText(row.Abstract, row.abstract, row['摘要']),
          pmid: optionalText(row.PMID, row.pmid, row['PMID编号']),
          authors: optionalText(row.Authors, row.authors, row['作者']),
          journal: optionalText(row.Journal, row.journal, row['期刊']),
          publicationYear: toYear(row.Year || row.year || row['年份']),
          doi: optionalText(row.DOI, row.doi),
        }));

        resolve(records);
      } catch (error) {
        reject(new Error(`Excel文件解析失败: ${(error as Error).message}`));
      }
    };

    fileReader.readAsArrayBuffer(file);
  });
}
/**
 * Validate a single literature record.
 *
 * Both `title` and `abstract` are mandatory; the title must have at least 10
 * characters and the abstract at least 50.
 *
 * @param lit - Record to check.
 * @returns The validation messages (title first, then abstract); empty when the record is valid.
 */
export function validateLiterature(lit: LiteratureData): string[] {
  // Returns the message for one field, or null when the field passes.
  const problemWith = (
    text: string,
    minLength: number,
    emptyMessage: string,
    shortMessage: string,
  ): string | null => {
    if (!text) {
      return emptyMessage;
    }
    return text.length < minLength ? shortMessage : null;
  };

  return [
    problemWith(lit.title, 10, '标题不能为空', '标题太短至少10个字符'),
    problemWith(lit.abstract, 50, '摘要不能为空', '摘要太短至少50个字符'),
  ].filter((message): message is string => message !== null);
}
/**
 * Validate a batch of literature records and split them into two groups.
 *
 * @param literatures - Records to check, in their original order.
 * @returns `valid` holds the records that produced no validation message;
 *          `invalid` pairs every rejected record with its messages. Input order
 *          is preserved within each group.
 */
export function validateLiteratures(literatures: LiteratureData[]): {
  valid: LiteratureData[];
  invalid: Array<{ literature: LiteratureData; errors: string[] }>;
} {
  const buckets: {
    valid: LiteratureData[];
    invalid: Array<{ literature: LiteratureData; errors: string[] }>;
  } = { valid: [], invalid: [] };

  return literatures.reduce((sorted, literature) => {
    const errors = validateLiterature(literature);
    if (errors.length > 0) {
      sorted.invalid.push({ literature, errors });
    } else {
      sorted.valid.push(literature);
    }
    return sorted;
  }, buckets);
}
/**
 * Remove duplicate literature records, keeping the first occurrence of each.
 *
 * Matching priority: DOI first, normalised title as fallback.
 * - DOIs are compared case-insensitively after stripping a leading
 *   `https://doi.org/`, `http://dx.doi.org/` or `doi:` prefix, so the same DOI
 *   written in different forms is recognised.
 * - Titles are compared lower-cased with all whitespace removed.
 * - Two records that both carry (different) DOIs are never merged by title.
 * - When at least one of two records lacks a DOI, an identical normalised title
 *   marks them as duplicates. This covers exports where one source provides the
 *   DOI and another does not.
 *
 * @param literatures - Records to de-duplicate, in their original order.
 * @returns `unique` holds the kept records and `duplicates` the dropped ones,
 *          both in input order.
 */
export function deduplicateLiteratures(literatures: LiteratureData[]): {
  unique: LiteratureData[];
  duplicates: LiteratureData[];
} {
  const normalizeDoi = (doi: string | undefined): string =>
    (doi ?? '')
      .trim()
      .toLowerCase()
      .replace(/^(?:https?:\/\/(?:dx\.)?doi\.org\/|doi:\s*)/, '');
  const normalizeTitle = (title: string | undefined): string =>
    (title ?? '').toLowerCase().replace(/\s+/g, '');

  const seenDois = new Set<string>();
  // Titles of every kept record; a record without a DOI is checked against all of them.
  const seenTitles = new Set<string>();
  // Titles of kept records that have no DOI; a record with a DOI is checked only
  // against these, so distinct DOIs sharing a title stay separate.
  const seenTitlesWithoutDoi = new Set<string>();

  const unique: LiteratureData[] = [];
  const duplicates: LiteratureData[] = [];

  for (const lit of literatures) {
    const doiKey = normalizeDoi(lit.doi);
    const titleKey = normalizeTitle(lit.title);
    const hasDoi = doiKey !== '';

    const isDuplicate = hasDoi
      ? seenDois.has(doiKey) || (titleKey !== '' && seenTitlesWithoutDoi.has(titleKey))
      : seenTitles.has(titleKey);

    if (isDuplicate) {
      duplicates.push(lit);
      continue;
    }

    if (hasDoi) {
      seenDois.add(doiKey);
    } else {
      seenTitlesWithoutDoi.add(titleKey);
    }
    seenTitles.add(titleKey);
    unique.push(lit);
  }

  return { unique, duplicates };
}
/**
 * Full Excel processing pipeline: parse, then validate, then de-duplicate.
 *
 * @param file - The Excel file picked by the user.
 * @returns `valid` holds the records that passed validation and survived
 *          de-duplication; `statistics` summarises the run. `statistics.errors`
 *          contains at most the first five validation failures, each prefixed
 *          with the 1-based position of the failing record among the parsed data
 *          rows, so the user can locate it in the sheet. The header row is not
 *          counted.
 *          The promise rejects when `parseExcelFile` rejects.
 */
export async function processExcelFile(file: File): Promise<{
  valid: LiteratureData[];
  statistics: ParseStatistics;
}> {
  // 1. Parse the workbook.
  const parsedData = await parseExcelFile(file);

  // 2. Validate every record.
  const { valid: validData, invalid: invalidData } = validateLiteratures(parsedData);

  // 3. De-duplicate the valid records.
  const { unique: uniqueData, duplicates: duplicateData } = deduplicateLiteratures(validData);

  // 4. Build the statistics. Only the first few failures are reported.
  const errors = invalidData.slice(0, 5).map((item, index) => {
    // Number the message by the record's position in the parsed data, not by its
    // position inside the invalid subset, which would not identify the record.
    const position = parsedData.indexOf(item.literature);
    const recordNumber = position >= 0 ? position + 1 : index + 1;
    return `${recordNumber}: ${item.errors.join(', ')}`;
  });

  const statistics: ParseStatistics = {
    total: parsedData.length,
    afterDedup: uniqueData.length,
    duplicates: duplicateData.length,
    invalid: invalidData.length,
    errors,
  };

  return {
    valid: uniqueData,
    statistics,
  };
}

View File

@@ -20,3 +20,4 @@ export default DCModule

View File

@@ -20,3 +20,4 @@ export default PKBModule

View File

@@ -24,3 +24,4 @@ export default SSAModule

View File

@@ -24,3 +24,4 @@ export default STModule

View File

@@ -50,3 +50,4 @@ export default Placeholder