feat(pkb): implement complete batch processing workflow and frontend optimization

- Frontend V3 architecture migration to modules/pkb
- Implement three work modes: full-text reading, deep reading, batch processing
- Complete batch processing: template selection, progress display, result export (CSV)
- Integrate Ant Design X Chat component with streaming support
- Add document upload modal with drag-and-drop support
- Optimize UI: multi-line table display, citation formatting, auto-scroll
- Fix 10+ technical issues: API mapping, state sync, form clearing
- Update documentation: development records and module status

Performance: 3 docs batch processing ~17-28s
Status: PKB module now production-ready (90% complete)
This commit is contained in:
2026-01-07 18:23:43 +08:00
parent e59676342a
commit 06028c6952
195 changed files with 1405 additions and 272 deletions

View File

@@ -4,7 +4,7 @@
*/
import React, { useState, useEffect } from 'react';
import { Button, Table, Progress, Alert, message, Card, Steps, Checkbox, Radio } from 'antd';
import { Button, Table, Progress, message, Card, Steps, Checkbox, Radio, Tooltip } from 'antd';
import { Play, Download, RotateCw, FileText, CheckCircle2, Zap } from 'lucide-react';
import type { KnowledgeBase, Document } from '../../api/knowledgeBaseApi';
@@ -31,42 +31,21 @@ interface BatchResult {
error?: string;
}
// 模板配置ID必须与后端 PRESET_TEMPLATES 匹配)
const TEMPLATES: BatchTemplate[] = [
{
id: 'clinicalResearch',
id: 'clinical_research', // ✅ 必须与后端模板ID匹配
name: '临床研究信息提取',
description: '提取研究目的、方法、样本量、结论等核心信息',
description: '提取研究目的、设计、对象、样本量、干预、对照、结果、证据等级',
fields: [
{ key: 'title', label: '研究标题' },
{ key: 'purpose', label: '研究目的' },
{ key: 'method', label: '研究方法' },
{ key: 'sampleSize', label: '样本量' },
{ key: 'intervention', label: '干预措施' },
{ key: 'outcome', label: '主要结局' },
{ key: 'conclusion', label: '研究结论' },
{ key: 'limitation', label: '研究局限' },
],
},
{
id: 'drug_safety',
name: '药物安全性分析',
description: '提取药物不良反应、禁忌症、注意事项等',
fields: [
{ key: 'drugName', label: '药物名称' },
{ key: 'adverseReactions', label: '不良反应' },
{ key: 'contraindications', label: '禁忌症' },
{ key: 'warnings', label: '警告事项' },
],
},
{
id: 'patient_baseline',
name: '患者基线特征',
description: '提取患者年龄、性别、诊断、既往史等基线信息',
fields: [
{ key: 'age', label: '年龄' },
{ key: 'gender', label: '性别' },
{ key: 'diagnosis', label: '主要诊断' },
{ key: 'comorbidities', label: '合并症' },
{ key: 'research_purpose', label: '研究目的' },
{ key: 'research_design', label: '研究设计' },
{ key: 'research_subjects', label: '研究对象' },
{ key: 'sample_size', label: '样本量' },
{ key: 'intervention_group', label: '干预' },
{ key: 'control_group', label: '对照组' },
{ key: 'results_data', label: '结果及数据' },
{ key: 'oxford_level', label: '牛津评级' },
],
},
];
@@ -80,9 +59,19 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
const [selectedTemplate, setSelectedTemplate] = useState<BatchTemplate | null>(TEMPLATES[0]);
const [selectedDocs, setSelectedDocs] = useState<string[]>([]);
const [results, setResults] = useState<BatchResult[]>([]);
const [, setIsExecuting] = useState(false);
const [isExecuting, setIsExecuting] = useState(false);
const completedDocs = documents.filter(doc => doc.status === 'completed');
// 过滤出已完成解析的文档,并去重(确保唯一性)
const completedDocs = React.useMemo(() => {
const seen = new Set<string>();
return documents.filter(doc => {
if (doc.status === 'completed' && !seen.has(doc.id)) {
seen.add(doc.id);
return true;
}
return false;
});
}, [documents]);
useEffect(() => {
// 初始化模板(如果有传入则使用,否则默认第一个)
@@ -92,16 +81,18 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
}
}, [initialTemplate]);
// 处理文档选择
const handleDocSelect = (docId: string, checked: boolean) => {
if (checked) {
// 切换文档选择状态toggle模式参考旧版实现
const handleToggleDocument = (docId: string) => {
if (selectedDocs.includes(docId)) {
// 已选中 -> 取消选择
setSelectedDocs(prev => prev.filter(id => id !== docId));
} else {
// 未选中 -> 添加选择
if (selectedDocs.length >= 50) {
message.warning('最多选择50篇文档');
return;
}
setSelectedDocs(prev => [...prev, docId]);
} else {
setSelectedDocs(prev => prev.filter(id => id !== docId));
}
};
@@ -130,11 +121,11 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
setStep(1);
setIsExecuting(true);
// 初始化结果
const initialResults: BatchResult[] = selectedDocs.map(docId => {
const doc = documents.find(d => d.id === docId);
// 初始化结果 - 使用唯一索引确保key唯一
const initialResults: BatchResult[] = selectedDocs.map((docId, index) => {
const doc = completedDocs.find(d => d.id === docId);
return {
documentId: docId,
documentId: `${docId}-${index}`, // 确保唯一性
documentName: doc?.filename || '未知文档',
status: 'pending',
progress: 0,
@@ -143,81 +134,185 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
setResults(initialResults);
try {
// 调用批处理API
// 调用批处理API使用v2新版API
// 完整路径: /api/v2/pkb/batch-tasks/batch/execute
// 请求体格式必须匹配后端 ExecuteBatchBody 接口
const response = await fetch('/api/v2/pkb/batch-tasks/batch/execute', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
kb_id: kbId,
template_id: selectedTemplate.id,
document_ids: selectedDocs,
kb_id: kbId, // 后端期望 kb_id
document_ids: selectedDocs, // 后端期望 document_ids
template_type: 'preset', // 使用预设模板
template_id: selectedTemplate.id, // 后端期望 template_id
model_type: 'qwen-long', // 使用qwen-long模型
task_name: `${selectedTemplate.name}_${new Date().toLocaleString('zh-CN')}`,
}),
});
if (!response.ok) {
throw new Error('批处理执行失败');
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.message || '批处理执行失败');
}
const data = await response.json();
const taskId = data.task_id;
const responseData = await response.json();
console.log('[BatchMode] API响应:', responseData);
// 后端返回格式: { success: true, data: { task_id: xxx } }
const taskId = responseData.data?.task_id || responseData.taskId || responseData.task_id;
if (!taskId) {
console.error('[BatchMode] 未获取到taskId:', responseData);
message.error('批处理任务创建失败未获取到任务ID');
setIsExecuting(false);
setStep(0);
return;
}
console.log('[BatchMode] 任务已创建,开始轮询:', taskId);
// 轮询任务状态
const pollInterval = setInterval(async () => {
try {
const statusRes = await fetch(`/api/v2/pkb/batch-tasks/${taskId}`);
const statusData = await statusRes.json();
const statusRes = await fetch(`/api/v2/pkb/batch-tasks/batch/tasks/${taskId}`);
if (!statusRes.ok) {
console.error('[BatchMode] 获取任务状态失败:', statusRes.status);
return;
}
const statusJson = await statusRes.json();
const taskData = statusJson.data || statusJson;
console.log('[BatchMode] 任务状态:', taskData);
// 更新进度
// 计算进度 - 基于后端返回的completed_count
const completedCount = taskData.completed_count || 0;
const failedCount = taskData.failed_count || 0;
const processedCount = completedCount + failedCount;
// 更新每个文档的状态(根据处理进度模拟)
setResults(prev => prev.map((r, idx) => {
const docResult = statusData.results?.[idx];
if (docResult) {
return {
...r,
status: docResult.status,
progress: docResult.progress || 0,
result: docResult.result,
error: docResult.error,
};
if (idx < completedCount) {
// 已完成
return { ...r, status: 'completed' as const, progress: 100 };
} else if (idx < processedCount) {
// 失败
return { ...r, status: 'error' as const, progress: 100 };
} else if (idx === processedCount && taskData.status === 'processing') {
// 正在处理
return { ...r, status: 'processing' as const, progress: 50 };
}
return r;
}));
// 检查是否全部完成
if (statusData.status === 'completed' || statusData.status === 'failed') {
if (taskData.status === 'completed' || taskData.status === 'failed') {
clearInterval(pollInterval);
setIsExecuting(false);
setStep(2);
if (statusData.status === 'completed') {
message.success('批处理完成!');
// 获取最终结果
try {
const resultsRes = await fetch(`/api/v2/pkb/batch-tasks/batch/tasks/${taskId}/results`);
console.log('[BatchMode] 获取结果响应状态:', resultsRes.status);
if (resultsRes.ok) {
const resultsJson = await resultsRes.json();
console.log('[BatchMode] 结果数据:', JSON.stringify(resultsJson, null, 2));
const resultsData = resultsJson.data?.results || [];
console.log('[BatchMode] 解析到的结果数量:', resultsData.length);
if (resultsData.length > 0) {
// 构建新的结果数组 - 后端返回的提取数据在 data 字段中
const newResults: BatchResult[] = resultsData.map((docResult: any, idx: number) => {
console.log(`[BatchMode] 文档 ${idx}:`, {
id: docResult.document_id,
name: docResult.document_name,
status: docResult.status,
hasData: !!docResult.data,
dataKeys: docResult.data ? Object.keys(docResult.data) : [],
});
// 🔑 后端返回的状态是 "success" 或 "failed",需要映射为前端的 "completed" 或 "error"
const isSuccess = docResult.status === 'success' || docResult.status === 'completed';
return {
documentId: docResult.document_id || `doc-${idx}`,
documentName: docResult.document_name || `文档${idx + 1}`,
status: isSuccess ? 'completed' as const : 'error' as const,
progress: 100,
result: docResult.data, // 后端返回的提取数据
error: docResult.error_message,
};
});
console.log('[BatchMode] 更新结果:', newResults);
setResults(newResults);
} else {
console.warn('[BatchMode] 没有结果数据');
}
} else {
console.error('[BatchMode] 获取结果失败,状态码:', resultsRes.status);
}
} catch (e) {
console.error('[BatchMode] 获取结果异常:', e);
}
// 延迟设置step确保状态更新完成
setTimeout(() => {
setStep(2);
}, 100);
if (taskData.status === 'completed') {
message.success(`批处理完成!成功 ${completedCount} 篇,失败 ${failedCount}`);
} else {
message.error('批处理失败');
message.error('批处理失败: ' + (taskData.error || '未知错误'));
}
}
} catch (error) {
console.error('轮询任务状态失败:', error);
console.error('[BatchMode] 轮询任务状态失败:', error);
}
}, 2000);
// 设置超时保护5分钟
setTimeout(() => {
clearInterval(pollInterval);
if (isExecuting) {
setIsExecuting(false);
setStep(2);
message.warning('任务执行超时,请稍后查看结果');
}
}, 5 * 60 * 1000);
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : '执行失败';
message.error(errorMessage);
setIsExecuting(false);
setStep(0); // 返回配置步骤
}
};
// 导出Excel
// 导出Excel (CSV格式)
const handleExport = () => {
if (!selectedTemplate) return;
// 构建CSV数据
const completedResults = results.filter(r => r.status === 'completed' && r.result);
if (completedResults.length === 0) {
message.warning('没有可导出的结果');
return;
}
// 构建CSV数据 - 使用模板定义的字段
const headers = ['文档名称', ...selectedTemplate.fields.map(f => f.label)];
const rows = results
.filter(r => r.status === 'completed' && r.result)
.map(r => [
r.documentName,
...selectedTemplate.fields.map(f => r.result?.[f.key] || '-'),
]);
const rows = completedResults.map(r => [
r.documentName,
...selectedTemplate.fields.map(f => {
const value = r.result?.[f.key];
if (!value) return '-';
// 处理换行符和引号
return String(value).replace(/"/g, '""').replace(/\n/g, ' ');
}),
]);
const csvContent = [
headers.join(','),
@@ -228,8 +323,11 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
const blob = new Blob(['\ufeff' + csvContent], { type: 'text/csv;charset=utf-8;' });
const link = document.createElement('a');
link.href = URL.createObjectURL(blob);
link.download = `批处理结果_${selectedTemplate.name}_${new Date().toISOString().split('T')[0]}.csv`;
const timestamp = new Date().toISOString().split('T')[0];
link.download = `批处理结果_${selectedTemplate.name}_${timestamp}.csv`;
link.click();
message.success(`已导出 ${completedResults.length} 条结果`);
};
// 重置
@@ -247,9 +345,9 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
current={step}
size="small"
items={[
{ title: '配置任务', description: '选择模板和文档' },
{ title: '执行中', description: '正在处理文档' },
{ title: '查看结果', description: '导出数据' },
{ title: '配置任务', subTitle: '选择模板和文档' },
{ title: '执行中', subTitle: '正在处理文档' },
{ title: '查看结果', subTitle: '导出数据' },
]}
/>
</div>
@@ -348,14 +446,12 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
? 'bg-blue-50 border border-blue-200'
: 'hover:bg-gray-50 border border-transparent'
}`}
onClick={() => handleDocSelect(doc.id, !selectedDocs.includes(doc.id))}
onClick={() => handleToggleDocument(doc.id)}
>
<Checkbox
checked={selectedDocs.includes(doc.id)}
onChange={(e) => {
e.stopPropagation();
handleDocSelect(doc.id, e.target.checked);
}}
onChange={() => handleToggleDocument(doc.id)}
onClick={(e) => e.stopPropagation()}
/>
<div className="w-8 h-8 bg-red-50 text-red-500 rounded flex items-center justify-center ml-3 mr-3 flex-shrink-0">
<FileText className="w-4 h-4" />
@@ -375,29 +471,32 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
</div>
<div className="mt-4 pt-4 border-t border-gray-100">
<Alert
message={
<span className="text-sm">
<strong className="text-blue-600">{selectedDocs.length}</strong>
{selectedDocs.length < 3 && <span className="text-orange-600 ml-2">3</span>}
</span>
}
type={selectedDocs.length >= 3 ? 'success' : 'warning'}
showIcon
/>
<div className={`flex items-center p-3 rounded-lg ${
selectedDocs.length >= 3
? 'bg-green-50 border border-green-200'
: 'bg-yellow-50 border border-yellow-200'
}`}>
<CheckCircle2 className={`w-4 h-4 mr-2 ${
selectedDocs.length >= 3 ? 'text-green-500' : 'text-yellow-500'
}`} />
<span className="text-sm">
<strong className="text-blue-600">{selectedDocs.length}</strong>
{selectedDocs.length < 3 && <span className="text-orange-600 ml-2">3</span>}
</span>
</div>
</div>
</Card>
{/* 开始按钮 */}
{/* 开始按钮 - 显示实际选择的文档数量 */}
<Button
type="primary"
size="large"
icon={<Play className="w-4 h-4" />}
onClick={handleExecute}
disabled={selectedDocs.length < 3 || !selectedTemplate}
disabled={selectedDocs.length < 3 || selectedDocs.length > 50 || !selectedTemplate}
className="w-full h-12 text-base font-medium shadow-lg"
>
({selectedDocs.length} )
🚀 ({selectedDocs.length} )
</Button>
</div>
</div>
@@ -471,37 +570,84 @@ export const BatchModeComplete: React.FC<BatchModeCompleteProps> = ({
</div>
<div className="flex-1 overflow-auto bg-white border border-gray-200 rounded-lg shadow-sm">
<Table
dataSource={results.filter(r => r.status === 'completed')}
rowKey="documentId"
pagination={false}
scroll={{ x: 'max-content', y: '100%' }}
size="small"
columns={[
{
title: '文档名称',
dataIndex: 'documentName',
key: 'documentName',
fixed: 'left',
width: 200,
ellipsis: true,
render: (text) => (
<div className="flex items-center">
<FileText className="w-4 h-4 text-red-400 mr-2 flex-shrink-0" />
<span className="truncate">{text}</span>
</div>
),
},
...selectedTemplate.fields.map(field => ({
title: field.label,
dataIndex: ['result', field.key],
key: field.key,
width: 180,
ellipsis: true,
render: (text: string) => text || '-',
})),
]}
/>
{results.filter(r => r.status === 'completed' && r.result).length === 0 ? (
<div className="flex items-center justify-center h-full text-slate-400">
<div className="text-center">
<FileText className="w-12 h-12 mx-auto mb-3 opacity-50" />
<p></p>
<p className="text-xs mt-1"></p>
</div>
</div>
) : (
<Table
dataSource={results.filter(r => r.status === 'completed' && r.result)}
rowKey="documentId"
pagination={false}
scroll={{ x: 1800, y: 'calc(100vh - 400px)' }}
size="small"
className="batch-results-table"
columns={[
{
title: '文档名称',
dataIndex: 'documentName',
key: 'documentName',
fixed: 'left',
width: 220,
render: (text: string) => (
<Tooltip title={text} placement="topLeft">
<div className="flex items-center">
<FileText className="w-4 h-4 text-red-400 mr-2 flex-shrink-0" />
<span
className="font-medium text-slate-800"
style={{
display: '-webkit-box',
WebkitLineClamp: 2,
WebkitBoxOrient: 'vertical',
overflow: 'hidden',
lineHeight: '1.4',
}}
>
{text}
</span>
</div>
</Tooltip>
),
},
...selectedTemplate.fields.map(field => ({
title: field.label,
key: field.key,
width: field.key === 'results_data' ? 280 : 180, // 结果数据列更宽
render: (_: unknown, record: BatchResult) => {
const value = record.result?.[field.key];
if (!value) return <span className="text-slate-300">-</span>;
const textValue = String(value);
return (
<Tooltip
title={<div style={{ maxWidth: 400, maxHeight: 300, overflow: 'auto' }}>{textValue}</div>}
placement="topLeft"
overlayStyle={{ maxWidth: 450 }}
>
<div
className="text-slate-700 text-sm leading-relaxed cursor-help"
style={{
display: '-webkit-box',
WebkitLineClamp: 3, // 最多显示3行
WebkitBoxOrient: 'vertical',
overflow: 'hidden',
lineHeight: '1.5',
}}
>
{textValue}
</div>
</Tooltip>
);
},
})),
]}
/>
)}
</div>
</div>
)}