feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench

M1 Skeleton Pipeline:
- Scatter-dispatch + Aggregator polling pattern (PgBoss)
- PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs)
- ExtractionSingleWorker with DeepSeek-V3 LLM extraction
- PermanentExtractionError for non-retryable failures
- Phantom Retry Guard (idempotent worker)
- 3-step minimal frontend (Setup -> Progress -> Workbench)
- 4 new DB tables (extraction_templates, project_templates, tasks, results)
- 3 system templates seed (RCT, Cohort, QC)
- M1 integration test suite

M2 HITL Workbench:
- MinerU VLM integration for high-fidelity table extraction
- XML-isolated DynamicPromptBuilder with flat JSON output template
- fuzzyQuoteMatch validator (3-tier confidence scoring)
- SSE real-time logging via ExtractionEventBus
- Schema-driven ExtractionDrawer (dynamic field rendering from template)
- Excel wide-table export with flattenModuleData normalization
- M2 integration test suite

Critical Fixes (data normalization):
- DynamicPromptBuilder: explicit flat key-value output format with example
- ExtractionExcelExporter: handle both array and flat data formats
- ExtractionDrawer: schema-driven rendering instead of hardcoded fields
- ExtractionValidator: array-format quote verification support
- SSE route: Fastify register encapsulation to bypass auth for EventSource
- LLM JSON sanitizer: strip illegal control chars before JSON.parse

Also includes: RVW stats verification spec, SSA expert config guide

Tested: M1 pipeline test + M2 HITL test + manual frontend verification
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-25 18:29:20 +08:00
parent 371fa53956
commit f0736dbca1
40 changed files with 6138 additions and 48 deletions

View File

@@ -528,6 +528,97 @@ export async function getDeepResearchTask(
return request(`/research/tasks/${taskId}`);
}
// ==================== Tool 3: full-text intelligent extraction API ====================
/**
 * Requests the extraction template list from `/extraction/templates`.
 * No request options are passed, so the HTTP method is whatever `request`
 * defaults to (presumably GET — confirm in `request`).
 *
 * @returns The `ApiResponse` produced by `request`, typed as a list of untyped templates.
 */
export async function getExtractionTemplates(): Promise<ApiResponse<any[]>> {
return request('/extraction/templates');
}
/**
 * Requests a single extraction template from `/extraction/templates/:templateId`.
 *
 * @param templateId Interpolated into the URL path as-is (no URL encoding is applied here).
 * @returns The `ApiResponse` produced by `request`.
 */
export async function getExtractionTemplate(templateId: string): Promise<ApiResponse<any>> {
return request(`/extraction/templates/${templateId}`);
}
/**
 * POSTs `{ projectId, baseTemplateId }` as a JSON body to `/extraction/templates/clone`.
 *
 * @param projectId Project identifier sent in the request body.
 * @param baseTemplateId Identifier of the template to clone, sent in the request body.
 * @returns The `ApiResponse` produced by `request`; callers in this codebase read `data.id` from it.
 */
export async function cloneExtractionTemplate(
projectId: string,
baseTemplateId: string
): Promise<ApiResponse<any>> {
return request('/extraction/templates/clone', {
method: 'POST',
body: JSON.stringify({ projectId, baseTemplateId }),
});
}
/**
 * Requests the knowledge-base list from `/extraction/knowledge-bases`.
 *
 * @returns The `ApiResponse` produced by `request`, typed as a list of untyped knowledge bases.
 */
export async function getExtractionKnowledgeBases(): Promise<ApiResponse<any[]>> {
return request('/extraction/knowledge-bases');
}
/**
 * Requests the documents of one knowledge base from
 * `/extraction/knowledge-bases/:kbId/documents`.
 *
 * @param kbId Knowledge-base id, interpolated into the URL path as-is.
 * @returns The `ApiResponse` produced by `request`, typed as a list of untyped documents.
 */
export async function getExtractionDocuments(kbId: string): Promise<ApiResponse<any[]>> {
return request(`/extraction/knowledge-bases/${kbId}/documents`);
}
/**
 * POSTs the given parameters, JSON-serialised unchanged, to `/extraction/tasks`.
 *
 * @param params Task creation payload; `idempotencyKey` is optional and is simply
 *   forwarded in the body when present (how the server uses it is not visible here).
 * @returns The `ApiResponse` produced by `request`, typed as carrying the new `taskId`.
 */
export async function createExtractionTask(params: {
projectId: string;
projectTemplateId: string;
pkbKnowledgeBaseId: string;
documentIds: string[];
idempotencyKey?: string;
}): Promise<ApiResponse<{ taskId: string }>> {
return request('/extraction/tasks', {
method: 'POST',
body: JSON.stringify(params),
});
}
/**
 * Requests the status/progress summary of an extraction task from `/extraction/tasks/:taskId`.
 *
 * @param taskId Task id, interpolated into the URL path as-is.
 * @returns The `ApiResponse` produced by `request`; the declared payload carries the task
 *   `status`, per-state counters and a `percent` value. Callers in this codebase treat
 *   `'completed'` and `'failed'` as terminal statuses.
 */
export async function getExtractionTaskStatus(
taskId: string
): Promise<ApiResponse<{
taskId: string;
status: string;
totalCount: number;
completedCount: number;
errorCount: number;
extractingCount: number;
pendingCount: number;
percent: number;
}>> {
return request(`/extraction/tasks/${taskId}`);
}
/**
 * Requests the per-document results of a task from `/extraction/tasks/:taskId/results`.
 *
 * @param taskId Task id, interpolated into the URL path as-is.
 * @returns The `ApiResponse` produced by `request`, typed as a list of untyped results.
 */
export async function getExtractionTaskResults(
taskId: string
): Promise<ApiResponse<any[]>> {
return request(`/extraction/tasks/${taskId}/results`);
}
/**
 * Requests one extraction result from `/extraction/results/:resultId`.
 *
 * @param resultId Result id, interpolated into the URL path as-is.
 * @returns The `ApiResponse` produced by `request`; the payload is untyped here.
 */
export async function getExtractionResultDetail(
resultId: string
): Promise<ApiResponse<any>> {
return request(`/extraction/results/${resultId}`);
}
/**
 * PUTs a review decision, JSON-serialised, to `/extraction/results/:resultId/review`.
 *
 * @param resultId Result id, interpolated into the URL path as-is.
 * @param data Review payload; `reviewStatus` is restricted to `'approved'` or `'rejected'`.
 * @returns The `ApiResponse` produced by `request`.
 */
export async function reviewExtractionResult(
resultId: string,
data: { reviewStatus: 'approved' | 'rejected' }
): Promise<ApiResponse<any>> {
return request(`/extraction/results/${resultId}/review`, {
method: 'PUT',
body: JSON.stringify(data),
});
}
/**
 * Downloads the export of a task's extraction results as a Blob.
 *
 * Unlike the JSON helpers in this file, this calls `fetch` directly against
 * `${API_BASE_URL}/extraction/tasks/:taskId/export`, sending the headers
 * returned by `getAuthHeaders()`.
 *
 * @param taskId Task id, interpolated into the URL path as-is.
 * @returns The response body as a `Blob`.
 * @throws Error with message `HTTP <status>` when the response is not OK.
 */
export async function exportExtractionResults(
  taskId: string
): Promise<Blob> {
  const exportUrl = `${API_BASE_URL}/extraction/tasks/${taskId}/export`;
  const res = await fetch(exportUrl, { headers: getAuthHeaders() });
  if (res.ok) {
    return res.blob();
  }
  throw new Error(`HTTP ${res.status}`);
}
// ==================== 统一导出API对象 ====================
/**
@@ -584,4 +675,17 @@ export const aslApi = {
generateRequirement,
executeDeepResearchTask,
getDeepResearchTask,
// 工具 3全文智能提取
getExtractionTemplates,
getExtractionTemplate,
cloneExtractionTemplate,
getExtractionKnowledgeBases,
getExtractionDocuments,
createExtractionTask,
getExtractionTaskStatus,
getExtractionTaskResults,
getExtractionResultDetail,
reviewExtractionResult,
exportExtractionResults,
};

View File

@@ -95,11 +95,21 @@ const ASLLayout = () => {
],
},
{
key: 'data-extraction',
key: 'extraction',
icon: <DatabaseOutlined />,
label: '6. 全文解析与数据提取',
disabled: true,
title: '敬请期待'
label: '6. 全文智能提取',
children: [
{
key: '/literature/extraction/setup',
icon: <SettingOutlined />,
label: '配置与启动',
},
{
key: '/literature/extraction/workbench',
icon: <CheckSquareOutlined />,
label: '审核工作台',
},
],
},
{
key: 'data-analysis',
@@ -125,6 +135,7 @@ const ASLLayout = () => {
// Picks the sub-menu to expand from the current path: the first path fragment
// (checked in the order listed) that `currentPath` contains wins; no match
// yields an empty list.
const getOpenKeys = () => {
  const menuKeyByFragment: Array<[string, string]> = [
    ['screening/title', 'title-screening'],
    ['screening/fulltext', 'fulltext-screening'],
    ['/extraction', 'extraction'],
  ];
  const match = menuKeyByFragment.find(([fragment]) => currentPath.includes(fragment));
  return match ? [match[1]] : [];
};
const openKeys = getOpenKeys();

View File

@@ -0,0 +1,302 @@
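/**
 * Smart review drawer — 700px right-side Drawer.
 * Builds the review panel dynamically from the schema (template field definitions) returned by the backend.
 * Each field is followed by a QuoteBlock showing its source quote.
 * Footer: [Cancel] + [Approve & save]
 */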
import { useState, useEffect, useMemo } from 'react';
import { Drawer, Collapse, Button, Space, Spin, Typography, message } from 'antd';
import {
CheckCircleOutlined,
FileTextOutlined,
UserOutlined,
SafetyCertificateOutlined,
BarChartOutlined,
FilePdfOutlined,
} from '@ant-design/icons';
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { aslApi } from '../../api';
import ExtractionStatusBadge from './ExtractionStatusBadge';
import FieldGroup from './FieldGroup';
const { Text } = Typography;
// Icon shown in each Collapse header, keyed by base module name. The drawer maps
// every `outcomes_*` module key to `outcomes` before looking up here and falls
// back to <FileTextOutlined /> for unknown modules.
const MODULE_ICONS: Record<string, React.ReactNode> = {
metadata: <FileTextOutlined />,
baseline: <UserOutlined />,
rob: <SafetyCertificateOutlined />,
outcomes: <BarChartOutlined />,
};
// Display label per full module key (note: keyed by the full `outcomes_*` name,
// unlike MODULE_ICONS). Modules missing here get a label derived from the key.
const MODULE_LABELS: Record<string, string> = {
metadata: '基础元数据',
baseline: '基线特征',
rob: '偏倚风险评估',
outcomes_survival: '结局指标（生存）',
outcomes_dichotomous: '结局指标（二分类）',
outcomes_continuous: '结局指标（连续）',
};
/**
* 将 LLM 返回的模块数据归一化为扁平 key-value 映射
* 兼容: [{key, value, quote}] 数组 | {field: value, field_quote: "..."} | {field: {value, quote}}
*/
/**
 * Normalises one module of extracted data into a flat key → value record.
 *
 * Accepted shapes:
 *  - an array of `{ key, value, quote }` entries (entries without a `key` are skipped);
 *  - an object whose values are `{ value, quote }` wrappers;
 *  - an already-flat object (values are copied through untouched, including `*_quote` keys).
 *
 * A truthy `quote` is stored under `<key>_quote`; a missing/undefined `value`
 * becomes `null`. Anything that is not a non-null object yields `{}`.
 */
function flattenModuleData(moduleData: any): Record<string, any> {
  const out: Record<string, any> = {};
  if (!moduleData || typeof moduleData !== 'object') return out;

  // Shared writer for both wrapped shapes.
  const put = (name: string, value: any, quote: any): void => {
    out[name] = value ?? null;
    if (quote) out[`${name}_quote`] = quote;
  };

  if (Array.isArray(moduleData)) {
    moduleData.forEach((entry) => {
      const isKeyed = entry !== null && typeof entry === 'object' && 'key' in entry;
      if (isKeyed) put(entry.key, entry.value, entry.quote);
    });
    return out;
  }

  Object.entries(moduleData).forEach(([name, raw]) => {
    const isWrapper =
      raw !== null && typeof raw === 'object' && !Array.isArray(raw) && 'value' in (raw as any);
    if (isWrapper) {
      put(name, (raw as any).value, (raw as any).quote);
    } else {
      out[name] = raw;
    }
  });
  return out;
}
/**
 * Reads `study_id` from the `metadata` module of an extracted-data payload.
 *
 * The metadata module is flattened first, so any shape accepted by
 * `flattenModuleData` works. Returns `''` when the payload, its metadata, or
 * the study id itself is missing; otherwise the id coerced to a string.
 */
function getStudyId(data: any): string {
  const metadata = data ? data.metadata : undefined;
  if (!metadata) return '';
  const studyId = flattenModuleData(metadata).study_id;
  return studyId === null || studyId === undefined ? '' : String(studyId);
}
/** Props accepted by ExtractionDrawer. */
interface Props {
/** Whether the drawer is visible; forwarded to the antd Drawer `open` prop. */
open: boolean;
/** Id of the extraction result to load; the detail query stays disabled while this is null. */
resultId: string | null;
/** Task id supplied by the caller; the component destructures it as `_taskId` and does not read it. */
taskId: string;
/** Called by the Drawer's close handler and by the left-hand footer button. */
onClose: () => void;
/** Called after a review mutation succeeds. */
onSaved: () => void;
}
/**
 * Review drawer for a single extraction result.
 *
 * Loads the result detail while the drawer is open, turns `extractedData`
 * (ordered by `schema` when the backend returns one) into one Collapse panel
 * per module, shows a per-confidence summary of `quoteVerification`, and lets
 * the reviewer approve or reject the result through `reviewExtractionResult`.
 */
const ExtractionDrawer: React.FC<Props> = ({ open, resultId, taskId: _taskId, onClose, onSaved }) => {
const queryClient = useQueryClient();
// Keys of the expanded Collapse panels; only `metadata` is expanded initially.
const [activeKeys, setActiveKeys] = useState<string[]>(['metadata']);
// The detail query only runs while the drawer is open and a result is selected.
const { data: resultResp, isLoading } = useQuery({
queryKey: ['extraction-result-detail', resultId],
queryFn: () => aslApi.getExtractionResultDetail(resultId!),
enabled: open && !!resultId,
});
const result = resultResp?.data;
// Collapse everything except `metadata` again whenever the drawer opens or another result is selected.
useEffect(() => {
if (open) setActiveKeys(['metadata']);
}, [open, resultId]);
// Saves the review decision; on success the cached detail is invalidated and the parent is notified via onSaved.
const reviewMutation = useMutation({
mutationFn: (status: 'approved' | 'rejected') =>
aslApi.reviewExtractionResult(resultId!, { reviewStatus: status }),
onSuccess: () => {
message.success('审核已保存');
queryClient.invalidateQueries({ queryKey: ['extraction-result-detail', resultId] });
onSaved();
},
onError: (err: any) => {
message.error(err.message || '审核保存失败');
},
});
// One Collapse item per module; recomputed only when `result` changes.
const collapseItems = useMemo(() => {
if (!result?.extractedData) return [];
const data = result.extractedData as Record<string, any>;
const quoteVerification = (result.quoteVerification || {}) as Record<string, any>;
const schema = result.schema as Record<string, any[]> | undefined;
// Determine module list from schema or fallback to data keys
const moduleKeys = schema
? Object.keys(schema)
: Object.keys(data);
return moduleKeys.map((modKey) => {
const schemaFields = schema?.[modKey] as Array<{ key: string; label?: string; type?: string }> | undefined;
// Find matching data (handle outcomes_* prefix matching)
// NOTE: when the exact key is absent, any module key starting with `outcomes`
// falls back to the FIRST `outcomes_*` entry found in the data.
const rawModule = data[modKey]
|| (modKey.startsWith('outcomes') ? Object.entries(data).find(([k]) => k.startsWith('outcomes_'))?.[1] : null);
const flat = flattenModuleData(rawModule);
// Same lookup and fallback for the verification data of this module.
const rawQuotes = quoteVerification[modKey]
|| (modKey.startsWith('outcomes') ? Object.entries(quoteVerification).find(([k]) => k.startsWith('outcomes_'))?.[1] : null);
const flatQuotes = flattenModuleData(rawQuotes);
// Build field list from schema (ordered) + extra keys from data
const seenKeys = new Set<string>();
const fields: Array<{ key: string; label: string; value: any; quoteVerification?: any }> = [];
if (schemaFields) {
for (const sf of schemaFields) {
if (!sf.key) continue;
seenKeys.add(sf.key);
const val = flat[sf.key];
// Schema fields without an extracted value are omitted from the panel.
if (val === undefined || val === null) continue;
fields.push({
key: sf.key,
// Without a schema label, derive one from the key: underscores become spaces, words are capitalised.
label: sf.label || sf.key.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase()),
value: val,
// Prefer the verification entry; otherwise wrap an inline `<key>_quote` as a medium-confidence quote.
quoteVerification: flatQuotes[sf.key]
|| (flat[`${sf.key}_quote`] ? { confidence: 'medium' as const, quote: flat[`${sf.key}_quote`] } : undefined),
});
}
}
// Append extra fields not in schema
for (const k of Object.keys(flat)) {
// `*_quote` keys are quote carriers, not fields of their own.
if (k.endsWith('_quote') || seenKeys.has(k)) continue;
seenKeys.add(k);
fields.push({
key: k,
label: k.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase()),
value: flat[k],
quoteVerification: flatQuotes[k]
|| (flat[`${k}_quote`] ? { confidence: 'medium' as const, quote: flat[`${k}_quote`] } : undefined),
});
}
// Icons are keyed by base module name, so all `outcomes_*` variants share the `outcomes` icon.
const baseModKey = modKey.startsWith('outcomes_') ? 'outcomes' : modKey;
const icon = MODULE_ICONS[baseModKey] || <FileTextOutlined />;
const defaultLabel = MODULE_LABELS[modKey] || modKey.replace(/_/g, ' ').replace(/\b\w/g, c => c.toUpperCase());
return {
key: modKey,
label: (
<span>
{icon}
<span className="ml-2">{defaultLabel}</span>
<span className="text-gray-400 text-xs ml-2">({fields.length} )</span>
</span>
),
children: <FieldGroup fields={fields} readOnly={result.reviewStatus === 'approved'} />,
};
});
}, [result]);
// An approved result hides the reject button, disables the approve button and makes the field groups read-only.
const isApproved = result?.reviewStatus === 'approved';
// Title text: the extracted study id, falling back to the snapshot filename in the JSX below.
const studyId = result ? getStudyId(result.extractedData) : '';
return (
<Drawer
title={
result ? (
<div className="flex items-center justify-between w-full pr-4">
<Space>
<ExtractionStatusBadge status={result.reviewStatus || 'pending_review'} />
<Text strong className="text-base">
{studyId || result.snapshotFilename}
</Text>
</Space>
</div>
) : (
'加载中...'
)
}
styles={{ wrapper: { width: 700 } }}
open={open}
onClose={onClose}
destroyOnClose
footer={
result && (
<div className="flex justify-between">
<Button onClick={onClose}></Button>
<Space>
{!isApproved && (
<Button
danger
onClick={() => reviewMutation.mutate('rejected')}
loading={reviewMutation.isPending}
>
</Button>
)}
<Button
type="primary"
icon={<CheckCircleOutlined />}
onClick={() => reviewMutation.mutate('approved')}
loading={reviewMutation.isPending}
disabled={isApproved}
>
{isApproved ? '已核准' : '核准保存'}
</Button>
</Space>
</div>
)
}
>
{isLoading || !result ? (
<div className="flex justify-center items-center py-20">
<Spin size="large" tip="加载提取结果..." />
</div>
) : (
<>
{result.quoteVerification && (
<div className="mb-4 px-3 py-2 bg-blue-50 border border-blue-200 rounded flex items-center justify-between">
<Space size="middle">
<Text type="secondary" className="text-xs"></Text>
{(() => {
const qv = result.quoteVerification || {};
let high = 0, medium = 0, low = 0, total = 0;
Object.values(qv).forEach((mod: any) => {
if (typeof mod === 'object' && mod !== null) {
Object.values(mod).forEach((field: any) => {
if (field && typeof field === 'object' && 'confidence' in field) {
total++;
if (field.confidence === 'high') high++;
else if (field.confidence === 'medium') medium++;
else low++;
}
});
}
});
return (
<Space size={4}>
<span className="text-green-600 text-xs font-medium">{high} </span>
<span className="text-orange-500 text-xs font-medium">{medium} </span>
<span className="text-red-500 text-xs font-medium">{low} </span>
<span className="text-gray-400 text-xs">/ {total} </span>
</Space>
);
})()}
</Space>
<Button
size="small"
type="link"
icon={<FilePdfOutlined />}
onClick={() => {
message.info('PDF 预览功能开发中');
}}
>
PDF
</Button>
</div>
)}
<Collapse
activeKey={activeKeys}
onChange={(keys) => setActiveKeys(keys as string[])}
items={collapseItems}
/>
</>
)}
</Drawer>
);
};
export default ExtractionDrawer;

View File

@@ -0,0 +1,28 @@
/**
* 提取审核状态标签组件
* pending_review / approved / rejected
*/
import { Tag } from 'antd';
import { CheckCircleOutlined, ClockCircleOutlined, CloseCircleOutlined } from '@ant-design/icons';
/** Props accepted by ExtractionStatusBadge. */
interface Props {
  /** Review status key; expected to be one of the keys of `statusMap`. */
  status: string;
}

// Tag colour, label and icon for each known review status.
const statusMap: Record<string, { color: string; text: string; icon: React.ReactNode }> = {
  pending_review: { color: 'orange', text: '待审核', icon: <ClockCircleOutlined /> },
  approved: { color: 'green', text: '已核准', icon: <CheckCircleOutlined /> },
  rejected: { color: 'red', text: '已驳回', icon: <CloseCircleOutlined /> },
};

/**
 * Renders the review status of an extraction result as a coloured antd Tag.
 * Any status that is not a key of `statusMap` is shown as `pending_review`.
 */
const ExtractionStatusBadge: React.FC<Props> = ({ status }) => {
  // Only own keys count: a plain `statusMap[status]` would also resolve inherited
  // names such as "constructor" and yield an entry without color/text/icon.
  const isKnown = Object.prototype.hasOwnProperty.call(statusMap, status);
  const { color, text, icon } = (isKnown && statusMap[status]) || statusMap.pending_review;
  return (
    <Tag color={color} icon={icon}>
      {text}
    </Tag>
  );
};
export default ExtractionStatusBadge;

View File

@@ -0,0 +1,67 @@
/**
* 字段组渲染 — 用于审核抽屉内的 Collapse Panel 内容
* 将 extractedData 中一个模块的字段渲染为 label / value / quote 三行结构
*/
import React from 'react';
import { Descriptions, Typography, Empty } from 'antd';
import QuoteBlock from './QuoteBlock';
const { Text } = Typography;
/** One extracted field as rendered by FieldGroup. */
interface FieldItem {
/** Field identifier; used as the React key of the row. */
key: string;
/** Text shown in the label column. */
label: string;
/** Extracted value; rendered through `formatValue`. */
value: any;
/** Optional source-quote verification; when present a QuoteBlock is rendered under the value. */
quoteVerification?: {
confidence: 'high' | 'medium' | 'low';
quote?: string;
matchScore?: number;
};
}
/** Props accepted by FieldGroup. */
interface Props {
/** Fields to render; an empty or missing list renders an Empty placeholder. */
fields: FieldItem[];
/** Forwarded to each QuoteBlock; defaults to false in the component. */
readOnly?: boolean;
}
/**
 * Converts an extracted value into display text.
 *
 * - `null` / `undefined` → `'-'`
 * - objects carrying a `value` property → the formatted inner value (recursively)
 * - arrays → each element formatted, joined with `', '`
 * - other objects → pretty-printed JSON (2-space indent)
 * - everything else → `String(val)`
 */
function formatValue(val: any): string {
  if (val == null) return '-';
  if (typeof val !== 'object') return String(val);
  // The `value` wrapper check deliberately runs before the array check.
  if ('value' in val) return formatValue(val.value);
  return Array.isArray(val)
    ? val.map(formatValue).join(', ')
    : JSON.stringify(val, null, 2);
}
/**
 * Renders the fields of one module as a bordered, single-column Descriptions
 * list: each row shows the formatted value and, when verification data is
 * attached to the field, a QuoteBlock underneath it. An empty or missing field
 * list renders a simple Empty placeholder instead.
 */
const FieldGroup: React.FC<Props> = ({ fields, readOnly = false }) => {
  const hasFields = Boolean(fields) && fields.length > 0;
  if (!hasFields) {
    return <Empty description="暂无提取字段" image={Empty.PRESENTED_IMAGE_SIMPLE} />;
  }

  const rows = fields.map((field) => {
    const { key, label, value, quoteVerification } = field;
    return (
      <Descriptions.Item key={key} label={label}>
        <div>
          <Text>{formatValue(value)}</Text>
          {quoteVerification && (
            <QuoteBlock
              value={value}
              quoteVerification={quoteVerification}
              readOnly={readOnly}
            />
          )}
        </div>
      </Descriptions.Item>
    );
  });

  return (
    <Descriptions
      bordered
      size="small"
      column={1}
      labelStyle={{ width: 160, fontSize: 12, backgroundColor: '#fafafa' }}
      contentStyle={{ fontSize: 13 }}
    >
      {rows}
    </Descriptions>
  );
};
export default React.memo(FieldGroup);

View File

@@ -0,0 +1,103 @@
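/**
 * Dark terminal-style log component — used on the Step 2 extraction progress page.
 * Logs are pushed in real time over SSE; degrades gracefully to a "no logs" hint.
 * Color scheme: [MinerU] blue / [DeepSeek] purple / [System] green
 */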
import { useEffect, useRef, useState } from 'react';
import { Card } from 'antd';
import { CodeOutlined } from '@ant-design/icons';
import useExtractionLogs from '../../hooks/useExtractionLogs';
/** Props accepted by ProcessingTerminal. */
interface Props {
/** Task whose log stream is displayed; passed straight to `useExtractionLogs`. */
taskId: string;
}
/** One log line as rendered in the terminal. */
interface LogEntry {
/** Rendered verbatim as the first column (format is whatever the server sends — not visible here). */
timestamp: string;
/** Origin of the line; shown in brackets and coloured via `getSourceColor`. */
source: string;
/** Log text. */
message: string;
/** Severity; decides the message colour via `getLevelColor`. */
level: 'info' | 'warn' | 'error';
}
// Text colour for the `[source]` tag of a log line, per known log source.
const sourceColorMap: Record<string, string> = {
  MinerU: '#58a6ff',
  DeepSeek: '#bc8cff',
  System: '#7ee787',
  Aggregator: '#ffa657',
  Worker: '#79c0ff',
};

/**
 * Returns the tag colour for a log source.
 *
 * @param source Source name as received in the log entry.
 * @returns The mapped colour, or the neutral grey `#8b949e` for unknown sources.
 */
function getSourceColor(source: string): string {
  // Only own keys count: a plain `sourceColorMap[source]` would also resolve
  // inherited names such as "constructor" or "toString" and hand a function
  // to the style prop instead of a colour string.
  const isKnown = Object.prototype.hasOwnProperty.call(sourceColorMap, source);
  const color = isKnown ? sourceColorMap[source] : undefined;
  return color || '#8b949e';
}
/**
 * Returns the message colour for a log level: red for `error`, amber for
 * `warn`, and the default light grey for everything else.
 */
function getLevelColor(level: string): string {
  switch (level) {
    case 'error':
      return '#f85149';
    case 'warn':
      return '#d29922';
    default:
      return '#c9d1d9';
  }
}
/**
 * Dark, terminal-styled log viewer for an extraction task.
 *
 * Log lines and the connection flag come from `useExtractionLogs(taskId)`.
 * The view follows new lines automatically until the user scrolls away from
 * the bottom, and resumes following once they scroll back near it.
 */
const ProcessingTerminal: React.FC<Props> = ({ taskId }) => {
const containerRef = useRef<HTMLDivElement>(null);
const { logs, connected } = useExtractionLogs(taskId);
// True while the view should stick to the newest line.
const [autoScroll, setAutoScroll] = useState(true);
// Jump to the bottom whenever the log list changes (or following is re-enabled).
useEffect(() => {
if (autoScroll && containerRef.current) {
containerRef.current.scrollTop = containerRef.current.scrollHeight;
}
}, [logs, autoScroll]);
// Following stays on only while the viewport is within 40px of the bottom.
const handleScroll = () => {
if (!containerRef.current) return;
const { scrollTop, scrollHeight, clientHeight } = containerRef.current;
setAutoScroll(scrollHeight - scrollTop - clientHeight < 40);
};
// NOTE(review): rows below are keyed by array index; the hook trims the list from
// the front once it exceeds its cap, so indices shift — confirm this is acceptable.
return (
<Card
title={
<span className="text-gray-300">
<CodeOutlined className="mr-2" />
{connected && (
<span className="ml-2 inline-block w-2 h-2 rounded-full bg-green-400 animate-pulse" />
)}
</span>
}
className="border-gray-700"
styles={{
header: { backgroundColor: '#1e1e2e', borderBottom: '1px solid #333' },
body: { padding: 0 },
}}
>
<div
ref={containerRef}
onScroll={handleScroll}
className="font-mono text-xs leading-5 overflow-auto"
style={{
backgroundColor: '#0d1117',
height: 320,
padding: '12px 16px',
}}
>
{logs.length === 0 ? (
<div className="text-gray-600 text-center pt-16">
{connected ? '等待日志数据...' : '日志流未连接，进度数据由轮询驱动'}
</div>
) : (
logs.map((log: LogEntry, i: number) => (
<div key={i} className="flex gap-2">
<span style={{ color: '#484f58' }}>{log.timestamp}</span>
<span style={{ color: getSourceColor(log.source) }}>[{log.source}]</span>
<span style={{ color: getLevelColor(log.level) }}>{log.message}</span>
</div>
))
)}
</div>
</Card>
);
};
export default ProcessingTerminal;

View File

@@ -0,0 +1,137 @@
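/**
 * AI source-quote display block
 * - Gray background, with key numbers highlighted by a yellow <mark>
 * - Three-level confidence badge: green / orange / red
 * - On a red (low-confidence) warning, shows the [Force accept] and [Edit value manually] buttons
 */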
import React, { useState } from 'react';
import { Tag, Button, Input, Space, Typography, Tooltip } from 'antd';
import { CheckOutlined, EditOutlined, WarningOutlined } from '@ant-design/icons';
const { Text } = Typography;
/** Verification data attached to one extracted field. */
interface QuoteVerification {
/** Confidence tier; selects the badge colour/text from `confidenceMap`. */
confidence: 'high' | 'medium' | 'low';
/** Source quote; QuoteBlock renders nothing when this is missing or empty. */
quote?: string;
/** Optional match score; displayed as `matchScore * 100` rounded to a whole percent. */
matchScore?: number;
}
/** Props accepted by QuoteBlock. */
interface Props {
/** Extracted value; used only to seed the manual-edit input. */
value: any;
quoteVerification?: QuoteVerification;
/** When provided, a force-accept button is offered for low-confidence quotes. */
onForceAccept?: () => void;
/** When provided, a manual-edit button is offered for low-confidence quotes; receives the edited text. */
onManualEdit?: (newValue: string) => void;
/** Hides the low-confidence action buttons; defaults to false in the component. */
readOnly?: boolean;
}
// Badge colour, label and tooltip for each confidence tier.
// QuoteBlock falls back to the `medium` entry for any unrecognised tier.
const confidenceMap = {
high: { color: 'green' as const, text: '高置信度', tooltip: '原文精确匹配' },
medium: { color: 'orange' as const, text: '中置信度', tooltip: '原文模糊匹配，建议核查' },
low: { color: 'red' as const, text: '低置信度', tooltip: '未在原文中找到匹配，需人工校验' },
};
// A number token: digits, an optional decimal part, an optional trailing `%`.
// The decimal point only belongs to the token when digits follow it, so the
// period that ends a sentence ("... was 5.") is not pulled into the highlight.
const NUMBER_TOKEN_SPLIT = /(\d+(?:\.\d+)?%?)/g;
const NUMBER_TOKEN_EXACT = /^\d+(?:\.\d+)?%?$/;

/**
 * Splits a quote into React nodes, wrapping every number token (integer,
 * decimal, optionally followed by `%`) in a yellow `<mark>`.
 *
 * @param text Quote text to render.
 * @returns The text pieces in their original order; non-numeric pieces are plain fragments.
 */
function highlightNumbers(text: string): React.ReactNode[] {
  // Splitting on a capturing group keeps the matched numbers in the result array.
  const parts = text.split(NUMBER_TOKEN_SPLIT);
  return parts.map((part, i) =>
    NUMBER_TOKEN_EXACT.test(part) ? (
      <mark key={i} style={{ backgroundColor: '#fff3b0', padding: '0 2px', borderRadius: 2 }}>
        {part}
      </mark>
    ) : (
      <React.Fragment key={i}>{part}</React.Fragment>
    )
  );
}
/**
 * Shows the source quote behind one extracted value together with a
 * confidence badge. Renders nothing when there is no quote.
 *
 * For low-confidence quotes a warning row is always shown; unless `readOnly`,
 * action buttons are shown as well, each only when its callback prop
 * (`onForceAccept` / `onManualEdit`) was provided.
 */
const QuoteBlock: React.FC<Props> = ({
value,
quoteVerification,
onForceAccept,
onManualEdit,
readOnly = false,
}) => {
const [editing, setEditing] = useState(false);
// Seeded once from `value` on first render; later changes of the `value` prop do not update it.
const [editValue, setEditValue] = useState(String(value ?? ''));
// Both hooks above run before this early return, keeping hook order stable across renders.
if (!quoteVerification?.quote) return null;
const { confidence, quote, matchScore } = quoteVerification;
// Unknown confidence values are displayed with the `medium` styling.
const conf = confidenceMap[confidence] || confidenceMap.medium;
return (
<div className="mt-1.5 rounded border border-gray-200 bg-gray-50 px-3 py-2 text-xs">
<div className="flex items-center justify-between mb-1.5">
<Space size={4}>
<Text type="secondary" className="text-xs">AI </Text>
<Tooltip title={conf.tooltip}>
<Tag color={conf.color} className="text-xs m-0">
{conf.text}
{matchScore !== undefined && ` (${(matchScore * 100).toFixed(0)}%)`}
</Tag>
</Tooltip>
</Space>
</div>
<div className="text-gray-600 leading-relaxed italic">
&ldquo;{highlightNumbers(quote)}&rdquo;
</div>
{confidence === 'low' && !readOnly && (
<div className="mt-2 flex gap-2">
{onForceAccept && (
<Button
size="small"
type="dashed"
icon={<CheckOutlined />}
onClick={onForceAccept}
className="text-xs"
>
</Button>
)}
{onManualEdit && !editing && (
<Button
size="small"
danger
icon={<EditOutlined />}
onClick={() => setEditing(true)}
className="text-xs"
>
</Button>
)}
{editing && (
<Space.Compact>
<Input
size="small"
value={editValue}
onChange={(e) => setEditValue(e.target.value)}
style={{ width: 200 }}
/>
<Button
size="small"
type="primary"
onClick={() => {
onManualEdit?.(editValue);
setEditing(false);
}}
>
</Button>
</Space.Compact>
)}
</div>
)}
{confidence === 'low' && (
<div className="mt-1.5 text-red-500 flex items-center gap-1">
<WarningOutlined className="text-xs" />
<span></span>
</div>
)}
</div>
);
};
export default React.memo(QuoteBlock);

View File

@@ -0,0 +1,96 @@
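/**
 * SSE connection management hook — used by ProcessingTerminal.
 * Connects to GET /api/v1/asl/extraction/tasks/:taskId/stream
 * Graceful degradation: when the connection fails, connected=false and the frontend relies on React Query polling alone.
 */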
import { useState, useEffect, useRef, useCallback } from 'react';
import { getAccessToken } from '../../../framework/auth/api';
/** One log line as parsed from the `sync`, `log` and `error` SSE events. */
interface LogEntry {
timestamp: string;
source: string;
message: string;
/** Entries arriving on the `error` event are stored with level `'error'` regardless of their own level. */
level: 'info' | 'warn' | 'error';
}
/** Value returned by `useExtractionLogs`. */
interface UseExtractionLogsResult {
/** Buffered log lines, oldest first, capped at MAX_LOGS. */
logs: LogEntry[];
/** True between the stream's `onopen` and its next error or teardown. */
connected: boolean;
}
// Upper bound on buffered log lines; older lines are dropped first.
const MAX_LOGS = 500;

/**
 * Subscribes to the server-sent log stream of an extraction task.
 *
 * Events handled:
 *  - `sync`  — payload `{ logs: LogEntry[] }` replaces the buffer (last MAX_LOGS kept);
 *  - `log`   — payload is one LogEntry, appended to the buffer;
 *  - `error` — payload is one LogEntry, appended with level forced to `'error'`.
 * Payloads that fail to parse are ignored on purpose (logging is best-effort).
 *
 * On a connection error the stream is closed and reopened up to 3 times with a
 * linearly growing delay (2s, 4s, 6s). After that `connected` stays false and
 * the caller is expected to rely on its own polling.
 *
 * @param taskId Task to stream logs for; nothing is opened while it is empty.
 * @returns The buffered logs and the current connection flag.
 */
export default function useExtractionLogs(taskId: string): UseExtractionLogsResult {
  const [logs, setLogs] = useState<LogEntry[]>([]);
  const [connected, setConnected] = useState(false);
  const eventSourceRef = useRef<EventSource | null>(null);
  const retryCountRef = useRef(0);
  const maxRetries = 3;

  const addLog = useCallback((entry: LogEntry) => {
    setLogs((prev) => {
      const next = [...prev, entry];
      return next.length > MAX_LOGS ? next.slice(next.length - MAX_LOGS) : next;
    });
  }, []);

  useEffect(() => {
    if (!taskId) return;

    const token = getAccessToken();
    // EventSource cannot send custom headers, so the token travels in the query
    // string; encode it so characters such as `+`, `=` or `&` survive intact.
    const url = `/api/v1/asl/extraction/tasks/${taskId}/stream${token ? `?token=${encodeURIComponent(token)}` : ''}`;

    // Set by the cleanup below so late callbacks and timers become no-ops.
    let disposed = false;
    let retryTimer: ReturnType<typeof setTimeout> | undefined;
    // Every task (re)subscription starts with a full retry budget.
    retryCountRef.current = 0;

    function connect() {
      if (disposed) return;
      const es = new EventSource(url);
      eventSourceRef.current = es;

      es.onopen = () => {
        setConnected(true);
        retryCountRef.current = 0;
      };

      es.addEventListener('sync', (e: MessageEvent) => {
        try {
          const data = JSON.parse(e.data);
          if (Array.isArray(data.logs)) {
            setLogs(data.logs.slice(-MAX_LOGS));
          }
        } catch { /* ignore */ }
      });

      es.addEventListener('log', (e: MessageEvent) => {
        try {
          const entry = JSON.parse(e.data) as LogEntry;
          addLog(entry);
        } catch { /* ignore */ }
      });

      // Server-sent `error` events carry a LogEntry. Native connection errors are
      // dispatched under the same name without data; their parse failure is
      // swallowed here and they are handled by `onerror` below.
      es.addEventListener('error', (e: MessageEvent) => {
        try {
          const entry = JSON.parse(e.data) as LogEntry;
          addLog({ ...entry, level: 'error' });
        } catch { /* ignore */ }
      });

      es.onerror = () => {
        es.close();
        if (disposed) return;
        setConnected(false);
        if (retryCountRef.current < maxRetries) {
          retryCountRef.current++;
          retryTimer = setTimeout(connect, 2000 * retryCountRef.current);
        }
      };
    }

    connect();

    return () => {
      disposed = true;
      // Without this a pending retry would open a new EventSource after
      // unmount / task change that nothing would ever close.
      if (retryTimer !== undefined) clearTimeout(retryTimer);
      eventSourceRef.current?.close();
      eventSourceRef.current = null;
      setConnected(false);
    };
  }, [taskId, addLog]);

  return { logs, connected };
}

View File

@@ -25,6 +25,11 @@ const ResearchSearch = lazy(() => import('./pages/ResearchSearch'));
// Deep Research V2.0
const DeepResearchPage = lazy(() => import('./pages/DeepResearchPage'));
// 工具 3全文智能提取M2 路由拆分)
const ExtractionSetup = lazy(() => import('./pages/ExtractionSetup'));
const ExtractionProgress = lazy(() => import('./pages/ExtractionProgress'));
const ExtractionWorkbench = lazy(() => import('./pages/ExtractionWorkbench'));
const ASLModule = () => {
return (
<Suspense
@@ -60,6 +65,14 @@ const ASLModule = () => {
<Route path="workbench/:taskId" element={<FulltextWorkbench />} />
<Route path="results/:taskId" element={<FulltextResults />} />
</Route>
{/* 工具 3全文智能提取M2 三步路由) */}
<Route path="extraction">
<Route index element={<Navigate to="setup" replace />} />
<Route path="setup" element={<ExtractionSetup />} />
<Route path="progress/:taskId" element={<ExtractionProgress />} />
<Route path="workbench/:taskId" element={<ExtractionWorkbench />} />
</Route>
</Route>
</Routes>
</Suspense>

View File

@@ -0,0 +1,331 @@
/**
* 工具 3全文智能提取 — 状态驱动路由页面
*
* Step 1: 选模板 + 选 PKB 文献 → 创建任务
* Step 2: 轮询进度
* Step 3: 提取结果列表
*/
import { useState, useEffect } from 'react';
import { Card, Steps, Button, Select, Checkbox, Table, Progress, Tag, Empty, Spin, message, Space, Alert, Typography } from 'antd';
import { FileTextOutlined, ThunderboltOutlined, CheckCircleOutlined, DatabaseOutlined, ReloadOutlined } from '@ant-design/icons';
import { useQuery, useMutation, useQueryClient } from '@tanstack/react-query';
import { aslApi } from '../api';
const { Title, Text } = Typography;
type PageStep = 'setup' | 'progress' | 'results';

/**
 * Resolves the Study ID to display from a result's `metadata` module.
 *
 * Handles the metadata shapes used elsewhere in this feature: an array of
 * `{ key, value }` entries, a `{ study_id: { value } }` wrapper, or a flat
 * `{ study_id }` object. Returns `'-'` when no id is available, so an object
 * is never handed to React as a table-cell child.
 */
function readStudyId(metadata: any): string {
  if (!metadata || typeof metadata !== 'object') return '-';
  let raw: any;
  if (Array.isArray(metadata)) {
    raw = metadata.find(
      (item: any) => item && typeof item === 'object' && item.key === 'study_id'
    )?.value;
  } else {
    raw = metadata.study_id;
    if (raw && typeof raw === 'object' && !Array.isArray(raw) && 'value' in raw) {
      raw = raw.value;
    }
  }
  if (raw === null || raw === undefined || raw === '') return '-';
  return typeof raw === 'object' ? JSON.stringify(raw) : String(raw);
}

/**
 * Tool 3 single-page flow, driven by local state:
 *  1. setup    — pick a template and PKB documents, then create the task;
 *  2. progress — poll the task status every 3 seconds;
 *  3. results  — list the per-document extraction results.
 */
const ExtractionPage = () => {
  const queryClient = useQueryClient();
  const [step, setStep] = useState<PageStep>('setup');
  const [taskId, setTaskId] = useState<string | null>(null);

  // ── Step 1 state ─────────────────────────
  const [selectedTemplateId, setSelectedTemplateId] = useState<string>('');
  const [selectedKbId, setSelectedKbId] = useState<string>('');
  const [selectedDocIds, setSelectedDocIds] = useState<string[]>([]);
  const [projectTemplateId, setProjectTemplateId] = useState<string>('');
  // Temporary projectId (M1 simplification: generated from Date.now)
  const [projectId] = useState(() => `ext-${Date.now()}`);

  // ── Data queries ─────────────────────────
  const { data: templatesResp, isLoading: loadingTemplates } = useQuery({
    queryKey: ['extraction-templates'],
    queryFn: () => aslApi.getExtractionTemplates(),
  });
  const templates = templatesResp?.data || [];

  const { data: kbsResp, isLoading: loadingKbs } = useQuery({
    queryKey: ['extraction-knowledge-bases'],
    queryFn: () => aslApi.getExtractionKnowledgeBases(),
  });
  const knowledgeBases = kbsResp?.data || [];

  const { data: docsResp, isLoading: loadingDocs } = useQuery({
    queryKey: ['extraction-documents', selectedKbId],
    queryFn: () => aslApi.getExtractionDocuments(selectedKbId),
    enabled: !!selectedKbId,
  });
  const documents = docsResp?.data || [];

  // ── Clone template ───────────────────────
  const cloneMutation = useMutation({
    mutationFn: (baseTemplateId: string) =>
      aslApi.cloneExtractionTemplate(projectId, baseTemplateId),
  });

  useEffect(() => {
    // Drop the clone of the previously selected template right away, so the
    // submit button cannot start a task with it while the new clone is pending
    // (or after the new clone has failed).
    setProjectTemplateId('');
    if (!selectedTemplateId || !projectId) return;

    // Flipped by the cleanup when the selection changes again, so a slow
    // response for an older selection can never overwrite a newer one.
    let superseded = false;
    cloneMutation.mutate(selectedTemplateId, {
      onSuccess: (resp) => {
        if (!superseded) setProjectTemplateId(resp.data?.id || '');
      },
      onError: (err: any) => {
        if (!superseded) message.error(err?.message || '模板克隆失败');
      },
    });
    return () => {
      superseded = true;
    };
  }, [selectedTemplateId]);

  // ── Create task ──────────────────────────
  const createTaskMutation = useMutation({
    mutationFn: () =>
      aslApi.createExtractionTask({
        projectId,
        projectTemplateId,
        pkbKnowledgeBaseId: selectedKbId,
        documentIds: selectedDocIds,
        idempotencyKey: `${projectId}-${Date.now()}`,
      }),
    onSuccess: (resp: any) => {
      const id = resp.taskId || resp.data?.taskId;
      if (id) {
        setTaskId(id);
        setStep('progress');
        message.success(`任务已创建，正在提取 ${selectedDocIds.length} 篇文献`);
      } else {
        // A 2xx response without a task id used to leave the page stuck on
        // the setup step with no feedback.
        message.error('创建任务失败');
      }
    },
    onError: (err: any) => {
      message.error(err.message || '创建任务失败');
    },
  });

  // ── Step 2: poll progress ────────────────
  const { data: statusResp } = useQuery({
    queryKey: ['extraction-task-status', taskId],
    queryFn: () => aslApi.getExtractionTaskStatus(taskId!),
    enabled: step === 'progress' && !!taskId,
    refetchInterval: 3000,
  });
  const taskStatus = statusResp?.data;

  // Move on to the results step once the task reaches a terminal status.
  useEffect(() => {
    if (taskStatus && (taskStatus.status === 'completed' || taskStatus.status === 'failed')) {
      setStep('results');
      queryClient.invalidateQueries({ queryKey: ['extraction-task-results', taskId] });
    }
  }, [taskStatus?.status]);

  // ── Step 3: extraction results ───────────
  const { data: resultsResp, isLoading: loadingResults } = useQuery({
    queryKey: ['extraction-task-results', taskId],
    queryFn: () => aslApi.getExtractionTaskResults(taskId!),
    enabled: step === 'results' && !!taskId,
  });
  const results = resultsResp?.data || [];

  // ── Render ───────────────────────────────
  const currentStep = step === 'setup' ? 0 : step === 'progress' ? 1 : 2;

  return (
    <div className="p-6 max-w-5xl mx-auto">
      <Title level={4}>
        <FileTextOutlined className="mr-2" />
      </Title>
      <Steps
        current={currentStep}
        className="mb-6"
        items={[
          { title: '配置提取', icon: <DatabaseOutlined /> },
          { title: '提取进行中', icon: <ThunderboltOutlined /> },
          { title: '提取结果', icon: <CheckCircleOutlined /> },
        ]}
      />
      {/* ═══ Step 1: setup ═══ */}
      {step === 'setup' && (
        <Card>
          <Space direction="vertical" size="large" style={{ width: '100%' }}>
            {/* Template selection */}
            <div>
              <Text strong></Text>
              <Select
                className="w-full mt-2"
                placeholder="请选择研究类型模板"
                loading={loadingTemplates}
                value={selectedTemplateId || undefined}
                onChange={setSelectedTemplateId}
                options={templates.map((t: any) => ({
                  value: t.id,
                  label: `${t.name}${t.description ? `${t.description}` : ''}`,
                }))}
              />
            </div>
            {/* Knowledge-base selection */}
            <div>
              <Text strong> PKB </Text>
              <Select
                className="w-full mt-2"
                placeholder="请选择包含 PDF 文献的知识库"
                loading={loadingKbs}
                value={selectedKbId || undefined}
                onChange={(v) => { setSelectedKbId(v); setSelectedDocIds([]); }}
                options={knowledgeBases.map((kb: any) => ({
                  value: kb.id,
                  label: `${kb.name} (${kb.fileCount} 篇)`,
                }))}
              />
            </div>
            {/* Document list */}
            {selectedKbId && (
              <div>
                <div className="flex justify-between items-center mb-2">
                  <Text strong> ({selectedDocIds.length}/{documents.length} )</Text>
                  <Checkbox
                    checked={selectedDocIds.length === documents.length && documents.length > 0}
                    indeterminate={selectedDocIds.length > 0 && selectedDocIds.length < documents.length}
                    onChange={(e) => {
                      setSelectedDocIds(
                        e.target.checked ? documents.map((d: any) => d.documentId) : []
                      );
                    }}
                  >
                  </Checkbox>
                </div>
                {loadingDocs ? (
                  <Spin />
                ) : documents.length === 0 ? (
                  <Empty description="该知识库暂无 PDF 文献，请先前往 PKB 上传" />
                ) : (
                  <Checkbox.Group
                    value={selectedDocIds}
                    onChange={(vals) => setSelectedDocIds(vals as string[])}
                    style={{ width: '100%' }}
                  >
                    <div className="space-y-2 max-h-[400px] overflow-y-auto">
                      {documents.map((doc: any) => (
                        <div key={doc.documentId} className="flex items-center p-2 bg-gray-50 rounded hover:bg-gray-100">
                          <Checkbox value={doc.documentId}>
                            <span className="ml-1">{doc.filename}</span>
                            <span className="text-gray-400 text-xs ml-2">
                              ({(doc.fileSizeBytes / 1024 / 1024).toFixed(1)} MB)
                            </span>
                          </Checkbox>
                        </div>
                      ))}
                    </div>
                  </Checkbox.Group>
                )}
              </div>
            )}
            {/* Submit button */}
            <Button
              type="primary"
              size="large"
              icon={<ThunderboltOutlined />}
              disabled={!projectTemplateId || selectedDocIds.length === 0}
              loading={createTaskMutation.isPending}
              onClick={() => createTaskMutation.mutate()}
              className="w-full"
            >
              ({selectedDocIds.length} )
            </Button>
          </Space>
        </Card>
      )}
      {/* ═══ Step 2: progress ═══ */}
      {step === 'progress' && taskStatus && (
        <Card>
          <div className="text-center py-8">
            <ThunderboltOutlined style={{ fontSize: 48, color: '#10b981' }} className="mb-4" />
            <Title level={4}>...</Title>
            <Progress
              percent={taskStatus.percent}
              status="active"
              strokeColor="#10b981"
              className="max-w-md mx-auto my-6"
            />
            <div className="space-y-1 text-gray-500">
              <div>: {taskStatus.totalCount} </div>
              <div>
                : <Tag color="green">{taskStatus.completedCount}</Tag>
                : <Tag color="blue">{taskStatus.extractingCount}</Tag>
                : <Tag>{taskStatus.pendingCount}</Tag>
                {taskStatus.errorCount > 0 && (
                  <>: <Tag color="red">{taskStatus.errorCount}</Tag></>
                )}
              </div>
            </div>
          </div>
        </Card>
      )}
      {/* ═══ Step 3: results ═══ */}
      {step === 'results' && (
        <Card>
          {taskStatus && taskStatus.status === 'failed' && (
            <Alert
              type="warning"
              message="部分文献提取失败"
              description={`${taskStatus.errorCount} 篇文献提取失败，已完成 ${taskStatus.completedCount}`}
              showIcon
              className="mb-4"
            />
          )}
          <div className="flex justify-between items-center mb-4">
            <Title level={5} className="mb-0"></Title>
            <Button
              icon={<ReloadOutlined />}
              onClick={() => queryClient.invalidateQueries({ queryKey: ['extraction-task-results', taskId] })}
            >
            </Button>
          </div>
          <Table
            loading={loadingResults}
            dataSource={results}
            rowKey="id"
            pagination={false}
            columns={[
              {
                title: '文件名',
                dataIndex: 'snapshotFilename',
                ellipsis: true,
              },
              {
                title: '状态',
                dataIndex: 'status',
                width: 100,
                render: (status: string) => {
                  const map: Record<string, { color: string; text: string }> = {
                    completed: { color: 'green', text: '已完成' },
                    error: { color: 'red', text: '失败' },
                    extracting: { color: 'blue', text: '提取中' },
                    pending: { color: 'default', text: '等待中' },
                  };
                  const { color, text } = map[status] || { color: 'default', text: status };
                  return <Tag color={color}>{text}</Tag>;
                },
              },
              {
                title: 'Study ID',
                dataIndex: ['extractedData', 'metadata', 'study_id'],
                // Resolve from the record so array / {value, quote} metadata shapes
                // render as text instead of crashing React with an object child.
                render: (_: unknown, record: any) => readStudyId(record?.extractedData?.metadata),
              },
              {
                title: '错误信息',
                dataIndex: 'errorMessage',
                ellipsis: true,
                render: (v: any) => v ? <Text type="danger" ellipsis>{v}</Text> : '-',
              },
            ]}
          />
          <div className="mt-4">
            <Button onClick={() => { setStep('setup'); setTaskId(null); setSelectedDocIds([]); }}>
            </Button>
          </div>
        </Card>
      )}
    </div>
  );
};
export default ExtractionPage;

View File

@@ -0,0 +1,140 @@
/**
* 工具 3 Step 2: 提取进度 + 终端日志
* 原型图 View 2: 居中布局,进度条 + ProcessingTerminal
* 双轨制React Query 轮询驱动进度条/跳转SSE 驱动日志区
*/
import { useEffect } from 'react';
import { useParams, useNavigate } from 'react-router-dom';
import { Card, Progress, Tag, Button, Typography, Spin, Space, Alert } from 'antd';
import { CheckCircleOutlined, CloseCircleOutlined, RocketOutlined } from '@ant-design/icons';
import { useQuery, useQueryClient } from '@tanstack/react-query';
import { aslApi } from '../api';
import ProcessingTerminal from '../components/extraction/ProcessingTerminal';
const { Title, Text } = Typography;
/**
 * Tool 3, step 2: extraction progress page.
 *
 * Polls the task status with React Query to drive the progress bar and the
 * "done" state, and renders `ProcessingTerminal` for the live log area.
 * Reads `taskId` from the route params.
 */
const ExtractionProgress = () => {
  const { taskId } = useParams<{ taskId: string }>();
  const navigate = useNavigate();
  const queryClient = useQueryClient();

  // Poll every 3 s until the task itself reports a terminal status.
  const { data: statusResp, isLoading } = useQuery({
    queryKey: ['extraction-task-status', taskId],
    queryFn: () => aslApi.getExtractionTaskStatus(taskId!),
    enabled: !!taskId,
    refetchInterval: (query) => {
      const st = query.state.data?.data?.status;
      if (st === 'completed' || st === 'failed') return false;
      return 3000;
    },
  });
  const taskStatus = statusResp?.data;

  // The aggregator cron may take 1-2 minutes to update the task status, so the
  // page is considered done when EITHER the task status is terminal OR every
  // result has finished (pending = 0 and extracting = 0).
  const isDone =
    taskStatus?.status === 'completed' ||
    taskStatus?.status === 'failed' ||
    (taskStatus && taskStatus.totalCount > 0 && taskStatus.pendingCount === 0 && taskStatus.extractingCount === 0);

  // Make sure the results list is refetched once the run is over.
  useEffect(() => {
    if (isDone && taskId) {
      queryClient.invalidateQueries({ queryKey: ['extraction-task-results', taskId] });
    }
  }, [isDone, taskId, queryClient]);

  if (!taskId) {
    return <div className="p-6 text-center text-gray-500"> taskId </div>;
  }
  if (isLoading || !taskStatus) {
    return (
      <div className="p-6 flex justify-center items-center min-h-[400px]">
        <Spin size="large" tip="正在加载任务状态..." />
      </div>
    );
  }

  // While the task status lags behind the per-result counters, `status` is not
  // yet 'completed' even though everything succeeded. Treat the run as
  // successful when the task says so, or when all results are finished, the
  // task is not marked failed and no result errored. Icon, title and progress
  // bar all use this single flag so they cannot contradict each other.
  const isSuccess =
    taskStatus.status === 'completed' ||
    (Boolean(isDone) && taskStatus.status !== 'failed' && !(taskStatus.errorCount > 0));
  const progressStatus = isDone ? (isSuccess ? 'success' : 'exception') : 'active';

  return (
    <div className="p-6 max-w-4xl mx-auto">
      <Card className="text-center mb-6">
        <div className="py-6">
          {isDone ? (
            isSuccess ? (
              <CheckCircleOutlined style={{ fontSize: 56, color: '#52c41a' }} />
            ) : (
              <CloseCircleOutlined style={{ fontSize: 56, color: '#ff4d4f' }} />
            )
          ) : (
            <RocketOutlined style={{ fontSize: 56, color: '#1677ff' }} className="animate-pulse" />
          )}
          <Title level={4} className="mt-4 mb-2">
            {isDone
              ? isSuccess
                ? '提取完成!'
                : '提取结束(部分失败)'
              : '正在智能提取...'}
          </Title>
          <Progress
            percent={taskStatus.percent}
            status={progressStatus}
            strokeColor={isSuccess ? '#52c41a' : undefined}
            className="max-w-lg mx-auto my-6"
          />
          <Space size="middle" className="justify-center">
            <div>
              <Text type="secondary"></Text>
              <div className="text-lg font-semibold">{taskStatus.totalCount}</div>
            </div>
            <div>
              <Tag color="green" className="text-sm px-2 py-0.5">{taskStatus.completedCount} </Tag>
            </div>
            <div>
              <Tag color="processing" className="text-sm px-2 py-0.5">{taskStatus.extractingCount} </Tag>
            </div>
            <div>
              <Tag className="text-sm px-2 py-0.5">{taskStatus.pendingCount} </Tag>
            </div>
            {taskStatus.errorCount > 0 && (
              <div>
                <Tag color="error" className="text-sm px-2 py-0.5">{taskStatus.errorCount} </Tag>
              </div>
            )}
          </Space>
        </div>
      </Card>
      <ProcessingTerminal taskId={taskId} />
      {isDone && (
        <div className="mt-6 space-y-4">
          {taskStatus.errorCount > 0 && (
            <Alert
              type="warning"
              message={`${taskStatus.errorCount} 篇文献提取失败,${taskStatus.completedCount} 篇已完成`}
              showIcon
            />
          )}
          <div className="flex justify-center">
            <Button
              type="primary"
              size="large"
              icon={<CheckCircleOutlined />}
              onClick={() => navigate(`/literature/extraction/workbench/${taskId}`)}
            >
            </Button>
          </div>
        </div>
      )}
    </div>
  );
};
export default ExtractionProgress;

View File

@@ -0,0 +1,259 @@
/**
* 工具 3 Step 1: 配置提取模板 + 选择 PKB 文献
* 原型图 View 1: 5:2 双栏 — 左3模板 + 右2文献
*/
import { useState, useEffect } from 'react';
import { useNavigate } from 'react-router-dom';
import { Row, Col, Card, Button, Select, Checkbox, Tag, Empty, Spin, message, Typography } from 'antd';
import { ThunderboltOutlined, LockOutlined, DatabaseOutlined, FilePdfOutlined } from '@ant-design/icons';
import { useQuery, useMutation } from '@tanstack/react-query';
import { aslApi } from '../api';
const { Text } = Typography;
/**
 * Tool 3, step 1: pick an extraction template and PKB documents, then create
 * the extraction task and navigate to the progress page.
 *
 * Selecting a system template clones it into a project template; the clone id
 * is what the task is created with.
 */
const ExtractionSetup = () => {
  const navigate = useNavigate();
  const [selectedTemplateId, setSelectedTemplateId] = useState<string>('');
  const [selectedKbId, setSelectedKbId] = useState<string>('');
  const [selectedDocIds, setSelectedDocIds] = useState<string[]>([]);
  // Clone ids keyed by the SOURCE template id. Keeping them per template
  // (instead of one "last clone" value) means a previous template's clone is
  // never used for the current selection, and clone responses that arrive out
  // of order cannot overwrite the id of another template.
  const [clonedTemplateIds, setClonedTemplateIds] = useState<Record<string, string>>({});
  const [projectId] = useState(() => `ext-${Date.now()}`);

  // Empty until the clone for the currently selected template has returned,
  // which keeps the submit button disabled in the meantime.
  const projectTemplateId = clonedTemplateIds[selectedTemplateId] ?? '';

  const { data: templatesResp, isLoading: loadingTemplates } = useQuery({
    queryKey: ['extraction-templates'],
    queryFn: () => aslApi.getExtractionTemplates(),
  });
  const templates = templatesResp?.data || [];
  const selectedTemplate = templates.find((t: any) => t.id === selectedTemplateId);

  const { data: kbsResp, isLoading: loadingKbs } = useQuery({
    queryKey: ['extraction-knowledge-bases'],
    queryFn: () => aslApi.getExtractionKnowledgeBases(),
  });
  const knowledgeBases = kbsResp?.data || [];

  // Documents are only fetched once a knowledge base has been chosen.
  const { data: docsResp, isLoading: loadingDocs } = useQuery({
    queryKey: ['extraction-documents', selectedKbId],
    queryFn: () => aslApi.getExtractionDocuments(selectedKbId),
    enabled: !!selectedKbId,
  });
  const documents = docsResp?.data || [];

  const cloneMutation = useMutation({
    mutationFn: (templateId: string) => aslApi.cloneExtractionTemplate(projectId, templateId),
    // Hook-level callbacks run for every mutation, including superseded ones,
    // so the result is stored under the template id it was requested for.
    onSuccess: (resp, templateId) => {
      const clonedId = resp.data?.id || '';
      setClonedTemplateIds((prev) => ({ ...prev, [templateId]: clonedId }));
    },
    // Without feedback a failed clone just leaves the submit button disabled.
    onError: (err: any) => {
      message.error(err.message || '模板克隆失败');
    },
  });

  // Clone whenever the selection changes. `cloneMutation` is intentionally not
  // a dependency: its identity changes every render.
  useEffect(() => {
    if (selectedTemplateId && projectId) {
      cloneMutation.mutate(selectedTemplateId);
    }
  }, [selectedTemplateId]);

  const createTaskMutation = useMutation({
    mutationFn: () =>
      aslApi.createExtractionTask({
        projectId,
        projectTemplateId,
        pkbKnowledgeBaseId: selectedKbId,
        documentIds: selectedDocIds,
        idempotencyKey: `${projectId}-${Date.now()}`,
      }),
    onSuccess: (resp: any) => {
      // The task id may be at the top level or nested under `data`.
      const id = resp.taskId || resp.data?.taskId;
      if (id) {
        message.success(`任务已创建,正在提取 ${selectedDocIds.length} 篇文献`);
        navigate(`/literature/extraction/progress/${id}`);
      } else {
        // A response without a task id cannot be followed up; tell the user.
        message.error('创建任务失败');
      }
    },
    onError: (err: any) => {
      message.error(err.message || '创建任务失败');
    },
  });

  const baseFields = selectedTemplate?.baseFields as Record<string, any> | undefined;
  // Display names for the known template modules; unknown modules fall back to
  // their raw key.
  const fieldModuleNames: Record<string, string> = {
    metadata: '基础元数据',
    baseline: '基线特征',
    rob: '偏倚风险评估',
    outcomes_survival: '结局-生存',
    outcomes_dichotomous: '结局-二分类',
    outcomes_continuous: '结局-连续型',
  };

  return (
    <div className="p-6 max-w-6xl mx-auto">
      <Row gutter={24}>
        {/* Left 3/5: Template Configuration */}
        <Col span={14}>
          <Card
            title={
              <span>
                <DatabaseOutlined className="mr-2 text-blue-500" />
                1 (Schema)
              </span>
            }
          >
            <div className="mb-5">
              <Text strong className="block mb-2"></Text>
              <Select
                className="w-full"
                placeholder="请选择研究类型模板"
                loading={loadingTemplates}
                value={selectedTemplateId || undefined}
                onChange={setSelectedTemplateId}
                options={templates.map((t: any) => ({
                  value: t.id,
                  label: `${t.name}${t.description ? `${t.description}` : ''}`,
                }))}
              />
            </div>
            {baseFields && (
              <div className="bg-slate-50 border border-slate-200 rounded-md p-4 mb-4">
                <div className="text-xs text-gray-500 mb-3 flex items-center">
                  <LockOutlined className="mr-1.5 text-gray-400" />
                </div>
                <div className="space-y-3">
                  {Object.entries(baseFields).map(([module, fields]) => (
                    <div key={module}>
                      <Text type="secondary" className="text-xs font-medium">
                        {fieldModuleNames[module] || module}
                      </Text>
                      <div className="flex flex-wrap gap-1.5 mt-1">
                        {(Array.isArray(fields) ? fields : []).map((f: any) => {
                          // Fields may be plain strings or {key, label} objects.
                          const label = typeof f === 'object' && f !== null ? (f.label || f.key || JSON.stringify(f)) : String(f);
                          const key = typeof f === 'object' && f !== null ? (f.key || f.label || JSON.stringify(f)) : String(f);
                          return (
                            <Tag key={key} className="m-0" icon={<LockOutlined />}>
                              {label}
                            </Tag>
                          );
                        })}
                      </div>
                    </div>
                  ))}
                </div>
              </div>
            )}
            <div className="border-t border-gray-100 pt-4">
              <div className="flex justify-between items-center mb-2">
                <div>
                  <Text strong> (Custom Fields)</Text>
                  <div className="text-xs text-gray-400 mt-0.5">M3 </div>
                </div>
                <Button size="small" disabled>
                  +
                </Button>
              </div>
              <div className="text-center py-4 text-xs text-gray-400 border border-dashed border-gray-200 rounded">
                AI
              </div>
            </div>
          </Card>
        </Col>
        {/* Right 2/5: PKB Document Selection */}
        <Col span={10}>
          <Card
            title={
              <span>
                <FilePdfOutlined className="mr-2 text-red-500" />
                2 PKB
              </span>
            }
          >
            <div className="mb-4">
              <Text strong className="block mb-2"></Text>
              <Select
                className="w-full"
                placeholder="请选择包含 PDF 文献的知识库"
                loading={loadingKbs}
                value={selectedKbId || undefined}
                onChange={(v) => { setSelectedKbId(v); setSelectedDocIds([]); }}
                options={knowledgeBases.map((kb: any) => ({
                  value: kb.id,
                  label: `${kb.name} (${kb.fileCount} 篇)`,
                }))}
              />
            </div>
            {selectedKbId && (
              <div>
                <div className="flex justify-between items-center mb-2">
                  <Text strong>
                    ({selectedDocIds.length}/{documents.length} )
                  </Text>
                  <Checkbox
                    checked={selectedDocIds.length === documents.length && documents.length > 0}
                    indeterminate={selectedDocIds.length > 0 && selectedDocIds.length < documents.length}
                    onChange={(e) => {
                      setSelectedDocIds(
                        e.target.checked ? documents.map((d: any) => d.documentId) : []
                      );
                    }}
                  >
                  </Checkbox>
                </div>
                {loadingDocs ? (
                  <div className="text-center py-8"><Spin /></div>
                ) : documents.length === 0 ? (
                  <Empty description="该知识库暂无 PDF 文献,请先前往「个人知识库」上传" />
                ) : (
                  <Checkbox.Group
                    value={selectedDocIds}
                    onChange={(vals) => setSelectedDocIds(vals as string[])}
                    style={{ width: '100%' }}
                  >
                    <div className="space-y-2 max-h-[400px] overflow-y-auto pr-1">
                      {documents.map((doc: any) => (
                        <div key={doc.documentId} className="flex items-center p-2.5 bg-gray-50 border border-gray-200 rounded hover:bg-gray-100 transition-colors">
                          <Checkbox value={doc.documentId}>
                            <FilePdfOutlined className="text-red-500 mr-1.5" />
                            <span className="text-sm truncate" style={{ maxWidth: 200 }}>{doc.filename}</span>
                            <span className="text-gray-400 text-xs ml-2">
                              {(doc.fileSizeBytes / 1024 / 1024).toFixed(1)} MB
                            </span>
                          </Checkbox>
                        </div>
                      ))}
                    </div>
                  </Checkbox.Group>
                )}
              </div>
            )}
            {!selectedKbId && (
              <div className="text-center py-8 text-gray-400 text-sm">
                PKB
              </div>
            )}
          </Card>
        </Col>
      </Row>
      <div className="mt-6 flex justify-end">
        <Button
          type="primary"
          size="large"
          icon={<ThunderboltOutlined />}
          disabled={!projectTemplateId || selectedDocIds.length === 0}
          loading={createTaskMutation.isPending}
          onClick={() => createTaskMutation.mutate()}
        >
          ({selectedDocIds.length} )
        </Button>
      </div>
    </div>
  );
};
export default ExtractionSetup;

View File

@@ -0,0 +1,281 @@
/**
* 工具 3 Step 3: 全屏审核工作台
* 原型图 View 3: 全宽表格 + 700px 右侧审核抽屉
*/
import { useState } from 'react';
import { useParams, useNavigate } from 'react-router-dom';
import { Table, Tag, Button, Alert, Space, Typography, message } from 'antd';
import {
CheckCircleOutlined,
EyeOutlined,
DownloadOutlined,
ReloadOutlined,
ArrowLeftOutlined,
ExclamationCircleOutlined,
} from '@ant-design/icons';
import { useQuery, useQueryClient } from '@tanstack/react-query';
import { aslApi } from '../api';
import ExtractionDrawer from '../components/extraction/ExtractionDrawer';
import ExtractionStatusBadge from '../components/extraction/ExtractionStatusBadge';
const { Title, Text } = Typography;
/**
 * Read one field of one module out of an `extractedData` payload and return it
 * as display text.
 *
 * Two module layouts are supported:
 * - array format: `[{ key, value, quote }, ...]`
 * - flat format: `{ field: value }` or `{ field: { value, ... } }`
 *
 * If `data[moduleName]` is missing or falsy, the first key of `data` that
 * starts with `moduleName` is used instead (e.g. `outcomes` resolves to
 * `outcomes_survival`).
 *
 * @param data - The extracted data object; any falsy value yields `''`.
 * @param moduleName - Module key, or a prefix of one.
 * @param fieldKey - Field key inside the module.
 * @returns The value as a string; `''` when the module, field or value is
 *   missing or null. Object values are JSON-stringified in every layout.
 */
function getExtractedField(data: any, moduleName: string, fieldKey: string): string {
  if (!data) return '';

  // Locate the module, falling back to a prefix match (outcomes_survival etc.).
  let modData = data[moduleName];
  if (!modData) {
    const fallbackKey = Object.keys(data).find((k) => k.startsWith(moduleName));
    if (fallbackKey !== undefined) modData = data[fallbackKey];
  }
  if (!modData) return '';

  // Single conversion used by every layout, so an object value is never
  // rendered as "[object Object]".
  const toText = (v: unknown): string => {
    if (v === null || v === undefined) return '';
    if (typeof v === 'object') return JSON.stringify(v);
    return String(v);
  };

  // Array format.
  if (Array.isArray(modData)) {
    const item = modData.find((f: any) => f?.key === fieldKey);
    return item ? toText(item.value) : '';
  }

  // Flat object format; unwrap `{ value: ... }` wrappers.
  const val = modData[fieldKey];
  if (val !== null && typeof val === 'object' && 'value' in val) {
    return toText(val.value);
  }
  return toText(val);
}
/**
 * Tool 3, step 3: review workbench.
 *
 * Shows every extraction result of a task in a full-width table, opens
 * `ExtractionDrawer` for reviewing a single result, and exports the results
 * to Excel. Reads `taskId` from the route params.
 */
const ExtractionWorkbench = () => {
  const { taskId } = useParams<{ taskId: string }>();
  const navigate = useNavigate();
  const queryClient = useQueryClient();
  const [selectedResultId, setSelectedResultId] = useState<string | null>(null);
  const [drawerVisible, setDrawerVisible] = useState(false);
  // Current table page; needed so the "#" column keeps counting across pages.
  const [page, setPage] = useState(1);

  const { data: statusResp } = useQuery({
    queryKey: ['extraction-task-status', taskId],
    queryFn: () => aslApi.getExtractionTaskStatus(taskId!),
    enabled: !!taskId,
  });
  const taskStatus = statusResp?.data;

  const { data: resultsResp, isLoading: loadingResults } = useQuery({
    queryKey: ['extraction-task-results', taskId],
    queryFn: () => aslApi.getExtractionTaskResults(taskId!),
    enabled: !!taskId,
  });
  const results = (resultsResp?.data || []) as any[];

  // Summary counters derived from the loaded results.
  const approvedCount = results.filter((r: any) => r.reviewStatus === 'approved').length;
  const pendingCount = results.filter((r: any) => r.status === 'completed' && r.reviewStatus !== 'approved').length;
  const errorCount = results.filter((r: any) => r.status === 'error').length;

  // Pagination is only enabled above PAGE_SIZE rows. The page is clamped so a
  // shrinking result list cannot leave the row offset pointing past the end.
  const PAGE_SIZE = 50;
  const isPaginated = results.length > PAGE_SIZE;
  const lastPage = Math.max(1, Math.ceil(results.length / PAGE_SIZE));
  const currentPage = isPaginated ? Math.min(page, lastPage) : 1;
  const rowOffset = (currentPage - 1) * PAGE_SIZE;

  const handleOpenDrawer = (resultId: string) => {
    setSelectedResultId(resultId);
    setDrawerVisible(true);
  };
  const handleDrawerClose = () => {
    setDrawerVisible(false);
    setSelectedResultId(null);
  };
  // Refetch the table after a review has been saved in the drawer.
  const handleReviewSaved = () => {
    queryClient.invalidateQueries({ queryKey: ['extraction-task-results', taskId] });
  };
  // Download the export blob through a temporary anchor element.
  const handleExportExcel = async () => {
    if (!taskId) return;
    try {
      const blob = await aslApi.exportExtractionResults(taskId);
      const url = URL.createObjectURL(blob);
      const a = document.createElement('a');
      a.href = url;
      a.download = `extraction-results-${taskId}.xlsx`;
      a.click();
      URL.revokeObjectURL(url);
      message.success('导出成功');
    } catch (err: any) {
      message.error(err.message || '导出失败');
    }
  };

  if (!taskId) {
    return <div className="p-6 text-center text-gray-500"> taskId </div>;
  }

  const columns = [
    {
      title: '#',
      width: 50,
      // `index` is relative to the current page, so add the page offset to
      // keep numbering continuous on page 2 and beyond.
      render: (_: any, __: any, index: number) => rowOffset + index + 1,
    },
    {
      title: '文件名 / Study ID',
      key: 'filename',
      ellipsis: true,
      render: (_: any, record: any) => {
        const studyId = getExtractedField(record.extractedData, 'metadata', 'study_id');
        return (
          <div>
            <div className="font-medium text-gray-800 truncate">
              {studyId || record.snapshotFilename}
            </div>
            {studyId && (
              <div className="text-xs text-gray-400 truncate">{record.snapshotFilename}</div>
            )}
          </div>
        );
      },
    },
    {
      title: '解析流程',
      key: 'parseFlow',
      width: 140,
      render: () => (
        <Space size={4}>
          <Tag color="blue">MinerU</Tag>
          <Tag color="purple">DeepSeek</Tag>
        </Space>
      ),
    },
    {
      title: '提取状态',
      dataIndex: 'status',
      width: 100,
      render: (status: string) => {
        const map: Record<string, { color: string; text: string }> = {
          completed: { color: 'green', text: '已完成' },
          error: { color: 'red', text: '失败' },
          extracting: { color: 'blue', text: '提取中' },
          pending: { color: 'default', text: '等待中' },
        };
        // Unknown statuses are shown verbatim with the default color.
        const { color, text } = map[status] || { color: 'default', text: status };
        return <Tag color={color}>{text}</Tag>;
      },
    },
    {
      title: '审核状态',
      key: 'reviewStatus',
      width: 110,
      render: (_: any, record: any) => (
        <ExtractionStatusBadge status={record.reviewStatus || 'pending_review'} />
      ),
    },
    {
      title: '操作',
      key: 'action',
      width: 100,
      render: (_: any, record: any) => {
        // Only successfully extracted results can be reviewed.
        if (record.status !== 'completed') return <Text type="secondary">-</Text>;
        const isApproved = record.reviewStatus === 'approved';
        return (
          <Button
            type={isApproved ? 'default' : 'primary'}
            size="small"
            icon={isApproved ? <EyeOutlined /> : <CheckCircleOutlined />}
            onClick={() => handleOpenDrawer(record.id)}
          >
            {isApproved ? '查看' : '审核'}
          </Button>
        );
      },
    },
    {
      title: '错误信息',
      dataIndex: 'errorMessage',
      width: 180,
      ellipsis: true,
      render: (v: any) =>
        v ? <Text type="danger" ellipsis={{ tooltip: v }}>{v}</Text> : '-',
    },
  ];

  return (
    <div className="p-4">
      {/* Info Banner */}
      <div className="flex justify-between items-center mb-4">
        <Space>
          <Button
            icon={<ArrowLeftOutlined />}
            onClick={() => navigate('/literature/extraction/setup')}
          >
          </Button>
          <Title level={5} className="!mb-0">
          </Title>
        </Space>
        <Space>
          <Button
            icon={<ReloadOutlined />}
            onClick={() =>
              queryClient.invalidateQueries({ queryKey: ['extraction-task-results', taskId] })
            }
          >
          </Button>
          <Button
            type="primary"
            icon={<DownloadOutlined />}
            onClick={handleExportExcel}
            disabled={approvedCount === 0}
          >
            Excel ({approvedCount} )
          </Button>
        </Space>
      </div>
      {/* Summary banner */}
      {taskStatus && (
        <Alert
          type={errorCount > 0 ? 'warning' : 'success'}
          icon={errorCount > 0 ? <ExclamationCircleOutlined /> : <CheckCircleOutlined />}
          message={
            <span>
              <strong>{results.length}</strong>
              <Tag color="green" className="mx-1">{approvedCount} </Tag>
              <Tag color="orange" className="mx-1">{pendingCount} </Tag>
              {errorCount > 0 && <Tag color="red" className="mx-1">{errorCount} </Tag>}
            </span>
          }
          showIcon
          className="mb-4"
        />
      )}
      <Table
        loading={loadingResults}
        dataSource={results}
        rowKey="id"
        columns={columns}
        pagination={
          isPaginated
            ? {
                pageSize: PAGE_SIZE,
                current: currentPage,
                onChange: (nextPage) => setPage(nextPage),
                // The page size is fixed, so hide the size changer rather than
                // show a control that has no effect.
                showSizeChanger: false,
              }
            : false
        }
        size="middle"
        scroll={{ x: 900 }}
      />
      <ExtractionDrawer
        open={drawerVisible}
        resultId={selectedResultId}
        taskId={taskId}
        onClose={handleDrawerClose}
        onSaved={handleReviewSaved}
      />
    </div>
  );
};
export default ExtractionWorkbench;