feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench
M1 Skeleton Pipeline: - Scatter-dispatch + Aggregator polling pattern (PgBoss) - PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs) - ExtractionSingleWorker with DeepSeek-V3 LLM extraction - PermanentExtractionError for non-retryable failures - Phantom Retry Guard (idempotent worker) - 3-step minimal frontend (Setup -> Progress -> Workbench) - 4 new DB tables (extraction_templates, project_templates, tasks, results) - 3 system templates seed (RCT, Cohort, QC) - M1 integration test suite M2 HITL Workbench: - MinerU VLM integration for high-fidelity table extraction - XML-isolated DynamicPromptBuilder with flat JSON output template - fuzzyQuoteMatch validator (3-tier confidence scoring) - SSE real-time logging via ExtractionEventBus - Schema-driven ExtractionDrawer (dynamic field rendering from template) - Excel wide-table export with flattenModuleData normalization - M2 integration test suite Critical Fixes (data normalization): - DynamicPromptBuilder: explicit flat key-value output format with example - ExtractionExcelExporter: handle both array and flat data formats - ExtractionDrawer: schema-driven rendering instead of hardcoded fields - ExtractionValidator: array-format quote verification support - SSE route: Fastify register encapsulation to bypass auth for EventSource - LLM JSON sanitizer: strip illegal control chars before JSON.parse Also includes: RVW stats verification spec, SSA expert config guide Tested: M1 pipeline test + M2 HITL test + manual frontend verification Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -0,0 +1,244 @@
|
||||
import { FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { prisma } from '../../../../config/database.js';
|
||||
import { templateService } from '../services/TemplateService.js';
|
||||
import { extractionService } from '../services/ExtractionService.js';
|
||||
import { pkbBridgeService } from '../services/PkbBridgeService.js';
|
||||
import { extractionEventBus } from '../services/ExtractionEventBus.js';
|
||||
import { extractionExcelExporter } from '../services/ExtractionExcelExporter.js';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
 * Resolves the authenticated user's id from `request.user.userId`.
 *
 * @param request - Incoming Fastify request; `user` is expected to have been
 *   attached before the handler runs (NOTE(review): presumably by an auth
 *   hook/plugin registered elsewhere; confirm).
 * @returns The user id when it is present and truthy.
 * @throws Error with message `User not authenticated` and `statusCode` 401
 *   when the request carries no user id.
 */
function getUserId(request: FastifyRequest): string {
  const { user } = request as FastifyRequest & { user?: { userId?: string } };
  const userId = user?.userId;
  if (!userId) {
    // Tag the error with an HTTP status so an error handler that honours
    // `statusCode` answers 401 instead of a generic 500. The error is still a
    // plain `Error` with the same message, so existing catch sites keep working.
    throw Object.assign(new Error('User not authenticated'), { statusCode: 401 });
  }
  return userId;
}
|
||||
|
||||
/**
|
||||
* 工具 3 全文提取 API 控制器
|
||||
*/
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// 模板 API
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * Responds with the templates returned by `templateService.listSystemTemplates()`.
 *
 * @param request - Incoming request; not read by this handler.
 * @param reply - Reply used to send `{ success: true, data }`.
 */
export async function listTemplates(request: FastifyRequest, reply: FastifyReply) {
  const data = await templateService.listSystemTemplates();
  return reply.send({ success: true, data });
}
|
||||
|
||||
/**
 * Responds with the template that `templateService.getSystemTemplate` returns
 * for the `templateId` route parameter.
 *
 * @param request - Request whose params carry `templateId`.
 * @param reply - Reply used to send `{ success: true, data }`.
 */
export async function getTemplate(
  request: FastifyRequest<{ Params: { templateId: string } }>,
  reply: FastifyReply,
) {
  const { templateId } = request.params;
  const data = await templateService.getSystemTemplate(templateId);
  return reply.send({ success: true, data });
}
|
||||
|
||||
/**
 * Clones a base template into a project on behalf of the calling user and
 * responds with the value returned by `templateService.cloneToProject`.
 *
 * @param request - Request whose body carries `projectId` and `baseTemplateId`.
 * @param reply - Reply used to send `{ success: true, data }`.
 * @throws Whatever `getUserId` throws when the request has no user id.
 */
export async function cloneTemplate(
  request: FastifyRequest<{ Body: { projectId: string; baseTemplateId: string } }>,
  reply: FastifyReply,
) {
  // Resolve the caller first so an unauthenticated request fails before the body is read.
  const userId = getUserId(request);
  const body = request.body;
  const data = await templateService.cloneToProject(body.projectId, body.baseTemplateId, userId);
  return reply.send({ success: true, data });
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// 提取任务 API
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * Creates an extraction task from the request body and responds with the
 * service result spread next to `success: true`.
 *
 * The body fields are forwarded to `extractionService.createTask` together
 * with the caller's user id and the `pkbBridgeService` instance.
 *
 * @param request - Request whose body carries the project, template,
 *   knowledge-base and document ids, plus an optional `idempotencyKey`.
 * @param reply - Reply used to send `{ success: true, ...result }`.
 * @throws Whatever `getUserId` throws when the request has no user id.
 */
export async function createTask(
  request: FastifyRequest<{
    Body: {
      projectId: string;
      projectTemplateId: string;
      pkbKnowledgeBaseId: string;
      documentIds: string[];
      idempotencyKey?: string;
    };
  }>,
  reply: FastifyReply,
) {
  const userId = getUserId(request);
  const body = request.body;

  const result = await extractionService.createTask({
    projectId: body.projectId,
    userId,
    projectTemplateId: body.projectTemplateId,
    pkbKnowledgeBaseId: body.pkbKnowledgeBaseId,
    documentIds: body.documentIds,
    idempotencyKey: body.idempotencyKey,
    pkbBridge: pkbBridgeService,
  });

  // The result is spread last, so its own keys appear at the top level of the response.
  return reply.send({ success: true, ...result });
}
|
||||
|
||||
/**
 * Responds with the value of `extractionService.getTaskStatus` for the
 * `taskId` route parameter.
 *
 * @param request - Request whose params carry `taskId`.
 * @param reply - Reply used to send `{ success: true, data }`.
 */
export async function getTaskStatus(
  request: FastifyRequest<{ Params: { taskId: string } }>,
  reply: FastifyReply,
) {
  const { taskId } = request.params;
  const data = await extractionService.getTaskStatus(taskId);
  return reply.send({ success: true, data });
}
|
||||
|
||||
/**
 * Responds with the value of `extractionService.getResults` for the `taskId`
 * route parameter.
 *
 * @param request - Request whose params carry `taskId`.
 * @param reply - Reply used to send `{ success: true, data }`.
 */
export async function getTaskResults(
  request: FastifyRequest<{ Params: { taskId: string } }>,
  reply: FastifyReply,
) {
  const { taskId } = request.params;
  const data = await extractionService.getResults(taskId);
  return reply.send({ success: true, data });
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// PKB 数据代理 API(前端通过 ASL 访问,不直接调 PKB)
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * Responds with the knowledge bases that `pkbBridgeService.listKnowledgeBases`
 * returns for the calling user.
 *
 * @param request - Request from which the user id is resolved.
 * @param reply - Reply used to send `{ success: true, data }`.
 * @throws Whatever `getUserId` throws when the request has no user id.
 */
export async function listKnowledgeBases(
  request: FastifyRequest,
  reply: FastifyReply,
) {
  const data = await pkbBridgeService.listKnowledgeBases(getUserId(request));
  return reply.send({ success: true, data });
}
|
||||
|
||||
/**
 * Responds with the value of `pkbBridgeService.listPdfDocuments` for the
 * `kbId` route parameter.
 *
 * NOTE(review): this handler does not read the caller's user id, so it
 * performs no per-user check itself; confirm access control happens elsewhere.
 *
 * @param request - Request whose params carry `kbId`.
 * @param reply - Reply used to send `{ success: true, data }`.
 */
export async function listDocuments(
  request: FastifyRequest<{ Params: { kbId: string } }>,
  reply: FastifyReply,
) {
  const { kbId } = request.params;
  const data = await pkbBridgeService.listPdfDocuments(kbId);
  return reply.send({ success: true, data });
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// 单条提取结果详情 + 审核 API(M2 新增)
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * Keeps every module of `baseFields` except the `outcomes_*` modules that do
 * not match `outcomes_<outcomeType>`.
 *
 * `baseFields` comes from a stored JSON value, so its shape is checked at
 * runtime: anything that is not a plain (non-array) object yields `undefined`
 * instead of being iterated.
 */
function filterSchemaByOutcome(
  baseFields: unknown,
  outcomeType: string,
): Record<string, unknown> | undefined {
  if (typeof baseFields !== 'object' || baseFields === null || Array.isArray(baseFields)) {
    return undefined;
  }
  const selectedOutcomeModule = `outcomes_${outcomeType}`;
  return Object.fromEntries(
    Object.entries(baseFields).filter(
      ([moduleName]) =>
        !moduleName.startsWith('outcomes_') || moduleName === selectedOutcomeModule,
    ),
  );
}

/**
 * Responds with one extraction result plus the field schema used to render it.
 *
 * The result is loaded together with its task's project template and that
 * template's base template. The schema is the base template's `baseFields`
 * filtered down to the project's outcome type.
 *
 * @param request - Request whose params carry `resultId`.
 * @param reply - Reply used to send either a 404 `{ success: false, error }`
 *   or `{ success: true, data }`.
 */
export async function getResultDetail(
  request: FastifyRequest<{ Params: { resultId: string } }>,
  reply: FastifyReply,
) {
  const result = await prisma.aslExtractionResult.findUnique({
    where: { id: request.params.resultId },
    include: {
      task: {
        select: {
          projectTemplate: {
            include: { baseTemplate: true },
          },
        },
      },
    },
  });
  if (!result) {
    return reply.status(404).send({ success: false, error: 'Result not found' });
  }

  const projectTemplate = result.task?.projectTemplate;
  // `||` (not `??`) on purpose: an empty-string outcome type also falls back to 'survival'.
  const outcomeType = projectTemplate?.outcomeType || 'survival';
  const schema = filterSchemaByOutcome(projectTemplate?.baseTemplate?.baseFields, outcomeType);

  return reply.send({
    success: true,
    data: {
      id: result.id,
      pkbDocumentId: result.pkbDocumentId,
      snapshotFilename: result.snapshotFilename,
      snapshotStorageKey: result.snapshotStorageKey,
      status: result.status,
      reviewStatus: result.reviewStatus,
      extractedData: result.extractedData,
      quoteVerification: result.quoteVerification,
      errorMessage: result.errorMessage,
      processedAt: result.processedAt,
      createdAt: result.createdAt,
      schema,
      outcomeType,
    },
  });
}
|
||||
|
||||
/**
 * Records a reviewer's decision on one extraction result.
 *
 * Sets `reviewStatus` and stamps `reviewedAt` with the current time on the
 * row identified by the `resultId` route parameter.
 *
 * @param request - Request whose params carry `resultId` and whose body
 *   carries `reviewStatus` (`'approved'` or `'rejected'`).
 * @param reply - Reply used to send:
 *   - 400 `{ success: false, error }` when `reviewStatus` is missing or not
 *     one of the two allowed values;
 *   - 404 `{ success: false, error }` when no such result exists;
 *   - `{ success: true, data }` with the updated row otherwise.
 * @throws Any database error other than "record not found".
 */
export async function reviewResult(
  request: FastifyRequest<{
    Params: { resultId: string };
    Body: { reviewStatus: 'approved' | 'rejected' };
  }>,
  reply: FastifyReply,
) {
  // The Body type is erased at runtime, so the value is re-checked here
  // before it is written to the database.
  const reviewStatus: string | undefined = request.body?.reviewStatus;
  if (reviewStatus !== 'approved' && reviewStatus !== 'rejected') {
    return reply
      .status(400)
      .send({ success: false, error: "reviewStatus must be 'approved' or 'rejected'" });
  }

  try {
    const updated = await prisma.aslExtractionResult.update({
      where: { id: request.params.resultId },
      data: {
        reviewStatus,
        reviewedAt: new Date(),
      },
    });
    return reply.send({ success: true, data: updated });
  } catch (err: unknown) {
    // Prisma reports an update against a non-existent row with error code
    // P2025; surface that as a 404 rather than an unhandled 500.
    if (typeof err === 'object' && err !== null && (err as { code?: unknown }).code === 'P2025') {
      return reply.status(404).send({ success: false, error: 'Result not found' });
    }
    throw err;
  }
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// SSE 日志流端点(M2 新增)
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * Streams a task's log entries to the client as Server-Sent Events.
 *
 * Frames written to the raw response, in order:
 *  1. one `sync` event carrying `extractionEventBus.getRecentLogs(taskId)`;
 *  2. one `log` event per entry delivered by `extractionEventBus.subscribe`;
 *  3. a `:heartbeat` comment every 15 seconds.
 *
 * The subscription and the heartbeat timer are released once, when either the
 * request or the response stream closes or the response stream errors.
 *
 * NOTE(review): `token` is declared on the querystring type but this handler
 * never reads or verifies it. Confirm where (or whether) the caller is
 * authenticated before log data is streamed.
 *
 * @param request - Request whose params carry `taskId`.
 * @param reply - Reply whose raw Node response is written to directly.
 */
export async function streamTaskLogs(
  request: FastifyRequest<{ Params: { taskId: string }; Querystring: { token?: string } }>,
  reply: FastifyReply,
) {
  const { taskId } = request.params;
  const stream = reply.raw;

  // The response is written by hand below, so tell Fastify not to attempt to
  // send anything itself when this async handler resolves.
  reply.hijack();

  stream.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
    'X-Accel-Buffering': 'no',
  });

  // Drop frames once the stream is finished or torn down so a late bus event
  // or heartbeat tick cannot trigger a write-after-end error.
  const writeFrame = (frame: string): void => {
    if (!stream.writableEnded && !stream.destroyed) {
      stream.write(frame);
    }
  };

  // First frame: replay the recent log history.
  const recentLogs = extractionEventBus.getRecentLogs(taskId);
  writeFrame(`event: sync\ndata: ${JSON.stringify({ logs: recentLogs })}\n\n`);

  // Subscribe to live log entries.
  const unsubscribe = extractionEventBus.subscribe(taskId, (entry) => {
    writeFrame(`event: log\ndata: ${JSON.stringify(entry)}\n\n`);
  });

  // Heartbeat every 15 seconds to keep the connection from being dropped.
  const heartbeat = setInterval(() => {
    writeFrame(':heartbeat\n\n');
  }, 15_000);

  // Several events can signal the end of the connection; release resources only once.
  let released = false;
  const release = (): void => {
    if (released) return;
    released = true;
    clearInterval(heartbeat);
    unsubscribe();
  };

  request.raw.on('close', release);
  stream.on('close', release);
  stream.on('error', release);
}
|
||||
|
||||
// ═══════════════════════════════════════════
|
||||
// Excel 导出端点(M2 新增)
|
||||
// ═══════════════════════════════════════════
|
||||
|
||||
/**
 * Sends a task's extraction results as an Excel (.xlsx) attachment.
 *
 * The workbook bytes come from `extractionExcelExporter.exportToExcel(taskId)`.
 * The download is named `extraction-<taskId>.xlsx`.
 *
 * @param request - Request whose params carry `taskId`.
 * @param reply - Reply on which the content headers are set and the buffer is sent.
 */
export async function exportTaskResults(
  request: FastifyRequest<{ Params: { taskId: string } }>,
  reply: FastifyReply,
) {
  const { taskId } = request.params;
  const buffer = await extractionExcelExporter.exportToExcel(taskId);

  // `taskId` comes straight from the URL. Restrict the filename component to a
  // conservative character set so quotes, separators or control characters
  // cannot break out of the quoted `filename` parameter. Ids made only of
  // letters, digits, '.', '_' and '-' are left unchanged.
  const safeTaskId = taskId.replace(/[^A-Za-z0-9._-]/g, '_');

  reply.header('Content-Type', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
  reply.header('Content-Disposition', `attachment; filename="extraction-${safeTaskId}.xlsx"`);
  return reply.send(buffer);
}
|
||||
Reference in New Issue
Block a user