feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench
M1 Skeleton Pipeline:
- Scatter-dispatch + Aggregator polling pattern (PgBoss)
- PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs)
- ExtractionSingleWorker with DeepSeek-V3 LLM extraction
- PermanentExtractionError for non-retryable failures
- Phantom Retry Guard (idempotent worker)
- 3-step minimal frontend (Setup -> Progress -> Workbench)
- 4 new DB tables (extraction_templates, project_templates, tasks, results)
- 3 system templates seed (RCT, Cohort, QC)
- M1 integration test suite

M2 HITL Workbench:
- MinerU VLM integration for high-fidelity table extraction
- XML-isolated DynamicPromptBuilder with flat JSON output template
- fuzzyQuoteMatch validator (3-tier confidence scoring)
- SSE real-time logging via ExtractionEventBus
- Schema-driven ExtractionDrawer (dynamic field rendering from template)
- Excel wide-table export with flattenModuleData normalization
- M2 integration test suite

Critical Fixes (data normalization):
- DynamicPromptBuilder: explicit flat key-value output format with example
- ExtractionExcelExporter: handle both array and flat data formats
- ExtractionDrawer: schema-driven rendering instead of hardcoded fields
- ExtractionValidator: array-format quote verification support
- SSE route: Fastify register encapsulation to bypass auth for EventSource
- LLM JSON sanitizer: strip illegal control chars before JSON.parse

Also includes: RVW stats verification spec, SSA expert config guide

Tested: M1 pipeline test + M2 HITL test + manual frontend verification

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
96
backend/src/modules/pkb/services/PkbExportService.ts
Normal file
96
backend/src/modules/pkb/services/PkbExportService.ts
Normal file
@@ -0,0 +1,96 @@
|
||||
import { prisma } from '../../../config/database.js';
|
||||
|
||||
/**
 * PKB data export service (maintained by the PKB module).
 *
 * Exit point of the ACL (anti-corruption layer): returns plain DTO objects
 * and does not expose Prisma types.
 * Consumer: ASL PkbBridgeService
 */
|
||||
|
||||
/**
 * Plain-object view of one PKB document, as handed out by the export service.
 *
 * Carries only primitive fields so that consumers never depend on the
 * persistence layer's row types.
 */
export interface PkbDocumentExportDTO {
  /** The `id` of the underlying document record. */
  documentId: string;

  /** Storage key of the file; an empty string when the record has none. */
  storageKey: string;

  /** File name as stored on the document record. */
  filename: string;

  /** Text stored on the record as `extractedText`; may be `null`. */
  extractedText: string | null;

  /** File size converted to a plain number; `0` when the record has no size. */
  fileSizeBytes: number;
}
|
||||
|
||||
/**
 * Plain-object summary of one PKB knowledge base, as handed out by the
 * export service.
 */
export interface PkbKnowledgeBaseExportDTO {
  /** The `id` of the knowledge base record. */
  id: string;

  /** Name stored on the knowledge base record. */
  name: string;

  /** Value of the record's stored `fileCount` field. */
  fileCount: number;
}
|
||||
|
||||
/**
 * Read-only export service over PKB knowledge bases and documents.
 *
 * Every method queries through `prisma` and copies the selected columns into
 * plain DTO objects, so callers never receive persistence-layer row types.
 */
class PkbExportServiceImpl {
  /**
   * Columns fetched for every document export. Shared by both document
   * queries so the selection cannot drift between them. `as const` keeps the
   * `true` literals so the query result stays narrowed to these columns.
   */
  private static readonly documentSelect = {
    id: true,
    storageKey: true,
    filename: true,
    extractedText: true,
    fileSizeBytes: true,
  } as const;

  /**
   * Converts one selected document row into its export DTO.
   *
   * A missing storage key becomes `''` and a missing size becomes `0`.
   * The size is passed through `Number()`; `bigint` is accepted as well as
   * `number` because the column type is not visible from this file.
   */
  private static toDocumentDTO(row: {
    id: string;
    storageKey: string | null;
    filename: string;
    extractedText: string | null;
    fileSizeBytes: bigint | number | null;
  }): PkbDocumentExportDTO {
    return {
      documentId: row.id,
      storageKey: row.storageKey ?? '',
      filename: row.filename,
      extractedText: row.extractedText,
      fileSizeBytes: Number(row.fileSizeBytes ?? 0),
    };
  }

  /**
   * Lists the knowledge bases whose `userId` matches the given user,
   * most recently updated first.
   *
   * @param userId - Owner id used to filter knowledge bases.
   * @returns One summary DTO per matching knowledge base.
   */
  async listKnowledgeBases(userId: string): Promise<PkbKnowledgeBaseExportDTO[]> {
    const rows = await prisma.knowledgeBase.findMany({
      where: { userId },
      select: {
        id: true,
        name: true,
        fileCount: true,
      },
      orderBy: { updatedAt: 'desc' },
    });

    // Copy field by field so only the DTO's own properties are returned.
    return rows.map(({ id, name, fileCount }) => ({ id, name, fileCount }));
  }

  /**
   * Lists the documents of a knowledge base whose `fileType` is either
   * `'pdf'` or `'application/pdf'`, most recently uploaded first.
   *
   * NOTE(review): this method filters by `kbId` only and performs no
   * ownership check; presumably the caller has already authorized access to
   * the knowledge base — confirm.
   *
   * @param kbId - Id of the knowledge base to read documents from.
   * @returns One document DTO per matching document.
   */
  async listPdfDocuments(kbId: string): Promise<PkbDocumentExportDTO[]> {
    const rows = await prisma.document.findMany({
      where: {
        kbId,
        fileType: { in: ['pdf', 'application/pdf'] },
      },
      select: PkbExportServiceImpl.documentSelect,
      orderBy: { uploadedAt: 'desc' },
    });

    return rows.map((row) => PkbExportServiceImpl.toDocumentDTO(row));
  }

  /**
   * Loads a single document by id and returns it as an export DTO.
   *
   * @param documentId - Id of the document to load.
   * @returns The document's export DTO.
   * @throws Error with `name` set to `'PkbDocumentNotFoundError'` when no
   *   document has the given id.
   */
  async getDocumentForExtraction(documentId: string): Promise<PkbDocumentExportDTO> {
    const row = await prisma.document.findUnique({
      where: { id: documentId },
      select: PkbExportServiceImpl.documentSelect,
    });

    if (!row) {
      const err = new Error(`PKB Document not found: ${documentId}`);
      // `Error.name` is a writable string, so no cast is needed to tag the error.
      err.name = 'PkbDocumentNotFoundError';
      throw err;
    }

    return PkbExportServiceImpl.toDocumentDTO(row);
  }
}
|
||||
|
||||
/**
 * Module-level shared instance of the export service. The implementing class
 * is not exported, so this constant is the way other modules reach it.
 */
export const pkbExportService = new PkbExportServiceImpl();
|
||||
Reference in New Issue
Block a user