feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench

M1 Skeleton Pipeline:
- Scatter-dispatch + Aggregator polling pattern (PgBoss)
- PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs)
- ExtractionSingleWorker with DeepSeek-V3 LLM extraction
- PermanentExtractionError for non-retryable failures
- Phantom Retry Guard (idempotent worker)
- 3-step minimal frontend (Setup -> Progress -> Workbench)
- 4 new DB tables (extraction_templates, project_templates, tasks, results)
- 3 system templates seed (RCT, Cohort, QC)
- M1 integration test suite

M2 HITL Workbench:
- MinerU VLM integration for high-fidelity table extraction
- XML-isolated DynamicPromptBuilder with flat JSON output template
- fuzzyQuoteMatch validator (3-tier confidence scoring)
- SSE real-time logging via ExtractionEventBus
- Schema-driven ExtractionDrawer (dynamic field rendering from template)
- Excel wide-table export with flattenModuleData normalization
- M2 integration test suite

Critical Fixes (data normalization):
- DynamicPromptBuilder: explicit flat key-value output format with example
- ExtractionExcelExporter: handle both array and flat data formats
- ExtractionDrawer: schema-driven rendering instead of hardcoded fields
- ExtractionValidator: array-format quote verification support
- SSE route: Fastify register encapsulation to bypass auth for EventSource
- LLM JSON sanitizer: strip illegal control chars before JSON.parse

Also includes: RVW stats verification spec, SSA expert config guide

Tested: M1 pipeline test + M2 HITL test + manual frontend verification
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-25 18:29:20 +08:00
parent 371fa53956
commit f0736dbca1
40 changed files with 6138 additions and 48 deletions

View File

@@ -528,6 +528,97 @@ export async function getDeepResearchTask(
return request(`/research/tasks/${taskId}`);
}
// ==================== 工具 3全文智能提取 API ====================
/**
 * Requests the extraction template collection.
 *
 * Calls the shared `request` helper with the `/extraction/templates` path and
 * no extra options, and hands back whatever that helper resolves to.
 *
 * @returns The response envelope produced by `request`, typed as a list.
 */
export async function getExtractionTemplates(): Promise<ApiResponse<any[]>> {
  const endpoint = '/extraction/templates';
  return request(endpoint);
}
/**
 * Requests a single extraction template by id.
 *
 * @param templateId - Identifier placed in the URL path. It is percent-encoded
 *   so that characters such as `/`, `?`, `#` or `%` cannot alter the route;
 *   ids made only of unreserved characters are sent unchanged.
 * @returns The response envelope produced by `request`.
 */
export async function getExtractionTemplate(templateId: string): Promise<ApiResponse<any>> {
  return request(`/extraction/templates/${encodeURIComponent(templateId)}`);
}
/**
 * Posts a clone request to `/extraction/templates/clone`.
 *
 * The two identifiers are sent together as a JSON-serialized request body.
 *
 * @param projectId - Sent as the `projectId` field of the body.
 * @param baseTemplateId - Sent as the `baseTemplateId` field of the body.
 * @returns The response envelope produced by `request`.
 */
export async function cloneExtractionTemplate(
  projectId: string,
  baseTemplateId: string
): Promise<ApiResponse<any>> {
  const payload = JSON.stringify({ projectId, baseTemplateId });
  return request('/extraction/templates/clone', {
    method: 'POST',
    body: payload,
  });
}
/**
 * Requests the knowledge-base collection exposed under the extraction API.
 *
 * Calls the shared `request` helper with `/extraction/knowledge-bases` and no
 * extra options.
 *
 * @returns The response envelope produced by `request`, typed as a list.
 */
export async function getExtractionKnowledgeBases(): Promise<ApiResponse<any[]>> {
  const endpoint = '/extraction/knowledge-bases';
  return request(endpoint);
}
/**
 * Requests the documents that belong to one knowledge base.
 *
 * @param kbId - Knowledge-base identifier placed in the URL path. It is
 *   percent-encoded so reserved characters (`/`, `?`, `#`, `%`) cannot change
 *   which route is hit; ids made only of unreserved characters are unchanged.
 * @returns The response envelope produced by `request`, typed as a list.
 */
export async function getExtractionDocuments(kbId: string): Promise<ApiResponse<any[]>> {
  return request(`/extraction/knowledge-bases/${encodeURIComponent(kbId)}/documents`);
}
/**
 * Posts a new extraction task to `/extraction/tasks`.
 *
 * The whole `params` object is JSON-serialized and sent as the request body.
 *
 * @param params - Task creation fields; `idempotencyKey` may be omitted.
 * @returns The response envelope produced by `request`, whose payload is typed
 *   as an object carrying the new `taskId`.
 */
export async function createExtractionTask(params: {
  projectId: string;
  projectTemplateId: string;
  pkbKnowledgeBaseId: string;
  documentIds: string[];
  idempotencyKey?: string;
}): Promise<ApiResponse<{ taskId: string }>> {
  const serialized = JSON.stringify(params);
  return request('/extraction/tasks', {
    method: 'POST',
    body: serialized,
  });
}
/**
 * Requests the status summary of one extraction task.
 *
 * @param taskId - Task identifier placed in the URL path. It is
 *   percent-encoded so reserved characters (`/`, `?`, `#`, `%`) cannot change
 *   which route is hit; ids made only of unreserved characters are unchanged.
 * @returns The response envelope produced by `request`, whose payload is typed
 *   as the task status plus per-state counters and a `percent` value.
 */
export async function getExtractionTaskStatus(
  taskId: string
): Promise<ApiResponse<{
  taskId: string;
  status: string;
  totalCount: number;
  completedCount: number;
  errorCount: number;
  extractingCount: number;
  pendingCount: number;
  percent: number;
}>> {
  return request(`/extraction/tasks/${encodeURIComponent(taskId)}`);
}
/**
 * Requests the result list of one extraction task.
 *
 * @param taskId - Task identifier placed in the URL path. It is
 *   percent-encoded so reserved characters (`/`, `?`, `#`, `%`) cannot change
 *   which route is hit; ids made only of unreserved characters are unchanged.
 * @returns The response envelope produced by `request`, typed as a list.
 */
export async function getExtractionTaskResults(
  taskId: string
): Promise<ApiResponse<any[]>> {
  return request(`/extraction/tasks/${encodeURIComponent(taskId)}/results`);
}
/**
 * Requests the detail record of one extraction result.
 *
 * @param resultId - Result identifier placed in the URL path. It is
 *   percent-encoded so reserved characters (`/`, `?`, `#`, `%`) cannot change
 *   which route is hit; ids made only of unreserved characters are unchanged.
 * @returns The response envelope produced by `request`.
 */
export async function getExtractionResultDetail(
  resultId: string
): Promise<ApiResponse<any>> {
  return request(`/extraction/results/${encodeURIComponent(resultId)}`);
}
/**
 * Submits a review decision for one extraction result via a `PUT` request.
 *
 * @param resultId - Result identifier placed in the URL path. It is
 *   percent-encoded so reserved characters (`/`, `?`, `#`, `%`) cannot change
 *   which route is hit; ids made only of unreserved characters are unchanged.
 * @param data - Review decision; JSON-serialized and sent as the request body.
 * @returns The response envelope produced by `request`.
 */
export async function reviewExtractionResult(
  resultId: string,
  data: { reviewStatus: 'approved' | 'rejected' }
): Promise<ApiResponse<any>> {
  return request(`/extraction/results/${encodeURIComponent(resultId)}/review`, {
    method: 'PUT',
    body: JSON.stringify(data),
  });
}
/**
 * Downloads the export of one extraction task as a binary blob.
 *
 * Uses `fetch` directly (rather than the `request` helper) against
 * `API_BASE_URL`, passing the headers returned by `getAuthHeaders()`, and
 * reads the response body with `response.blob()`.
 *
 * @param taskId - Task identifier placed in the URL path. It is
 *   percent-encoded so reserved characters (`/`, `?`, `#`, `%`) cannot change
 *   which route is hit; ids made only of unreserved characters are unchanged.
 * @returns The response body as a `Blob`.
 * @throws Error with message `HTTP <status>` when `response.ok` is false.
 */
export async function exportExtractionResults(
  taskId: string
): Promise<Blob> {
  const response = await fetch(
    `${API_BASE_URL}/extraction/tasks/${encodeURIComponent(taskId)}/export`,
    { headers: getAuthHeaders() }
  );
  if (!response.ok) {
    // Non-2xx responses are surfaced as errors instead of returning an error body as a blob.
    throw new Error(`HTTP ${response.status}`);
  }
  return response.blob();
}
// ==================== 统一导出API对象 ====================
/**
@@ -584,4 +675,17 @@ export const aslApi = {
generateRequirement,
executeDeepResearchTask,
getDeepResearchTask,
// 工具 3全文智能提取
getExtractionTemplates,
getExtractionTemplate,
cloneExtractionTemplate,
getExtractionKnowledgeBases,
getExtractionDocuments,
createExtractionTask,
getExtractionTaskStatus,
getExtractionTaskResults,
getExtractionResultDetail,
reviewExtractionResult,
exportExtractionResults,
};