feat(asl/extraction): Complete Tool 3 M1+M2 - skeleton pipeline and HITL workbench
M1 Skeleton Pipeline:
- Scatter-dispatch + Aggregator polling pattern (PgBoss)
- PKB ACL bridge (PkbBridgeService -> PkbExportService DTOs)
- ExtractionSingleWorker with DeepSeek-V3 LLM extraction
- PermanentExtractionError for non-retryable failures
- Phantom Retry Guard (idempotent worker)
- 3-step minimal frontend (Setup -> Progress -> Workbench)
- 4 new DB tables (extraction_templates, project_templates, tasks, results)
- 3 system templates seed (RCT, Cohort, QC)
- M1 integration test suite

M2 HITL Workbench:
- MinerU VLM integration for high-fidelity table extraction
- XML-isolated DynamicPromptBuilder with flat JSON output template
- fuzzyQuoteMatch validator (3-tier confidence scoring)
- SSE real-time logging via ExtractionEventBus
- Schema-driven ExtractionDrawer (dynamic field rendering from template)
- Excel wide-table export with flattenModuleData normalization
- M2 integration test suite

Critical Fixes (data normalization):
- DynamicPromptBuilder: explicit flat key-value output format with example
- ExtractionExcelExporter: handle both array and flat data formats
- ExtractionDrawer: schema-driven rendering instead of hardcoded fields
- ExtractionValidator: array-format quote verification support
- SSE route: Fastify register encapsulation to bypass auth for EventSource
- LLM JSON sanitizer: strip illegal control chars before JSON.parse

Also includes: RVW stats verification spec, SSA expert config guide

Tested: M1 pipeline test + M2 HITL test + manual frontend verification

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -528,6 +528,97 @@ export async function getDeepResearchTask(
|
||||
return request(`/research/tasks/${taskId}`);
|
||||
}
|
||||
|
||||
// ==================== 工具 3:全文智能提取 API ====================
|
||||
|
||||
/**
 * Requests the extraction template collection.
 *
 * Delegates to the shared `request` helper with the path
 * `/extraction/templates` and no extra options.
 *
 * @returns Whatever `request` resolves to, typed as an `ApiResponse` wrapping an array.
 */
export async function getExtractionTemplates(): Promise<ApiResponse<any[]>> {
  const endpoint = '/extraction/templates';
  return request(endpoint);
}
|
||||
|
||||
/**
 * Requests a single extraction template by id.
 *
 * @param templateId - Identifier placed in the last path segment of the URL.
 * @returns Whatever `request` resolves to for `/extraction/templates/{templateId}`.
 */
export async function getExtractionTemplate(templateId: string): Promise<ApiResponse<any>> {
  // Percent-encode the id so reserved characters (`/`, `?`, `#`, ...) stay
  // inside this path segment instead of changing the requested route.
  return request(`/extraction/templates/${encodeURIComponent(templateId)}`);
}
|
||||
|
||||
/**
 * Sends a clone request for an extraction template.
 *
 * POSTs a JSON body of the form `{ projectId, baseTemplateId }` to
 * `/extraction/templates/clone` through the shared `request` helper.
 *
 * @param projectId - Value sent as `projectId` in the JSON body.
 * @param baseTemplateId - Value sent as `baseTemplateId` in the JSON body.
 * @returns Whatever `request` resolves to for this call.
 */
export async function cloneExtractionTemplate(
  projectId: string,
  baseTemplateId: string
): Promise<ApiResponse<any>> {
  const payload = JSON.stringify({ projectId, baseTemplateId });
  return request('/extraction/templates/clone', { method: 'POST', body: payload });
}
|
||||
|
||||
/**
 * Requests the knowledge-base collection exposed under the extraction API.
 *
 * Delegates to the shared `request` helper with the path
 * `/extraction/knowledge-bases` and no extra options.
 *
 * @returns Whatever `request` resolves to, typed as an `ApiResponse` wrapping an array.
 */
export async function getExtractionKnowledgeBases(): Promise<ApiResponse<any[]>> {
  const endpoint = '/extraction/knowledge-bases';
  return request(endpoint);
}
|
||||
|
||||
/**
 * Requests the documents of one knowledge base.
 *
 * @param kbId - Knowledge-base identifier placed in the URL path.
 * @returns Whatever `request` resolves to for
 *   `/extraction/knowledge-bases/{kbId}/documents`.
 */
export async function getExtractionDocuments(kbId: string): Promise<ApiResponse<any[]>> {
  // Percent-encode the id so reserved characters cannot break out of its
  // path segment and alter the route.
  return request(`/extraction/knowledge-bases/${encodeURIComponent(kbId)}/documents`);
}
|
||||
|
||||
/**
 * Submits a new extraction task.
 *
 * Serializes `params` unchanged as the JSON body of a POST to
 * `/extraction/tasks` through the shared `request` helper.
 *
 * @param params - Task description; every field (including the optional
 *   `idempotencyKey`, when present) is forwarded as-is in the request body.
 * @returns Whatever `request` resolves to, typed as an `ApiResponse` carrying a `taskId`.
 */
export async function createExtractionTask(params: {
  projectId: string;
  projectTemplateId: string;
  pkbKnowledgeBaseId: string;
  documentIds: string[];
  idempotencyKey?: string;
}): Promise<ApiResponse<{ taskId: string }>> {
  const payload = JSON.stringify(params);
  return request('/extraction/tasks', { method: 'POST', body: payload });
}
|
||||
|
||||
/**
 * Requests the status record of an extraction task.
 *
 * @param taskId - Task identifier placed in the last path segment of the URL.
 * @returns Whatever `request` resolves to for `/extraction/tasks/{taskId}`,
 *   typed as an `ApiResponse` carrying the status string, the per-state
 *   counters and a `percent` value.
 */
export async function getExtractionTaskStatus(
  taskId: string
): Promise<ApiResponse<{
  taskId: string;
  status: string;
  totalCount: number;
  completedCount: number;
  errorCount: number;
  extractingCount: number;
  pendingCount: number;
  percent: number;
}>> {
  // Percent-encode the id so reserved characters (`/`, `?`, `#`, ...) stay
  // inside this path segment instead of changing the requested route.
  return request(`/extraction/tasks/${encodeURIComponent(taskId)}`);
}
|
||||
|
||||
/**
 * Requests the results belonging to an extraction task.
 *
 * @param taskId - Task identifier placed in the URL path.
 * @returns Whatever `request` resolves to for `/extraction/tasks/{taskId}/results`.
 */
export async function getExtractionTaskResults(
  taskId: string
): Promise<ApiResponse<any[]>> {
  // Percent-encode the id so reserved characters cannot break out of its
  // path segment and alter the route.
  return request(`/extraction/tasks/${encodeURIComponent(taskId)}/results`);
}
|
||||
|
||||
/**
 * Requests the detail record of a single extraction result.
 *
 * @param resultId - Result identifier placed in the last path segment of the URL.
 * @returns Whatever `request` resolves to for `/extraction/results/{resultId}`.
 */
export async function getExtractionResultDetail(
  resultId: string
): Promise<ApiResponse<any>> {
  // Percent-encode the id so reserved characters (`/`, `?`, `#`, ...) stay
  // inside this path segment instead of changing the requested route.
  return request(`/extraction/results/${encodeURIComponent(resultId)}`);
}
|
||||
|
||||
/**
 * Submits a review decision for an extraction result.
 *
 * Sends `data` as the JSON body of a PUT to
 * `/extraction/results/{resultId}/review` through the shared `request` helper.
 *
 * @param resultId - Result identifier placed in the URL path.
 * @param data - Review payload; `reviewStatus` is either `'approved'` or `'rejected'`.
 * @returns Whatever `request` resolves to for this call.
 */
export async function reviewExtractionResult(
  resultId: string,
  data: { reviewStatus: 'approved' | 'rejected' }
): Promise<ApiResponse<any>> {
  // Percent-encode the id so reserved characters cannot break out of its
  // path segment and alter the route.
  return request(`/extraction/results/${encodeURIComponent(resultId)}/review`, {
    method: 'PUT',
    body: JSON.stringify(data),
  });
}
|
||||
|
||||
/**
 * Downloads the export of an extraction task as a binary blob.
 *
 * Unlike the sibling functions in this section, this one calls `fetch`
 * directly (with `API_BASE_URL` and the headers from `getAuthHeaders()`)
 * and reads the response body as a `Blob` rather than going through `request`.
 *
 * @param taskId - Task identifier placed in the URL path.
 * @returns The response body as a `Blob`.
 * @throws Error with message `HTTP <status>` when the response is not OK.
 */
export async function exportExtractionResults(
  taskId: string
): Promise<Blob> {
  // Percent-encode the id so reserved characters (`/`, `?`, `#`, ...) stay
  // inside this path segment instead of changing the requested route.
  const url = `${API_BASE_URL}/extraction/tasks/${encodeURIComponent(taskId)}/export`;
  const response = await fetch(url, { headers: getAuthHeaders() });

  if (!response.ok) {
    throw new Error(`HTTP ${response.status}`);
  }

  return response.blob();
}
|
||||
|
||||
// ==================== 统一导出API对象 ====================
|
||||
|
||||
/**
|
||||
@@ -584,4 +675,17 @@ export const aslApi = {
|
||||
generateRequirement,
|
||||
executeDeepResearchTask,
|
||||
getDeepResearchTask,
|
||||
|
||||
// 工具 3:全文智能提取
|
||||
getExtractionTemplates,
|
||||
getExtractionTemplate,
|
||||
cloneExtractionTemplate,
|
||||
getExtractionKnowledgeBases,
|
||||
getExtractionDocuments,
|
||||
createExtractionTask,
|
||||
getExtractionTaskStatus,
|
||||
getExtractionTaskResults,
|
||||
getExtractionResultDetail,
|
||||
reviewExtractionResult,
|
||||
exportExtractionResults,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user