feat(asl): Complete Day 5 - Fulltext Screening Backend API Development
- Implement 5 core API endpoints (create task, get progress, get results, update decision, export Excel)
- Add FulltextScreeningController with Zod validation (652 lines)
- Implement ExcelExporter service with 4-sheet report generation (352 lines)
- Register routes under /api/v1/asl/fulltext-screening
- Create 31 REST Client test cases
- Add automated integration test script
- Fix PDF extraction fallback mechanism in LLM12FieldsService
- Update API design documentation to v3.0
- Update development plan to v1.2
- Create Day 5 development record
- Clean up temporary test files
This commit is contained in:
@@ -0,0 +1,351 @@
|
||||
/**
|
||||
* Excel导出服务
|
||||
*
|
||||
* 生成全文复筛结果的Excel文件,包含:
|
||||
* - Sheet 1: 纳入文献列表
|
||||
* - Sheet 2: 排除文献列表
|
||||
* - Sheet 3: PRISMA统计
|
||||
* - Sheet 4: 成本统计
|
||||
*/
|
||||
|
||||
import ExcelJS from 'exceljs';
|
||||
import { logger } from '../../../../common/logging/index.js';
|
||||
|
||||
/**
 * Builds the Excel report for a full-text screening task.
 *
 * The workbook contains four worksheets, in this order:
 *  1. included literature list
 *  2. excluded literature list
 *  3. PRISMA statistics
 *  4. cost statistics
 */
export class ExcelExporter {
  /** ARGB fill used for the grey table-header rows on the two summary sheets. */
  private static readonly TABLE_HEADER_ARGB = 'FFD0D0D0';

  /** ARGB colour of the white text used on coloured header/title backgrounds. */
  private static readonly WHITE_ARGB = 'FFFFFFFF';

  /**
   * Generate the full-text screening workbook and serialize it to a Buffer.
   *
   * @param task    Screening task record; `id`, model names, counters, token/cost
   *                totals and timestamps are read from it.
   * @param results Screening result records; each is expected to carry
   *                `finalDecision` and (normally) a joined `literature` object.
   * @returns The `.xlsx` file content.
   */
  async generateFulltextScreeningExcel(
    task: any,
    results: any[]
  ): Promise<Buffer> {
    logger.info('Generating fulltext screening Excel', {
      taskId: task.id,
      resultsCount: results.length,
    });

    const workbook = new ExcelJS.Workbook();
    workbook.creator = 'AI智能文献系统';
    workbook.created = new Date();

    // Sheet order is part of the report layout: included, excluded, PRISMA, cost.
    await this.createIncludedSheet(workbook, results);
    await this.createExcludedSheet(workbook, results);
    await this.createStatisticsSheet(workbook, task, results);
    await this.createCostSheet(workbook, task, results);

    // ExcelJS types the result as an ArrayBuffer-like value; normalize it so the
    // declared return type (Node Buffer) and the logged size are both accurate.
    const raw = await workbook.xlsx.writeBuffer();
    const buffer = Buffer.isBuffer(raw) ? raw : Buffer.from(raw);

    logger.info('Excel generated successfully', {
      sheetCount: workbook.worksheets.length,
      bufferSize: buffer.length,
    });

    return buffer;
  }

  /**
   * Sheet 1: list of included literature (`finalDecision === 'include'`).
   */
  private async createIncludedSheet(workbook: ExcelJS.Workbook, results: any[]) {
    const sheet = workbook.addWorksheet('纳入文献列表');

    sheet.columns = [
      { header: '序号', key: 'index', width: 8 },
      { header: 'PMID', key: 'pmid', width: 12 },
      { header: '文献来源', key: 'source', width: 30 },
      { header: '标题', key: 'title', width: 60 },
      { header: '期刊', key: 'journal', width: 30 },
      { header: '年份', key: 'year', width: 10 },
      { header: 'DOI', key: 'doi', width: 25 },
      { header: '最终决策', key: 'decision', width: 12 },
      { header: '数据质量', key: 'dataQuality', width: 12 },
      { header: '模型一致性', key: 'consistency', width: 12 },
      { header: '是否人工审核', key: 'isReviewed', width: 14 },
    ];
    this.styleListHeader(sheet.getRow(1), 'FF4472C4');

    const includedResults = results.filter((r) => r.finalDecision === 'include');

    includedResults.forEach((result, index) => {
      // A result without its joined literature must not abort the whole export.
      const lit = result.literature ?? {};
      const modelAOverall = result.modelAOverall as any;
      const modelBOverall = result.modelBOverall as any;

      const consistency =
        modelAOverall?.decision === modelBOverall?.decision ? '一致' : '不一致';
      // Prefer model A's data-quality rating, then model B's.
      const dataQuality =
        modelAOverall?.dataQuality || modelBOverall?.dataQuality || '-';

      sheet.addRow({
        index: index + 1,
        pmid: lit.pmid || '-',
        source: this.formatSource(lit),
        title: lit.title || '-',
        journal: lit.journal || '-',
        year: lit.year || '-',
        doi: lit.doi || '-',
        decision: '纳入',
        dataQuality,
        consistency,
        isReviewed: result.finalDecisionBy ? '是' : '否',
      });
    });

    // Keep the header row visible while scrolling.
    sheet.views = [{ state: 'frozen', ySplit: 1 }];
  }

  /**
   * Sheet 2: list of excluded literature (`finalDecision === 'exclude'`).
   */
  private async createExcludedSheet(workbook: ExcelJS.Workbook, results: any[]) {
    const sheet = workbook.addWorksheet('排除文献列表');

    sheet.columns = [
      { header: '序号', key: 'index', width: 8 },
      { header: 'PMID', key: 'pmid', width: 12 },
      { header: '文献来源', key: 'source', width: 30 },
      { header: '标题', key: 'title', width: 60 },
      { header: '排除原因', key: 'reason', width: 50 },
      { header: '排除字段', key: 'fields', width: 20 },
      { header: '是否冲突', key: 'isConflict', width: 12 },
      { header: '审核人', key: 'reviewer', width: 20 },
      { header: '审核时间', key: 'reviewTime', width: 20 },
    ];
    this.styleListHeader(sheet.getRow(1), 'FFE74C3C');

    const excludedResults = results.filter((r) => r.finalDecision === 'exclude');

    excludedResults.forEach((result, index) => {
      // A result without its joined literature must not abort the whole export.
      const lit = result.literature ?? {};
      const conflictFields = Array.isArray(result.conflictFields)
        ? result.conflictFields.join(', ')
        : '';

      sheet.addRow({
        index: index + 1,
        pmid: lit.pmid || '-',
        source: this.formatSource(lit),
        title: lit.title || '-',
        reason: result.exclusionReason || '-',
        fields: conflictFields || '-',
        isConflict: result.isConflict ? '是' : '否',
        reviewer: result.finalDecisionBy || '-',
        reviewTime: result.finalDecisionAt
          ? new Date(result.finalDecisionAt).toLocaleString('zh-CN')
          : '-',
      });
    });

    // Keep the header row visible while scrolling.
    sheet.views = [{ state: 'frozen', ySplit: 1 }];
  }

  /**
   * Sheet 3: PRISMA statistics — overall counts plus a breakdown of exclusion
   * reasons sorted by frequency (descending).
   *
   * Layout: title in row 1, blank row 2, overall table from row 3, one blank
   * row, then the exclusion-reason table.
   */
  private async createStatisticsSheet(
    workbook: ExcelJS.Workbook,
    task: any,
    results: any[]
  ) {
    const sheet = workbook.addWorksheet('PRISMA统计');

    const total = results.length;
    const included = results.filter((r) => r.finalDecision === 'include').length;
    const excluded = results.filter((r) => r.finalDecision === 'exclude').length;
    // Anything that is neither included nor excluded is still awaiting a decision.
    const pending = total - included - excluded;
    const conflictCount = results.filter((r) => r.isConflict).length;
    const reviewedCount = results.filter((r) => r.finalDecisionBy).length;

    // Tally exclusion reasons over excluded results that actually carry one.
    const exclusionReasons: Record<string, number> = {};
    for (const r of results) {
      if (r.finalDecision === 'exclude' && r.exclusionReason) {
        const reason = r.exclusionReason as string;
        exclusionReasons[reason] = (exclusionReasons[reason] || 0) + 1;
      }
    }

    sheet.getColumn(1).width = 30;
    sheet.getColumn(2).width = 15;
    sheet.getColumn(3).width = 15;

    this.writeTitle(sheet, 'A1:C1', '全文复筛PRISMA统计', 'FF2E86AB');

    // addRow appends right after the last existing row, so an explicit spacer is
    // needed for the table to start on row 3. Rows are styled through the Row
    // object addRow returns, never through hand-counted row numbers.
    sheet.addRow([]);
    this.styleTableHeader(sheet.addRow(['统计项', '数量', '百分比']));

    sheet.addRow(['全文复筛总数', total, total > 0 ? '100%' : '0%']);
    sheet.addRow(['最终纳入', included, this.formatPercent(included, total)]);
    sheet.addRow(['最终排除', excluded, this.formatPercent(excluded, total)]);
    sheet.addRow(['待审核', pending, this.formatPercent(pending, total)]);
    sheet.addRow(['模型冲突数', conflictCount, this.formatPercent(conflictCount, total)]);
    sheet.addRow(['人工审核数', reviewedCount, this.formatPercent(reviewedCount, total)]);

    // Blank separator between the two tables.
    sheet.addRow([]);

    this.styleTableHeader(sheet.addRow(['排除原因', '数量', '占排除比例']));

    Object.entries(exclusionReasons)
      .sort((a, b) => b[1] - a[1])
      .forEach(([reason, count]) => {
        sheet.addRow([reason, count, this.formatPercent(count, excluded)]);
      });

    // Show the count column as integers.
    sheet.getColumn(2).numFmt = '0';
  }

  /**
   * Sheet 4: cost statistics — processing counters, token usage, cost and timing.
   *
   * Layout: title in row 1, blank row 2, then a two-column key/value table with
   * bold section headings.
   */
  private async createCostSheet(
    workbook: ExcelJS.Workbook,
    task: any,
    results: any[]
  ) {
    const sheet = workbook.addWorksheet('成本统计');

    sheet.getColumn(1).width = 30;
    sheet.getColumn(2).width = 25;

    this.writeTitle(sheet, 'A1:B1', '全文复筛成本统计', 'FF27AE60');

    // Coerce defensively: these may arrive as strings or null from storage.
    const totalTokens = Number(task.totalTokens) || 0;
    const totalCost = Number(task.totalCost) || 0;
    // Default to 0 (not 1) so the report never claims a document was processed
    // when none was; the averages below are guarded against division by zero.
    const processedCount = Number(task.processedCount) || 0;
    const avgCostPerLit = processedCount > 0 ? totalCost / processedCount : 0;
    const avgTokensPerLit =
      processedCount > 0 ? Math.round(totalTokens / processedCount) : 0;

    const startedAt = task.startedAt ? new Date(task.startedAt) : null;
    const completedAt = task.completedAt ? new Date(task.completedAt) : null;
    // For a task that has not finished, elapsed time is measured up to now, but
    // the completion time itself is reported as '-'.
    const elapsedEnd = completedAt ?? new Date();
    const totalTimeMs = startedAt
      ? Math.max(0, elapsedEnd.getTime() - startedAt.getTime())
      : 0;
    const totalTimeSeconds = Math.round(totalTimeMs / 1000);
    const avgTimePerLit =
      processedCount > 0 ? Math.round(totalTimeMs / processedCount / 1000) : 0;

    // Spacer so the table starts on row 3; rows are styled via the returned Row.
    sheet.addRow([]);
    this.styleTableHeader(sheet.addRow(['项目', '值']));

    sheet.addRow(['模型组合', `${task.modelA} + ${task.modelB}`]);
    sheet.addRow(['处理文献数', processedCount]);
    sheet.addRow(['成功处理数', task.successCount || 0]);
    sheet.addRow(['降级处理数', task.degradedCount || 0]);
    sheet.addRow(['失败处理数', task.failedCount || 0]);
    sheet.addRow([]);

    sheet.addRow(['Token使用统计', '']).font = { bold: true };
    sheet.addRow(['总Token数', totalTokens.toLocaleString()]);
    sheet.addRow(['平均Token/篇', avgTokensPerLit.toLocaleString()]);
    sheet.addRow([]);

    sheet.addRow(['成本统计', '']).font = { bold: true };
    sheet.addRow(['总成本(元)', `¥${totalCost.toFixed(4)}`]);
    sheet.addRow(['平均成本/篇(元)', `¥${avgCostPerLit.toFixed(4)}`]);
    sheet.addRow([]);

    sheet.addRow(['时间统计', '']).font = { bold: true };
    sheet.addRow([
      '总处理时间',
      `${Math.floor(totalTimeSeconds / 60)}分${totalTimeSeconds % 60}秒`,
    ]);
    sheet.addRow(['平均时间/篇', `${avgTimePerLit}秒`]);
    sheet.addRow(['开始时间', startedAt ? startedAt.toLocaleString('zh-CN') : '-']);
    sheet.addRow(['完成时间', completedAt ? completedAt.toLocaleString('zh-CN') : '-']);
  }

  /** Format `part / whole` as a one-decimal percentage; '0%' when `whole` is not positive. */
  private formatPercent(part: number, whole: number): string {
    return whole > 0 ? `${((part / whole) * 100).toFixed(1)}%` : '0%';
  }

  /** Build the "first author + year" source label for a literature record. */
  private formatSource(lit: any): string {
    return `${lit.authors?.split(',')[0] || 'Unknown'} ${lit.year || '-'}`;
  }

  /** Style the header row of a list sheet: solid fill, bold white text, centred. */
  private styleListHeader(row: ExcelJS.Row, fillArgb: string): void {
    row.fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: fillArgb } };
    row.font = { bold: true, color: { argb: ExcelExporter.WHITE_ARGB } };
    row.alignment = { vertical: 'middle', horizontal: 'center' };
  }

  /** Style a table-header row on a summary sheet: bold text on a grey fill. */
  private styleTableHeader(row: ExcelJS.Row): void {
    row.font = { bold: true };
    row.fill = {
      type: 'pattern',
      pattern: 'solid',
      fgColor: { argb: ExcelExporter.TABLE_HEADER_ARGB },
    };
  }

  /** Write a merged, coloured title banner into row 1 of a summary sheet. */
  private writeTitle(
    sheet: ExcelJS.Worksheet,
    range: string,
    text: string,
    fillArgb: string
  ): void {
    sheet.mergeCells(range);
    const titleCell = sheet.getCell('A1');
    titleCell.value = text;
    titleCell.alignment = { horizontal: 'center', vertical: 'middle' };
    titleCell.fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: fillArgb } };
    titleCell.font = { size: 16, bold: true, color: { argb: ExcelExporter.WHITE_ARGB } };
    sheet.getRow(1).height = 30;
  }
}
|
||||
|
||||
Reference in New Issue
Block a user