feat(asl): Complete Day 5 - Fulltext Screening Backend API Development

- Implement 5 core API endpoints (create task, get progress, get results, update decision, export Excel)
- Add FulltextScreeningController with Zod validation (652 lines)
- Implement ExcelExporter service with 4-sheet report generation (352 lines)
- Register routes under /api/v1/asl/fulltext-screening
- Create 31 REST Client test cases
- Add automated integration test script
- Fix PDF extraction fallback mechanism in LLM12FieldsService
- Update API design documentation to v3.0
- Update development plan to v1.2
- Create Day 5 development record
- Clean up temporary test files
This commit is contained in:
2025-11-23 10:52:07 +08:00
parent 08aa3f6c28
commit 88cc049fb3
232 changed files with 7780 additions and 441 deletions

View File

@@ -0,0 +1,351 @@
/**
* Excel导出服务
*
* 生成全文复筛结果的Excel文件,包含:
* - Sheet 1: 纳入文献列表
* - Sheet 2: 排除文献列表
* - Sheet 3: PRISMA统计
* - Sheet 4: 成本统计
*/
import ExcelJS from 'exceljs';
import { logger } from '../../../../common/logging/index.js';
/**
 * Builds the Excel report for a full-text screening task.
 *
 * The workbook contains four sheets, in this order:
 *   1. included literature, 2. excluded literature,
 *   3. PRISMA statistics,   4. cost / token / time statistics.
 *
 * NOTE(review): `task` and `results` are untyped here; the fields read below
 * (`finalDecision`, `literature`, `modelAOverall`, ...) presumably mirror the
 * ORM models of the screening task and its results - confirm against the caller.
 */
export class ExcelExporter {
  /** Fill colour used for the grey section-header rows inside the statistics sheets. */
  private static readonly SECTION_HEADER_ARGB = 'FFD0D0D0';

  /**
   * Generates the full report workbook and serialises it.
   *
   * @param task    Screening task record (id, model names, counters, timestamps).
   * @param results Screening result records, each optionally carrying a `literature` object.
   * @returns The `.xlsx` file content as a Node.js Buffer.
   */
  async generateFulltextScreeningExcel(
    task: any,
    results: any[]
  ): Promise<Buffer> {
    logger.info('Generating fulltext screening Excel', {
      taskId: task.id,
      resultsCount: results.length,
    });

    const workbook = new ExcelJS.Workbook();
    workbook.creator = 'AI智能文献系统';
    workbook.created = new Date();

    await this.createIncludedSheet(workbook, results);
    await this.createExcludedSheet(workbook, results);
    await this.createStatisticsSheet(workbook, task, results);
    await this.createCostSheet(workbook, task, results);

    // ExcelJS types the result as an ArrayBuffer-like value; normalise it so the
    // declared return type is honoured even when it is not already a Node Buffer.
    const raw = await workbook.xlsx.writeBuffer();
    const buffer: Buffer = Buffer.isBuffer(raw)
      ? raw
      : Buffer.from(raw as ArrayBuffer);

    logger.info('Excel generated successfully', {
      sheetCount: workbook.worksheets.length,
      bufferSize: buffer.length,
    });
    return buffer;
  }

  /** Applies the white-on-colour, centred style used for the list sheets' column header row. */
  private styleTableHeader(row: ExcelJS.Row, fillArgb: string): void {
    row.font = { bold: true, color: { argb: 'FFFFFFFF' } };
    row.fill = {
      type: 'pattern',
      pattern: 'solid',
      fgColor: { argb: fillArgb },
    };
    row.alignment = { vertical: 'middle', horizontal: 'center' };
  }

  /** Applies the bold-on-grey style used for in-sheet table headers. */
  private styleSectionHeader(row: ExcelJS.Row): void {
    row.font = { bold: true };
    row.fill = {
      type: 'pattern',
      pattern: 'solid',
      fgColor: { argb: ExcelExporter.SECTION_HEADER_ARGB },
    };
  }

  /** Writes a merged, coloured title banner into row 1 spanning columns A..lastColumn. */
  private writeTitle(
    sheet: ExcelJS.Worksheet,
    lastColumn: string,
    text: string,
    fillArgb: string
  ): void {
    sheet.mergeCells(`A1:${lastColumn}1`);
    const titleCell = sheet.getCell('A1');
    titleCell.value = text;
    titleCell.font = { size: 16, bold: true, color: { argb: 'FFFFFFFF' } };
    titleCell.alignment = { horizontal: 'center', vertical: 'middle' };
    titleCell.fill = {
      type: 'pattern',
      pattern: 'solid',
      fgColor: { argb: fillArgb },
    };
    sheet.getRow(1).height = 30;
  }

  /** Formats `part / whole` as a one-decimal percentage; a zero denominator yields '0.0%' instead of 'NaN%'. */
  private formatPercent(part: number, whole: number): string {
    return whole > 0 ? `${((part / whole) * 100).toFixed(1)}%` : '0.0%';
  }

  /** Builds the "<first author> <year>" citation label, tolerating missing fields. */
  private describeSource(lit: any): string {
    let firstAuthor: unknown;
    if (typeof lit.authors === 'string') {
      firstAuthor = lit.authors.split(',')[0];
    } else if (Array.isArray(lit.authors)) {
      // NOTE(review): authors looks like a comma-separated string; arrays are accepted defensively.
      firstAuthor = lit.authors[0];
    }
    return `${firstAuthor || 'Unknown'} ${lit.year || '-'}`;
  }

  /**
   * Sheet 1: one row per result whose final decision is 'include'.
   */
  private async createIncludedSheet(workbook: ExcelJS.Workbook, results: any[]) {
    const sheet = workbook.addWorksheet('纳入文献列表');
    sheet.columns = [
      { header: '序号', key: 'index', width: 8 },
      { header: 'PMID', key: 'pmid', width: 12 },
      { header: '文献来源', key: 'source', width: 30 },
      { header: '标题', key: 'title', width: 60 },
      { header: '期刊', key: 'journal', width: 30 },
      { header: '年份', key: 'year', width: 10 },
      { header: 'DOI', key: 'doi', width: 25 },
      { header: '最终决策', key: 'decision', width: 12 },
      { header: '数据质量', key: 'dataQuality', width: 12 },
      { header: '模型一致性', key: 'consistency', width: 12 },
      { header: '是否人工审核', key: 'isReviewed', width: 14 },
    ];
    this.styleTableHeader(sheet.getRow(1), 'FF4472C4');

    const includedResults = results.filter((r) => r.finalDecision === 'include');
    includedResults.forEach((result, index) => {
      // A result whose literature relation was not loaded must not abort the whole export.
      const lit = result.literature ?? {};
      const modelAOverall = result.modelAOverall;
      const modelBOverall = result.modelBOverall;
      const consistency =
        modelAOverall?.decision === modelBOverall?.decision ? '一致' : '不一致';
      const dataQuality =
        modelAOverall?.dataQuality || modelBOverall?.dataQuality || '-';

      sheet.addRow({
        index: index + 1,
        pmid: lit.pmid || '-',
        source: this.describeSource(lit),
        title: lit.title || '-',
        journal: lit.journal || '-',
        year: lit.year || '-',
        doi: lit.doi || '-',
        decision: '纳入',
        dataQuality,
        consistency,
        isReviewed: result.finalDecisionBy ? '是' : '否',
      });
    });

    // Keep the column header visible while scrolling.
    sheet.views = [{ state: 'frozen', ySplit: 1 }];
  }

  /**
   * Sheet 2: one row per result whose final decision is 'exclude'.
   */
  private async createExcludedSheet(workbook: ExcelJS.Workbook, results: any[]) {
    const sheet = workbook.addWorksheet('排除文献列表');
    sheet.columns = [
      { header: '序号', key: 'index', width: 8 },
      { header: 'PMID', key: 'pmid', width: 12 },
      { header: '文献来源', key: 'source', width: 30 },
      { header: '标题', key: 'title', width: 60 },
      { header: '排除原因', key: 'reason', width: 50 },
      { header: '排除字段', key: 'fields', width: 20 },
      { header: '是否冲突', key: 'isConflict', width: 12 },
      { header: '审核人', key: 'reviewer', width: 20 },
      { header: '审核时间', key: 'reviewTime', width: 20 },
    ];
    this.styleTableHeader(sheet.getRow(1), 'FFE74C3C');

    const excludedResults = results.filter((r) => r.finalDecision === 'exclude');
    excludedResults.forEach((result, index) => {
      const lit = result.literature ?? {};
      // Only join when the value really is an array; anything else is shown as '-'.
      const fields = Array.isArray(result.conflictFields)
        ? result.conflictFields.join(', ')
        : '';

      sheet.addRow({
        index: index + 1,
        pmid: lit.pmid || '-',
        source: this.describeSource(lit),
        title: lit.title || '-',
        reason: result.exclusionReason || '-',
        fields: fields || '-',
        isConflict: result.isConflict ? '是' : '否',
        reviewer: result.finalDecisionBy || '-',
        reviewTime: result.finalDecisionAt
          ? new Date(result.finalDecisionAt).toLocaleString('zh-CN')
          : '-',
      });
    });

    sheet.views = [{ state: 'frozen', ySplit: 1 }];
  }

  /**
   * Sheet 3: overall decision counts plus a breakdown of exclusion reasons.
   *
   * Header rows are styled through the Row returned by `addRow`, so the styling
   * always lands on the row that holds the header text.
   */
  private async createStatisticsSheet(
    workbook: ExcelJS.Workbook,
    _task: any,
    results: any[]
  ) {
    const sheet = workbook.addWorksheet('PRISMA统计');

    const total = results.length;
    const included = results.filter((r) => r.finalDecision === 'include').length;
    const excluded = results.filter((r) => r.finalDecision === 'exclude').length;
    const pending = total - included - excluded;
    const conflictCount = results.filter((r) => r.isConflict).length;
    const reviewedCount = results.filter((r) => r.finalDecisionBy).length;

    // Tally exclusion reasons; results without a reason are left out of the breakdown.
    const exclusionReasons = new Map<string, number>();
    for (const r of results) {
      if (r.finalDecision === 'exclude' && r.exclusionReason) {
        const reason = String(r.exclusionReason);
        exclusionReasons.set(reason, (exclusionReasons.get(reason) ?? 0) + 1);
      }
    }

    sheet.getColumn(1).width = 30;
    sheet.getColumn(2).width = 15;
    sheet.getColumn(3).width = 15;

    this.writeTitle(sheet, 'C', '全文复筛PRISMA统计', 'FF2E86AB');
    sheet.addRow([]); // spacer between the title banner and the first table

    this.styleSectionHeader(sheet.addRow(['统计项', '数量', '百分比']));
    sheet.addRow(['全文复筛总数', total, total > 0 ? '100%' : '0.0%']);
    sheet.addRow(['最终纳入', included, this.formatPercent(included, total)]);
    sheet.addRow(['最终排除', excluded, this.formatPercent(excluded, total)]);
    sheet.addRow(['待审核', pending, this.formatPercent(pending, total)]);
    sheet.addRow(['模型冲突数', conflictCount, this.formatPercent(conflictCount, total)]);
    sheet.addRow(['人工审核数', reviewedCount, this.formatPercent(reviewedCount, total)]);

    sheet.addRow([]);

    this.styleSectionHeader(sheet.addRow(['排除原因', '数量', '占排除比例']));
    // Most frequent reason first.
    const rankedReasons = [...exclusionReasons.entries()].sort((a, b) => b[1] - a[1]);
    for (const [reason, count] of rankedReasons) {
      sheet.addRow([reason, count, this.formatPercent(count, excluded)]);
    }

    sheet.getColumn(2).numFmt = '0';
  }

  /**
   * Sheet 4: processing counters, token usage, cost and timing for the task.
   */
  private async createCostSheet(
    workbook: ExcelJS.Workbook,
    task: any,
    _results: any[]
  ) {
    const sheet = workbook.addWorksheet('成本统计');
    sheet.getColumn(1).width = 30;
    sheet.getColumn(2).width = 25;

    this.writeTitle(sheet, 'B', '全文复筛成本统计', 'FF27AE60');
    sheet.addRow([]); // spacer between the title banner and the table

    // Coerce to plain numbers so missing values (and decimal-like wrappers) behave uniformly.
    const totalTokens = Number(task.totalTokens) || 0;
    const totalCost = Number(task.totalCost) || 0;
    // Report the real processed count; averages below are guarded against zero instead.
    const processedCount = Number(task.processedCount) || 0;
    const avgCostPerLit = processedCount > 0 ? totalCost / processedCount : 0;
    const avgTokensPerLit =
      processedCount > 0 ? Math.round(totalTokens / processedCount) : 0;

    const startedAt = task.startedAt ? new Date(task.startedAt) : null;
    const completedAt = task.completedAt ? new Date(task.completedAt) : null;
    // For a task that has not finished, elapsed time is measured up to now,
    // but no completion time is reported.
    const elapsedEnd = completedAt ?? new Date();
    const totalTimeMs = startedAt
      ? Math.max(0, elapsedEnd.getTime() - startedAt.getTime())
      : 0;
    const totalTimeSeconds = Math.round(totalTimeMs / 1000);
    const avgTimePerLit =
      processedCount > 0 ? Math.round(totalTimeMs / processedCount / 1000) : 0;

    this.styleSectionHeader(sheet.addRow(['项目', '值']));
    sheet.addRow(['模型组合', `${task.modelA} + ${task.modelB}`]);
    sheet.addRow(['处理文献数', processedCount]);
    sheet.addRow(['成功处理数', task.successCount || 0]);
    sheet.addRow(['降级处理数', task.degradedCount || 0]);
    sheet.addRow(['失败处理数', task.failedCount || 0]);

    sheet.addRow([]);
    sheet.addRow(['Token使用统计', '']).font = { bold: true };
    sheet.addRow(['总Token数', totalTokens.toLocaleString()]);
    sheet.addRow(['平均Token/篇', avgTokensPerLit.toLocaleString()]);

    sheet.addRow([]);
    sheet.addRow(['成本统计', '']).font = { bold: true };
    sheet.addRow(['总成本(元)', `¥${totalCost.toFixed(4)}`]);
    sheet.addRow(['平均成本/篇(元)', `¥${avgCostPerLit.toFixed(4)}`]);

    sheet.addRow([]);
    sheet.addRow(['时间统计', '']).font = { bold: true };
    sheet.addRow([
      '总处理时间',
      `${Math.floor(totalTimeSeconds / 60)}分${totalTimeSeconds % 60}秒`,
    ]);
    sheet.addRow(['平均时间/篇', `${avgTimePerLit}秒`]);
    sheet.addRow(['开始时间', startedAt ? startedAt.toLocaleString('zh-CN') : '-']);
    sheet.addRow(['完成时间', completedAt ? completedAt.toLocaleString('zh-CN') : '-']);
  }
}