Files
AIclinicalresearch/backend/src/modules/asl/fulltext-screening/services/ExcelExporter.ts
HaHafeng 91cab452d1 fix(dc/tool-c): Fix special character handling and improve UX
Major fixes:
- Fix pivot transformation with special characters in column names
- Fix compute column validation for Chinese punctuation
- Fix recode dialog to fetch unique values from full dataset via new API
- Add column mapping mechanism to handle special characters

Database migration:
- Add column_mapping field to dc_tool_c_sessions table
- Migration file: 20251208_add_column_mapping

UX improvements:
- Darken table grid lines for better visibility
- Reduce column width by 40% with tooltip support
- Insert new columns next to source columns
- Preserve original row order after operations
- Add notice about 50-row preview limit

Modified files:
- Backend: SessionService, SessionController, QuickActionService, routes
- Python: pivot.py, compute.py, recode.py, binning.py, conditional.py
- Frontend: DataGrid, RecodeDialog, index.tsx, ag-grid-custom.css
- Database: schema.prisma, migration SQL

Status: Code complete, database migrated, ready for testing
2025-12-08 23:20:55 +08:00

369 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
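/**
 * Excel export service.
 *
 * Generates the Excel workbook for full-text screening results, containing:
 * - Sheet 1: included literature list
 * - Sheet 2: excluded literature list
 * - Sheet 3: PRISMA statistics
 * - Sheet 4: cost statistics
 */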
import ExcelJS from 'exceljs';
import { logger } from '../../../../common/logging/index.js';
/**
 * Builds the Excel workbook that summarises a full-text screening task.
 *
 * The workbook has four sheets: included literature, excluded literature,
 * PRISMA statistics and cost statistics.
 */
export class ExcelExporter {
  /**
   * Generate the full-text screening workbook and serialise it to a buffer.
   *
   * @param task - Screening task record; the fields read here are `id`,
   *   `modelA`, `modelB`, the token/cost totals, the processed/success/
   *   degraded/failed counters and `startedAt` / `completedAt`.
   * @param results - Screening results; each is expected to carry
   *   `finalDecision` and, usually, a `literature` object.
   * @returns The `.xlsx` file content.
   */
  async generateFulltextScreeningExcel(
    task: any,
    results: any[]
  ): Promise<Buffer> {
    logger.info('Generating fulltext screening Excel', {
      taskId: task.id,
      resultsCount: results.length,
    });

    const workbook = new ExcelJS.Workbook();
    workbook.creator = 'AI智能文献系统';
    workbook.created = new Date();

    await this.createIncludedSheet(workbook, results);
    await this.createExcludedSheet(workbook, results);
    await this.createStatisticsSheet(workbook, task, results);
    await this.createCostSheet(workbook, task, results);

    // Normalise whatever writeBuffer() yields into a real Node Buffer instead
    // of asserting the type, so `.length` and the return type are sound.
    const buffer = Buffer.from(await workbook.xlsx.writeBuffer());

    logger.info('Excel generated successfully', {
      sheetCount: workbook.worksheets.length,
      bufferSize: buffer.length,
    });

    return buffer;
  }

  /**
   * Sheet 1: one row per result whose final decision is `include`.
   */
  private async createIncludedSheet(workbook: ExcelJS.Workbook, results: any[]) {
    const sheet = workbook.addWorksheet('纳入文献列表');

    sheet.columns = [
      { header: '序号', key: 'index', width: 8 },
      { header: 'PMID', key: 'pmid', width: 12 },
      { header: '文献来源', key: 'source', width: 30 },
      { header: '标题', key: 'title', width: 60 },
      { header: '期刊', key: 'journal', width: 30 },
      { header: '年份', key: 'year', width: 10 },
      { header: 'DOI', key: 'doi', width: 25 },
      { header: '最终决策', key: 'decision', width: 12 },
      { header: '数据质量', key: 'dataQuality', width: 12 },
      { header: '模型一致性', key: 'consistency', width: 12 },
      { header: '是否人工审核', key: 'isReviewed', width: 14 },
    ];
    this.styleTableHeader(sheet, 'FF4472C4');

    let index = 0;
    for (const result of results) {
      if (result.finalDecision !== 'include') continue;
      index += 1;

      // A result without an attached literature record still gets a row.
      const lit = result.literature ?? {};
      const modelA = result.modelAOverall;
      const modelB = result.modelBOverall;

      sheet.addRow({
        index,
        pmid: lit.pmid || '-',
        source: this.formatSource(lit),
        title: lit.title || '-',
        journal: lit.journal || '-',
        year: lit.year || '-',
        doi: lit.doi || '-',
        decision: '纳入',
        dataQuality: modelA?.dataQuality || modelB?.dataQuality || '-',
        consistency: modelA?.decision === modelB?.decision ? '一致' : '不一致',
        isReviewed: result.finalDecisionBy ? '是' : '否',
      });
    }

    // Keep the header row visible while scrolling.
    sheet.views = [{ state: 'frozen', ySplit: 1 }];
  }

  /**
   * Sheet 2: one row per result whose final decision is `exclude`.
   */
  private async createExcludedSheet(workbook: ExcelJS.Workbook, results: any[]) {
    const sheet = workbook.addWorksheet('排除文献列表');

    sheet.columns = [
      { header: '序号', key: 'index', width: 8 },
      { header: 'PMID', key: 'pmid', width: 12 },
      { header: '文献来源', key: 'source', width: 30 },
      { header: '标题', key: 'title', width: 60 },
      { header: '排除原因', key: 'reason', width: 50 },
      { header: '排除字段', key: 'fields', width: 20 },
      { header: '是否冲突', key: 'isConflict', width: 12 },
      { header: '审核人', key: 'reviewer', width: 20 },
      { header: '审核时间', key: 'reviewTime', width: 20 },
    ];
    this.styleTableHeader(sheet, 'FFE74C3C');

    let index = 0;
    for (const result of results) {
      if (result.finalDecision !== 'exclude') continue;
      index += 1;

      const lit = result.literature ?? {};

      sheet.addRow({
        index,
        pmid: lit.pmid || '-',
        source: this.formatSource(lit),
        title: lit.title || '-',
        reason: result.exclusionReason || '-',
        fields: result.conflictFields?.join(', ') || '-',
        isConflict: result.isConflict ? '是' : '否',
        reviewer: result.finalDecisionBy || '-',
        reviewTime: result.finalDecisionAt
          ? new Date(result.finalDecisionAt).toLocaleString('zh-CN')
          : '-',
      });
    }

    sheet.views = [{ state: 'frozen', ySplit: 1 }];
  }

  /**
   * Sheet 3: overall decision counts plus a breakdown of exclusion reasons.
   *
   * Layout: row 1 title, row 2 blank, row 3 table header, then the data rows,
   * a blank row, and the exclusion-reason table.
   */
  private async createStatisticsSheet(
    workbook: ExcelJS.Workbook,
    task: any,
    results: any[]
  ) {
    const sheet = workbook.addWorksheet('PRISMA统计');

    // Single pass over the results for every counter.
    const total = results.length;
    let included = 0;
    let excluded = 0;
    let conflictCount = 0;
    let reviewedCount = 0;
    const exclusionReasons = new Map<string, number>();

    for (const r of results) {
      if (r.finalDecision === 'include') {
        included += 1;
      } else if (r.finalDecision === 'exclude') {
        excluded += 1;
        if (r.exclusionReason) {
          const reason = String(r.exclusionReason);
          exclusionReasons.set(reason, (exclusionReasons.get(reason) ?? 0) + 1);
        }
      }
      if (r.isConflict) conflictCount += 1;
      if (r.finalDecisionBy) reviewedCount += 1;
    }
    const pending = total - included - excluded;

    sheet.getColumn(1).width = 30;
    sheet.getColumn(2).width = 15;
    sheet.getColumn(3).width = 15;

    this.addSheetTitle(sheet, 'A1:C1', '全文复筛PRISMA统计', 'FF2E86AB');

    // Explicit spacer: addRow() appends directly after the last existing row,
    // so without it the table header would land on row 2.
    sheet.addRow([]);
    this.addShadedHeaderRow(sheet, ['统计项', '数量', '百分比']);

    sheet.addRow(['全文复筛总数', total, total > 0 ? '100%' : '0%']);
    sheet.addRow(['最终纳入', included, this.formatPercent(included, total)]);
    sheet.addRow(['最终排除', excluded, this.formatPercent(excluded, total)]);
    sheet.addRow(['待审核', pending, this.formatPercent(pending, total)]);
    sheet.addRow(['模型冲突数', conflictCount, this.formatPercent(conflictCount, total)]);
    sheet.addRow(['人工审核数', reviewedCount, this.formatPercent(reviewedCount, total)]);

    sheet.addRow([]);
    this.addShadedHeaderRow(sheet, ['排除原因', '数量', '占排除比例']);

    // Most frequent reason first.
    const rankedReasons = [...exclusionReasons.entries()].sort((a, b) => b[1] - a[1]);
    for (const [reason, count] of rankedReasons) {
      sheet.addRow([reason, count, this.formatPercent(count, excluded)]);
    }

    sheet.getColumn(2).numFmt = '0';
  }

  /**
   * Sheet 4: model, token, cost and timing figures taken from the task.
   *
   * Layout: row 1 title, row 2 blank, row 3 table header, followed by the
   * general, token, cost and time sections separated by blank rows.
   */
  private async createCostSheet(
    workbook: ExcelJS.Workbook,
    task: any,
    results: any[]
  ) {
    const sheet = workbook.addWorksheet('成本统计');

    sheet.getColumn(1).width = 30;
    sheet.getColumn(2).width = 25;

    this.addSheetTitle(sheet, 'A1:B1', '全文复筛成本统计', 'FF27AE60');

    const totalTokens = this.toNumber(task.totalTokens);
    const totalCost = this.toNumber(task.totalCost);
    // Report the real processed count (possibly 0); only the averages need
    // protection against division by zero.
    const processedCount = this.toNumber(task.processedCount);
    const hasProcessed = processedCount > 0;
    const avgCostPerLit = hasProcessed ? totalCost / processedCount : 0;
    const avgTokensPerLit = hasProcessed ? Math.round(totalTokens / processedCount) : 0;

    const startedAt = task.startedAt ? new Date(task.startedAt) : null;
    const completedAt = task.completedAt ? new Date(task.completedAt) : null;
    // An unfinished task is measured up to "now", but "now" is never shown as
    // its completion time.
    const elapsedMs = startedAt
      ? (completedAt ?? new Date()).getTime() - startedAt.getTime()
      : 0;
    const totalTimeMs = Number.isFinite(elapsedMs) && elapsedMs > 0 ? elapsedMs : 0;
    const totalTimeSeconds = Math.round(totalTimeMs / 1000);
    const avgTimePerLit = hasProcessed
      ? Math.round(totalTimeMs / processedCount / 1000)
      : 0;

    sheet.addRow([]);
    this.addShadedHeaderRow(sheet, ['项目', '值']);

    sheet.addRow(['模型组合', `${task.modelA} + ${task.modelB}`]);
    sheet.addRow(['处理文献数', processedCount]);
    sheet.addRow(['成功处理数', task.successCount || 0]);
    sheet.addRow(['降级处理数', task.degradedCount || 0]);
    sheet.addRow(['失败处理数', task.failedCount || 0]);

    sheet.addRow([]);
    this.addSectionHeading(sheet, 'Token使用统计');
    sheet.addRow(['总Token数', totalTokens.toLocaleString()]);
    sheet.addRow(['平均Token/篇', avgTokensPerLit.toLocaleString()]);

    sheet.addRow([]);
    this.addSectionHeading(sheet, '成本统计');
    sheet.addRow(['总成本(元)', `¥${totalCost.toFixed(4)}`]);
    sheet.addRow(['平均成本/篇(元)', `¥${avgCostPerLit.toFixed(4)}`]);

    sheet.addRow([]);
    this.addSectionHeading(sheet, '时间统计');
    sheet.addRow(['总处理时间', this.formatDuration(totalTimeSeconds)]);
    sheet.addRow(['平均时间/篇', `${avgTimePerLit}秒`]);
    sheet.addRow(['开始时间', startedAt ? startedAt.toLocaleString('zh-CN') : '-']);
    sheet.addRow(['完成时间', completedAt ? completedAt.toLocaleString('zh-CN') : '-']);
  }

  /** Style row 1 of a list sheet: bold white text, centred, on the given ARGB background. */
  private styleTableHeader(sheet: ExcelJS.Worksheet, argb: string): void {
    const header = sheet.getRow(1);
    header.font = { bold: true, color: { argb: 'FFFFFFFF' } };
    header.fill = { type: 'pattern', pattern: 'solid', fgColor: { argb } };
    header.alignment = { vertical: 'middle', horizontal: 'center' };
  }

  /** Write a merged, coloured title into row 1 of a summary sheet. */
  private addSheetTitle(
    sheet: ExcelJS.Worksheet,
    range: string,
    title: string,
    argb: string
  ): void {
    sheet.mergeCells(range);
    const titleCell = sheet.getCell('A1');
    titleCell.value = title;
    titleCell.font = { size: 16, bold: true, color: { argb: 'FFFFFFFF' } };
    titleCell.alignment = { horizontal: 'center', vertical: 'middle' };
    titleCell.fill = { type: 'pattern', pattern: 'solid', fgColor: { argb } };
    sheet.getRow(1).height = 30;
  }

  /**
   * Append a bold, grey-shaded table header and return it.
   * Styling the Row returned by addRow() avoids tracking row numbers by hand.
   */
  private addShadedHeaderRow(sheet: ExcelJS.Worksheet, labels: string[]): ExcelJS.Row {
    const row = sheet.addRow(labels);
    row.font = { bold: true };
    row.fill = { type: 'pattern', pattern: 'solid', fgColor: { argb: 'FFD0D0D0' } };
    return row;
  }

  /** Append a bold section heading row (label in column A, empty column B). */
  private addSectionHeading(sheet: ExcelJS.Worksheet, label: string): ExcelJS.Row {
    const row = sheet.addRow([label, '']);
    row.font = { bold: true };
    return row;
  }

  /** "<first author> <year>" label; falls back to "Unknown" / "-" for missing parts. */
  private formatSource(lit: any): string {
    const firstAuthor =
      typeof lit?.authors === 'string' ? lit.authors.split(',')[0] : '';
    return `${firstAuthor || 'Unknown'} ${lit?.year || '-'}`;
  }

  /** Percentage with one decimal place; "0%" when the denominator is not positive. */
  private formatPercent(count: number, denominator: number): string {
    return denominator > 0 ? `${((count / denominator) * 100).toFixed(1)}%` : '0%';
  }

  /** Render whole seconds as minutes and seconds, e.g. 125 -> "2分5秒". */
  private formatDuration(totalSeconds: number): string {
    return `${Math.floor(totalSeconds / 60)}分${totalSeconds % 60}秒`;
  }

  /** Coerce a possibly missing or non-number numeric field to a finite number (0 otherwise). */
  private toNumber(value: unknown): number {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  }
}