Files
AIclinicalresearch/backend/src/common/jobs/utils.ts
HaHafeng 4ed67a8846 fix(admin): Fix Prompt management list not showing version info and add debug diagnostics
Summary:
- Fix Prompt list API response schema missing activeVersion and draftVersion fields
- Fastify was filtering out undefined schema fields, causing version columns to show empty
- Add detailed diagnostic logging for Prompt debug mode troubleshooting
- Verify debug mode works correctly (DRAFT version is used when debug enabled)

Changes:
- backend/src/common/prompt/prompt.routes.ts: Add activeVersion and draftVersion to response schema
- backend/src/common/prompt/prompt.service.ts: Add diagnostic logs for setDebugMode and get methods
- PKB module: Various authentication and document handling fixes from previous session

Tested: Debug mode verified working - v2 DRAFT version correctly loaded when debug enabled
2026-01-13 22:22:10 +08:00

324 lines
7.2 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 任务拆分工具函数
*
* 用于将长时间任务拆分成多个小任务,避免:
* - SAE 30秒超时
* - pg-boss 24小时任务过期
* - 任务失败时重做所有工作
*
* 核心策略:
* - 文献筛选:每批20-50篇
* - 数据提取:每批10-20条
* - 统计分析:按数据集大小动态调整
*/
/**
 * Splitting strategy for one task type.
 *
 * Describes how many items go into each batch and, optionally, the maximum
 * number of batches a task of this type may be split into.
 */
export interface ChunkStrategy {
/** Task type identifier. */
type: string
/** Number of items processed per batch. */
chunkSize: number
/** Maximum number of batches (guards against over-splitting); optional. */
maxChunks?: number
/** Human-readable description of the strategy. */
description: string
}
/**
 * Predefined splitting strategies, keyed by task type.
 *
 * Per the original author, the values are configured from real business
 * scenarios and performance-test data. The `default` entry is the fallback
 * that the lookup helpers in this module use for unknown task types.
 */
export const CHUNK_STRATEGIES: Record<string, ChunkStrategy> = (() => {
  const table: Record<string, ChunkStrategy> = {}

  // Registers one strategy under its own type id, so the key and `type` always match.
  const add = (type: string, chunkSize: number, maxChunks: number, description: string): void => {
    table[type] = { type, chunkSize, maxChunks, description }
  }

  // ASL module: literature screening.
  // 50 papers per batch (LLM API is fairly fast); at most 100 batches (5000 papers).
  add('asl:screening:title-abstract', 50, 100, '标题/摘要筛选 - 每批50篇')
  // 20 papers per batch (full text is slower); at most 50 batches (1000 papers).
  add('asl:screening:full-text', 20, 50, '全文筛选 - 每批20篇')
  // 30 papers per batch.
  add('asl:extraction', 30, 50, '数据提取 - 每批30篇')

  // DC module: data cleaning.
  // 100 rows per batch.
  add('dc:clean:batch', 100, 100, '数据清洗 - 每批100行')
  // 10 medical records per batch (AI extraction is slower).
  add('dc:extract:medical-record', 10, 100, '病历提取 - 每批10份')

  // SSA module: statistical analysis.
  // 1000 records per batch.
  add('ssa:analysis:batch', 1000, 50, '统计分析 - 每批1000条')

  // Fallback strategy.
  add('default', 50, 100, '默认策略 - 每批50条')

  return table
})()
/**
 * Splits an array into consecutive batches of at most `chunkSize` items.
 *
 * The input array is not modified; every batch is a new array produced by
 * `slice`. The last batch may be shorter than `chunkSize`.
 *
 * @param items Array to split.
 * @param chunkSize Maximum number of items per batch. Must be a positive
 *   integer (`Infinity` is accepted and yields a single batch).
 * @returns The batches in order; an empty array when `items` is empty.
 * @throws Error if `chunkSize` is zero, negative, `NaN`, or a finite non-integer.
 *
 * @example
 * ```typescript
 * const ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 * const batches = splitIntoChunks(ids, 3)
 * // Result: [[1,2,3], [4,5,6], [7,8,9], [10]]
 * ```
 */
export function splitIntoChunks<T>(items: T[], chunkSize: number): T[][] {
  // `!(x > 0)` also rejects NaN, which `x <= 0` lets through and which would
  // otherwise produce a single empty batch.
  if (!(chunkSize > 0)) {
    throw new Error('chunkSize must be positive')
  }
  // A fractional size makes slice() truncate its bounds unevenly, producing
  // empty or oversized batches.
  if (Number.isFinite(chunkSize) && !Number.isInteger(chunkSize)) {
    throw new Error('chunkSize must be an integer')
  }

  const chunks: T[][] = []
  for (let start = 0; start < items.length; start += chunkSize) {
    chunks.push(items.slice(start, start + chunkSize))
  }
  return chunks
}
/**
 * Recommends a batch size for a task type and a total item count.
 *
 * Looks the task type up in `CHUNK_STRATEGIES`, falling back to the `default`
 * strategy for unknown types. The result is always usable as a chunk size:
 * - no items (or a non-positive count): the strategy's configured `chunkSize`;
 * - fewer items than one batch: `totalItems` (everything fits in one batch);
 * - more batches than `maxChunks` would allow: a larger size so that the
 *   number of batches does not exceed `maxChunks` (an adjustment is logged);
 * - otherwise: the strategy's configured `chunkSize`.
 *
 * @param taskType Task type, e.g. `'asl:screening:title-abstract'`.
 * @param totalItems Total number of items to process.
 * @returns The recommended batch size.
 *
 * @example
 * ```typescript
 * const chunkSize = recommendChunkSize('asl:screening:title-abstract', 1000)
 * // Returns: 50 (from the CHUNK_STRATEGIES configuration)
 * ```
 */
export function recommendChunkSize(taskType: string, totalItems: number): number {
  // Own-property check: a plain `CHUNK_STRATEGIES[taskType] || default` would
  // resolve names such as 'constructor' to Object.prototype members.
  const strategy = Object.prototype.hasOwnProperty.call(CHUNK_STRATEGIES, taskType)
    ? CHUNK_STRATEGIES[taskType]
    : CHUNK_STRATEGIES['default']
  const baseSize = strategy.chunkSize

  // Nothing to process: returning `totalItems` here would hand back 0 (or a
  // negative number), which is not a valid chunk size.
  if (totalItems <= 0) {
    return baseSize
  }

  // Small input: a single batch holds everything.
  if (totalItems <= baseSize) {
    return totalItems
  }

  // Too many batches at the configured size: grow the batch to respect maxChunks.
  const maxChunks = strategy.maxChunks
  if (maxChunks && Math.ceil(totalItems / baseSize) > maxChunks) {
    const adjustedSize = Math.ceil(totalItems / maxChunks)
    console.log(
      `[TaskSplit] Adjusted chunkSize to ${adjustedSize} to limit chunks to ${maxChunks}`
    )
    return adjustedSize
  }

  return baseSize
}
/**
 * Computes the split plan for a task: batch size, batch count and related figures.
 *
 * The batch size comes from `recommendChunkSize`. When there is nothing to
 * process (`totalItems` is zero or negative), `totalChunks`,
 * `avgItemsPerChunk` and `lastChunkSize` are all 0.
 *
 * @param taskType Task type.
 * @param totalItems Total number of items to process.
 * @returns An object with `taskType`, `totalItems`, `chunkSize`, `totalChunks`,
 *   the `strategy` that was looked up (the `default` one for unknown types),
 *   `avgItemsPerChunk` (rounded) and `lastChunkSize`.
 *
 * @example
 * ```typescript
 * const info = calculateSplitInfo('asl:screening:title-abstract', 1000)
 * // Returns: { chunkSize: 50, totalChunks: 20, strategy: {...}, ... }
 * ```
 */
export function calculateSplitInfo(taskType: string, totalItems: number) {
  // Own-property check so names like 'constructor' fall back to the default strategy.
  const strategy = Object.prototype.hasOwnProperty.call(CHUNK_STRATEGIES, taskType)
    ? CHUNK_STRATEGIES[taskType]
    : CHUNK_STRATEGIES['default']
  const chunkSize = recommendChunkSize(taskType, totalItems)

  // Guard the divisions: with no items (or a zero chunk size) the plain
  // formula evaluates 0 / 0 and reports NaN batches.
  const hasWork = totalItems > 0 && chunkSize > 0
  const totalChunks = hasWork ? Math.ceil(totalItems / chunkSize) : 0

  return {
    taskType,
    totalItems,
    chunkSize,
    totalChunks,
    strategy,
    avgItemsPerChunk: totalChunks > 0 ? Math.round(totalItems / totalChunks) : 0,
    // A zero remainder means the last batch is full.
    lastChunkSize: hasWork ? totalItems % chunkSize || chunkSize : 0
  }
}
/**
 * Builds a human-readable label for a batch.
 *
 * The zero-based index is shown one-based. When the one-based position equals
 * `totalBatches`, a "last batch" marker is appended.
 *
 * @param batchIndex Zero-based batch index.
 * @param totalBatches Total number of batches.
 * @returns The label string.
 *
 * @example
 * ```typescript
 * getBatchDescription(0, 20)  // "批次 1/20"
 * getBatchDescription(19, 20) // "批次 20/20(最后一批)"
 * ```
 */
export function getBatchDescription(batchIndex: number, totalBatches: number): string {
  const position = batchIndex + 1
  const label = `批次 ${position}/${totalBatches}`
  return position === totalBatches ? `${label}(最后一批)` : label
}
/**
 * Estimates how long one batch takes to run, in seconds.
 *
 * Uses a fixed per-item time for each known task type (experience-based
 * values from the original author) and 1 second per item for any other type.
 * Intended for showing an estimated completion time in the UI.
 *
 * @param taskType Task type.
 * @param batchSize Number of items in the batch.
 * @returns Estimated duration in seconds, rounded up to a whole second.
 */
export function estimateBatchDuration(taskType: string, batchSize: number): number {
  // A switch (rather than a plain-object lookup) guarantees that names
  // inherited from Object.prototype, e.g. 'constructor', use the default rate.
  let secondsPerItem: number
  switch (taskType) {
    case 'asl:screening:title-abstract':
      secondsPerItem = 0.5 // per paper, including the LLM call
      break
    case 'asl:screening:full-text':
      secondsPerItem = 2 // per paper
      break
    case 'asl:extraction':
      secondsPerItem = 3 // per paper
      break
    case 'dc:clean:batch':
      secondsPerItem = 0.1 // per row
      break
    case 'dc:extract:medical-record':
      secondsPerItem = 5 // per record
      break
    case 'ssa:analysis:batch':
      secondsPerItem = 0.01 // per data point
      break
    default:
      secondsPerItem = 1 // per item
  }
  return Math.ceil(batchSize * secondsPerItem)
}
/**
 * Checks that a batch index is valid for a given number of batches.
 *
 * A valid index is an integer in the range `[0, totalBatches)`. `NaN` and
 * fractional indices are rejected, as is any index when `totalBatches` is
 * zero, negative or `NaN`.
 *
 * @param batchIndex Zero-based batch index.
 * @param totalBatches Total number of batches.
 * @throws Error if the index is not valid.
 */
export function validateBatchIndex(batchIndex: number, totalBatches: number): void {
  // Written as a positive range test so that NaN on either side fails it;
  // `batchIndex < 0 || batchIndex >= totalBatches` is false for NaN.
  const isValid =
    Number.isInteger(batchIndex) && batchIndex >= 0 && batchIndex < totalBatches
  if (!isValid) {
    throw new Error(
      `Invalid batch index: ${batchIndex}. Must be between 0 and ${totalBatches - 1}`
    )
  }
}
/**
 * Returns the items that belong to one batch of an array.
 *
 * The input array is not modified. An index at or past the end yields an
 * empty array, and so does a negative index or a non-positive `chunkSize`;
 * this function never throws for out-of-range input.
 *
 * @param items Full array of items.
 * @param batchIndex Zero-based batch index.
 * @param chunkSize Number of items per batch.
 * @returns The items of that batch (possibly fewer than `chunkSize` for the
 *   last batch, or empty when out of range).
 *
 * @example
 * ```typescript
 * const ids = [1,2,3,4,5,6,7,8,9,10]
 * getBatchItems(ids, 0, 3) // [1,2,3]
 * getBatchItems(ids, 1, 3) // [4,5,6]
 * getBatchItems(ids, 3, 3) // [10]
 * getBatchItems(ids, 4, 3) // []
 * ```
 */
export function getBatchItems<T>(
  items: T[],
  batchIndex: number,
  chunkSize: number
): T[] {
  // Negative bounds would make slice() count from the end of the array and
  // return items from the tail; treat such input as "no such batch" instead.
  if (!(batchIndex >= 0) || !(chunkSize > 0)) {
    return []
  }
  const start = batchIndex * chunkSize
  // slice() clamps the end bound to the array length.
  return items.slice(start, start + chunkSize)
}