import { PrismaClient } from '@prisma/client';

const prisma = new PrismaClient();

/**
 * SQL fragment listing the eQuery statuses this script treats as "open".
 *
 * It is a static constant (never user input) that is interpolated into every
 * query below, so the dry-run counts and the actual update always select the
 * same set of rows.
 */
const OPEN_STATUS_LIST_SQL = "('pending', 'responded', 'reviewing', 'reopened')";

/**
 * Parse the command line.
 *
 * The first argument that does not start with `--` is taken as the project id;
 * the `--apply` flag switches from dry-run to actually updating rows.
 *
 * @returns {{ projectId: string, apply: boolean }}
 * @throws {Error} with a usage message when no project id was given.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const apply = args.includes('--apply');
  const projectId = args.find((a) => !a.startsWith('--'));
  if (!projectId) {
    throw new Error('Usage: npx tsx scripts/dedupe_open_equeries.ts <projectId> [--apply]');
  }
  return { projectId, apply };
}

/**
 * Count the open eQueries of a project and how many of them are duplicates.
 *
 * Open rows are grouped by (record_id, event_id, category), with NULL
 * event_id/category normalised to ''. Any group with more than one row is a
 * duplicate group; every row beyond the first in such a group is a duplicate row.
 *
 * @param {string} projectId
 * @returns {Promise<Array<{ open_total: bigint, duplicate_groups: bigint, duplicate_rows: bigint }>>}
 *   The rows returned by the aggregate query; callers read the first one.
 */
async function fetchDuplicateSummary(projectId) {
  return prisma.$queryRawUnsafe(
    `
    WITH grouped AS (
      SELECT
        record_id,
        COALESCE(event_id, '') AS event_id_norm,
        COALESCE(category, '') AS category_norm,
        COUNT(*)::bigint AS cnt
      FROM iit_schema.equery
      WHERE project_id = $1
        AND status IN ${OPEN_STATUS_LIST_SQL}
      GROUP BY 1,2,3
    )
    SELECT
      (
        SELECT COUNT(*)::bigint
        FROM iit_schema.equery
        WHERE project_id = $1
          AND status IN ${OPEN_STATUS_LIST_SQL}
      ) AS open_total,
      COUNT(*) FILTER (WHERE cnt > 1)::bigint AS duplicate_groups,
      COALESCE(SUM(cnt - 1) FILTER (WHERE cnt > 1), 0)::bigint AS duplicate_rows
    FROM grouped
    `,
    projectId,
  );
}

/**
 * Fetch up to 20 duplicate groups, largest first (ties broken by record_id).
 *
 * @param {string} projectId
 * @returns {Promise<Array<{ record_id: unknown, event_id_norm: string, category_norm: string, cnt: bigint }>>}
 */
async function fetchTopDuplicateGroups(projectId) {
  return prisma.$queryRawUnsafe(
    `
    SELECT
      record_id,
      COALESCE(event_id, '') AS event_id_norm,
      COALESCE(category, '') AS category_norm,
      COUNT(*)::bigint AS cnt
    FROM iit_schema.equery
    WHERE project_id = $1
      AND status IN ${OPEN_STATUS_LIST_SQL}
    GROUP BY 1,2,3
    HAVING COUNT(*) > 1
    ORDER BY cnt DESC, record_id ASC
    LIMIT 20
    `,
    projectId,
  );
}

/**
 * Auto-close every duplicate open eQuery of a project, keeping one per group.
 *
 * Within each (project_id, record_id, event_id, category) group the survivor is
 * chosen by status priority (reviewing > responded > reopened > pending), then
 * by most recent updated_at, then created_at, then highest id. All other rows
 * are set to status 'auto_closed'; existing closed_at / closed_by / non-empty
 * resolution values are preserved, otherwise defaults are filled in.
 *
 * @param {string} projectId
 * @returns {Promise<number>} the affected-row count reported by the UPDATE.
 */
async function closeDuplicateEqueries(projectId) {
  const updated = await prisma.$executeRawUnsafe(
    `
    WITH ranked AS (
      SELECT
        id,
        ROW_NUMBER() OVER (
          PARTITION BY project_id, record_id, COALESCE(event_id, ''), COALESCE(category, '')
          ORDER BY
            CASE status
              WHEN 'reviewing' THEN 4
              WHEN 'responded' THEN 3
              WHEN 'reopened' THEN 2
              WHEN 'pending' THEN 1
              ELSE 0
            END DESC,
            updated_at DESC NULLS LAST,
            created_at DESC NULLS LAST,
            id DESC
        ) AS rn
      FROM iit_schema.equery
      WHERE project_id = $1
        AND status IN ${OPEN_STATUS_LIST_SQL}
    )
    UPDATE iit_schema.equery e
    SET
      status = 'auto_closed',
      closed_at = COALESCE(e.closed_at, NOW()),
      closed_by = COALESCE(e.closed_by, 'system_dedupe_script'),
      resolution = COALESCE(
        NULLIF(e.resolution, ''),
        '自动去重收敛:同一受试者/事件/规则已存在未关闭 eQuery'
      ),
      updated_at = NOW()
    FROM ranked r
    WHERE e.id = r.id
      AND r.rn > 1
    `,
    projectId,
  );
  return Number(updated);
}

/**
 * Entry point: report duplicate open eQueries for a project and, when
 * `--apply` is given, auto-close the redundant ones.
 *
 * The summary and sample are read BEFORE any update, so in apply mode the
 * printed counts describe the state prior to the clean-up and `updatedRows`
 * reports how many rows were closed. The result is printed as JSON on stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { projectId, apply } = parseArgs();

  const summaryRows = await fetchDuplicateSummary(projectId);
  const sample = await fetchTopDuplicateGroups(projectId);
  const updatedRows = apply ? await closeDuplicateEqueries(projectId) : 0;

  // Counts come back as bigint, which JSON.stringify cannot serialise;
  // convert to Number before printing.
  const summary = summaryRows[0];
  const result = {
    projectId,
    mode: apply ? 'apply' : 'dry-run',
    openTotal: Number(summary?.open_total ?? 0),
    duplicateGroups: Number(summary?.duplicate_groups ?? 0),
    duplicateRows: Number(summary?.duplicate_rows ?? 0),
    updatedRows,
    sampleTopGroups: sample.map((x) => ({
      recordId: x.record_id,
      eventId: x.event_id_norm || '(empty)',
      category: x.category_norm || '(empty)',
      count: Number(x.cnt),
    })),
  };

  console.log(JSON.stringify(result, null, 2));
}

main()
  .catch((e) => {
    console.error(e);
    // Set the exit code instead of calling process.exit(): exiting here would
    // terminate the process before the finally handler can disconnect Prisma.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
  });