Files
AIclinicalresearch/backend/scripts/dedupe_open_equeries.ts
HaHafeng a666649fd4 feat(iit): harden QC pipeline consistency and release artifacts
Implement IIT quality workflow hardening across eQuery deduplication, guard metadata validation, timeline/readability improvements, and chat evidence fallbacks, then synchronize release and development documentation for deployment handoff.

Includes migration/scripts for open eQuery dedupe guards, orchestration/status semantics, report/tool readability fixes, and updated module status plus deployment checklist.

Made-with: Cursor
2026-03-08 21:54:35 +08:00

146 lines
3.9 KiB
TypeScript

import { PrismaClient } from '@prisma/client';
// Single Prisma client shared by every query in this script; the promise chain
// at the bottom of the file calls $disconnect() on it.
const prisma = new PrismaClient();
/**
 * Reads the command-line arguments that follow the runtime and script path.
 *
 * - `--apply` anywhere in the list switches the script from dry-run to apply mode.
 * - The first argument that does not start with `--` is taken as the project id;
 *   any later positional arguments are ignored.
 *
 * @returns The project id and whether `--apply` was given.
 * @throws Error with the usage string when no (non-empty) project id is present.
 */
function parseArgs() {
  const [, , ...cliArgs] = process.argv;

  let projectId: string | undefined;
  let apply = false;

  for (const token of cliArgs) {
    if (token === '--apply') {
      apply = true;
    }
    // Only the first positional token counts, even if it is an empty string
    // (an empty id is rejected by the check below).
    if (projectId === undefined && !token.startsWith('--')) {
      projectId = token;
    }
  }

  if (!projectId) {
    throw new Error('Usage: npx tsx scripts/dedupe_open_equeries.ts <projectId> [--apply]');
  }

  return { projectId, apply };
}
/**
 * Reports duplicate open eQueries for one project and, in apply mode,
 * auto-closes the surplus rows.
 *
 * "Open" means status is one of pending / responded / reviewing / reopened.
 * Rows count as duplicates when they share record_id, event_id and category
 * within the project (NULL event_id / category are treated as '').
 *
 * Steps:
 *  1. Count open rows, duplicate groups and surplus rows.
 *  2. Fetch the 20 largest duplicate groups as a sample.
 *  3. With `--apply`: keep one row per group (best status rank, then latest
 *     updated_at, created_at, highest id) and set every other row to
 *     'auto_closed'.
 *  4. Print a JSON report to stdout.
 *
 * The counts and the sample are read before the update, so in apply mode they
 * describe the state prior to closing anything.
 */
async function main() {
  type SummaryRow = {
    open_total: bigint;
    duplicate_groups: bigint;
    duplicate_rows: bigint;
  };
  type DuplicateGroupRow = {
    record_id: string;
    event_id_norm: string;
    category_norm: string;
    cnt: bigint;
  };

  // SQL bodies are written flush-left: any leading whitespace inside the
  // template literal would become part of the statement text.

  // Totals: all open rows, number of groups with >1 row, and rows beyond the
  // first in those groups.
  const summarySql = `
WITH grouped AS (
SELECT
record_id,
COALESCE(event_id, '') AS event_id_norm,
COALESCE(category, '') AS category_norm,
COUNT(*)::bigint AS cnt
FROM iit_schema.equery
WHERE project_id = $1
AND status IN ('pending', 'responded', 'reviewing', 'reopened')
GROUP BY 1,2,3
)
SELECT
(
SELECT COUNT(*)::bigint
FROM iit_schema.equery
WHERE project_id = $1
AND status IN ('pending', 'responded', 'reviewing', 'reopened')
) AS open_total,
COUNT(*) FILTER (WHERE cnt > 1)::bigint AS duplicate_groups,
COALESCE(SUM(cnt - 1) FILTER (WHERE cnt > 1), 0)::bigint AS duplicate_rows
FROM grouped
`;

  // The 20 largest duplicate groups, biggest first.
  const sampleSql = `
SELECT
record_id,
COALESCE(event_id, '') AS event_id_norm,
COALESCE(category, '') AS category_norm,
COUNT(*)::bigint AS cnt
FROM iit_schema.equery
WHERE project_id = $1
AND status IN ('pending', 'responded', 'reviewing', 'reopened')
GROUP BY 1,2,3
HAVING COUNT(*) > 1
ORDER BY cnt DESC, record_id ASC
LIMIT 20
`;

  // Ranks open rows inside each duplicate group and closes everything except
  // rank 1. closed_at / closed_by / resolution are only filled when not
  // already set (resolution is also filled when it is an empty string).
  const closeDuplicatesSql = `
WITH ranked AS (
SELECT
id,
ROW_NUMBER() OVER (
PARTITION BY
project_id,
record_id,
COALESCE(event_id, ''),
COALESCE(category, '')
ORDER BY
CASE status
WHEN 'reviewing' THEN 4
WHEN 'responded' THEN 3
WHEN 'reopened' THEN 2
WHEN 'pending' THEN 1
ELSE 0
END DESC,
updated_at DESC NULLS LAST,
created_at DESC NULLS LAST,
id DESC
) AS rn
FROM iit_schema.equery
WHERE project_id = $1
AND status IN ('pending', 'responded', 'reviewing', 'reopened')
)
UPDATE iit_schema.equery e
SET
status = 'auto_closed',
closed_at = COALESCE(e.closed_at, NOW()),
closed_by = COALESCE(e.closed_by, 'system_dedupe_script'),
resolution = COALESCE(
NULLIF(e.resolution, ''),
'自动去重收敛:同一受试者/事件/规则已存在未关闭 eQuery'
),
updated_at = NOW()
FROM ranked r
WHERE e.id = r.id
AND r.rn > 1
`;

  const cli = parseArgs();

  const stats = await prisma.$queryRawUnsafe<SummaryRow[]>(summarySql, cli.projectId);
  const topGroups = await prisma.$queryRawUnsafe<DuplicateGroupRow[]>(sampleSql, cli.projectId);

  // Dry-run never touches the table; apply mode reports the value the
  // UPDATE call resolves to.
  const updatedRows = cli.apply
    ? Number(await prisma.$executeRawUnsafe(closeDuplicatesSql, cli.projectId))
    : 0;

  const totals = stats[0];
  const sampleTopGroups = topGroups.map((group) => {
    return {
      recordId: group.record_id,
      eventId: group.event_id_norm || '(empty)',
      category: group.category_norm || '(empty)',
      count: Number(group.cnt),
    };
  });

  const report = {
    projectId: cli.projectId,
    mode: cli.apply ? 'apply' : 'dry-run',
    openTotal: Number(totals?.open_total || 0),
    duplicateGroups: Number(totals?.duplicate_groups || 0),
    duplicateRows: Number(totals?.duplicate_rows || 0),
    updatedRows,
    sampleTopGroups,
  };

  console.log(JSON.stringify(report, null, 2));
}
// Script entry point: run main(), report any failure, and always release the
// Prisma connection before the process ends.
main()
  .catch((e: unknown) => {
    console.error(e);
    // Record the failure via exitCode rather than calling process.exit():
    // exiting synchronously here would end the process before the `finally`
    // handler below gets a chance to disconnect Prisma.
    process.exitCode = 1;
  })
  .finally(async () => {
    await prisma.$disconnect();
  });