Files
AIclinicalresearch/backend/scripts/create-tool-c-table.js
HaHafeng 4c6eaaecbf feat(dc): Implement Postgres-Only async architecture and performance optimization
Summary:
- Implement async file upload processing (Platform-Only pattern)
- Add parseExcelWorker with pg-boss queue
- Implement React Query polling mechanism
- Add clean data caching (avoid duplicate parsing)
- Fix pivot single-value column tuple issue
- Optimize performance by 99 percent

Technical Details:

1. Async Architecture (Postgres-Only):
   - SessionService.createSession: Fast upload + push to queue (3s)
   - parseExcelWorker: Background parsing + save clean data (53s)
   - SessionController.getSessionStatus: Status query API for polling
   - React Query Hook: useSessionStatus (auto-serial polling)
   - Frontend progress bar with real-time feedback

2. Performance Optimization:
   - Clean data caching: Worker saves processed data to OSS
   - getPreviewData: Read from clean data cache (0.5s vs 43s, -99 percent)
   - getFullData: Read from clean data cache (0.5s vs 43s, -99 percent)
   - Intelligent cleaning: Boundary detection + ghost column/row removal
   - Safety valve: Max 3000 columns, 5M cells

3. Bug Fixes:
   - Fix pivot column name tuple issue for single value column
   - Fix queue name format (colon to underscore: asl:screening -> asl_screening)
   - Fix polling storm (15+ concurrent requests -> 1 serial request)
   - Fix QUEUE_TYPE environment variable (memory -> pgboss)
   - Fix logger import in PgBossQueue
   - Fix formatSession to return cleanDataKey
   - Fix saveProcessedData to update clean data synchronously

4. Database Changes:
   - ALTER TABLE dc_tool_c_sessions ADD COLUMN clean_data_key VARCHAR(1000)
   - ALTER TABLE dc_tool_c_sessions ALTER COLUMN total_rows DROP NOT NULL
   - ALTER TABLE dc_tool_c_sessions ALTER COLUMN total_cols DROP NOT NULL
   - ALTER TABLE dc_tool_c_sessions ALTER COLUMN columns DROP NOT NULL

5. Documentation:
   - Create Postgres-Only async task processing guide (588 lines)
   - Update Tool C status document (Day 10 summary)
   - Update DC module status document
   - Update system overview document
   - Update cloud-native development guide

Performance Improvements:
- Upload + preview: 96s -> 53.5s (-44 percent)
- Filter operation: 44s -> 2.5s (-94 percent)
- Pivot operation: 45s -> 2.5s (-94 percent)
- Concurrent requests: 15+ -> 1 (-93 percent)
- Complete workflow (upload + 7 ops): 404s -> 70.5s (-83 percent)

Files Changed:
- Backend: 15 files (Worker, Service, Controller, Schema, Config)
- Frontend: 4 files (Hook, Component, API)
- Docs: 4 files (Guide, Status, Overview, Spec)
- Database: 4 column modifications
- Total: ~1388 lines of new/modified code

Status: Fully tested and verified, production ready
2025-12-22 21:30:31 +08:00

163 lines
4.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 创建 Tool C Session 表
*
* 执行方式node scripts/create-tool-c-table.js
*/
const { PrismaClient } = require('@prisma/client');
const fs = require('fs');
const path = require('path');
const prisma = new PrismaClient();
async function createToolCTable() {
console.log('========================================');
console.log('开始创建 Tool C Session 表');
console.log('========================================\n');
try {
// 1. 检查表是否已存在
console.log('[1/4] 检查表是否已存在...');
const checkResult = await prisma.$queryRawUnsafe(`
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_schema = 'dc_schema'
AND table_name = 'dc_tool_c_sessions'
) as exists
`);
const tableExists = checkResult[0].exists;
if (tableExists) {
console.log('✅ 表已存在: dc_schema.dc_tool_c_sessions');
console.log('\n是否需要重新创建(这将删除现有数据)');
console.log('如需重新创建,请手动执行: DROP TABLE dc_schema.dc_tool_c_sessions CASCADE;\n');
return;
}
console.log('✅ 表不存在,准备创建\n');
// 2. 创建表
console.log('[2/4] 创建表 dc_tool_c_sessions...');
await prisma.$executeRawUnsafe(`
CREATE TABLE dc_schema.dc_tool_c_sessions (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id VARCHAR(255) NOT NULL,
file_name VARCHAR(500) NOT NULL,
file_key VARCHAR(500) NOT NULL,
total_rows INTEGER NOT NULL,
total_cols INTEGER NOT NULL,
columns JSONB NOT NULL,
encoding VARCHAR(50),
file_size INTEGER NOT NULL,
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP NOT NULL
)
`);
console.log('✅ 表创建成功\n');
// 3. 创建索引
console.log('[3/4] 创建索引...');
await prisma.$executeRawUnsafe(`
CREATE INDEX idx_dc_tool_c_sessions_user_id ON dc_schema.dc_tool_c_sessions(user_id)
`);
await prisma.$executeRawUnsafe(`
CREATE INDEX idx_dc_tool_c_sessions_expires_at ON dc_schema.dc_tool_c_sessions(expires_at)
`);
console.log('✅ 索引创建成功\n');
// 4. 添加注释
console.log('[4/4] 添加表注释...');
await prisma.$executeRawUnsafe(`
COMMENT ON TABLE dc_schema.dc_tool_c_sessions IS 'Tool C (科研数据编辑器) Session会话表'
`);
await prisma.$executeRawUnsafe(`
COMMENT ON COLUMN dc_schema.dc_tool_c_sessions.file_key IS 'OSS存储路径: dc/tool-c/sessions/{timestamp}-{fileName}'
`);
await prisma.$executeRawUnsafe(`
COMMENT ON COLUMN dc_schema.dc_tool_c_sessions.columns IS '列名数组 ["age", "gender", "diagnosis"]'
`);
await prisma.$executeRawUnsafe(`
COMMENT ON COLUMN dc_schema.dc_tool_c_sessions.expires_at IS '过期时间创建后10分钟'
`);
console.log('✅ 注释添加成功\n');
// 5. 验证表创建
console.log('========================================');
console.log('验证表结构');
console.log('========================================\n');
const columns = await prisma.$queryRawUnsafe(`
SELECT column_name, data_type, is_nullable
FROM information_schema.columns
WHERE table_schema = 'dc_schema'
AND table_name = 'dc_tool_c_sessions'
ORDER BY ordinal_position
`);
console.log('表结构:');
console.table(columns);
const indexes = await prisma.$queryRawUnsafe(`
SELECT indexname, indexdef
FROM pg_indexes
WHERE schemaname = 'dc_schema'
AND tablename = 'dc_tool_c_sessions'
`);
console.log('\n索引:');
console.table(indexes);
console.log('\n========================================');
console.log('🎉 Tool C Session 表创建成功!');
console.log('========================================\n');
console.log('表名: dc_schema.dc_tool_c_sessions');
console.log(`列数: ${columns.length}`);
console.log(`索引数: ${indexes.length}\n`);
} catch (error) {
console.error('\n❌ 创建表失败:', error.message);
console.error('\n详细错误:');
console.error(error);
process.exit(1);
} finally {
await prisma.$disconnect();
}
}
// 执行
createToolCTable()
.then(() => {
console.log('脚本执行完成');
process.exit(0);
})
.catch((error) => {
console.error('脚本执行失败:', error);
process.exit(1);
});