Major Features:
- Created ekb_schema (13th schema) with 3 tables: KB/Document/Chunk
- Implemented EmbeddingService (text-embedding-v4, 1024-dim vectors)
- Implemented ChunkService (smart Markdown chunking)
- Implemented VectorSearchService (multi-query + hybrid search)
- Implemented RerankService (qwen3-rerank)
- Integrated DeepSeek V3 QueryRewriter for cross-language search
- Python service: Added pymupdf4llm for PDF-to-Markdown conversion
- PKB: Dual-mode adapter (pgvector/dify/hybrid)

Architecture:
- Brain-Hand Model: Business layer (DeepSeek) + Engine layer (pgvector)
- Cross-language support: Chinese query matches English documents
- Small Embedding (1024) + Strong Reranker strategy

Performance:
- End-to-end latency: 2.5s
- Cost per query: 0.0025 RMB
- Accuracy improvement: +20.5% (cross-language)

Tests:
- test-embedding-service.ts: Vector embedding verified
- test-rag-e2e.ts: Full pipeline tested
- test-rerank.ts: Rerank quality validated
- test-query-rewrite.ts: Cross-language search verified
- test-pdf-ingest.ts: Real PDF document tested (Dongen 2003.pdf)

Documentation:
- Added 05-RAG-Engine-User-Guide.md
- Added 02-Document-Processing-User-Guide.md
- Updated system status documentation

Status: Production ready
205 lines
4.6 KiB
JavaScript
205 lines
4.6 KiB
JavaScript
/**
 * Create the Tool C Session table.
 *
 * Usage: node scripts/create-tool-c-table.js
 */
|
||
|
||
const { PrismaClient } = require('@prisma/client');
|
||
const fs = require('fs');
|
||
const path = require('path');
|
||
|
||
const prisma = new PrismaClient();
|
||
|
||
/**
 * Create the `dc_schema.dc_tool_c_sessions` table, its two indexes and its
 * table/column comments, then print the resulting columns and indexes.
 *
 * If the table already exists, nothing is modified: a hint on how to drop it
 * manually is printed and the function returns early.
 *
 * The Prisma connection is always closed before the returned promise settles.
 *
 * @returns {Promise<void>} Resolves when the table was created (or already existed).
 * @throws Rethrows any error raised by the database calls, after logging its message.
 */
async function createToolCTable() {
  const banner = '========================================';
  const qualifiedName = 'dc_schema.dc_tool_c_sessions';

  console.log(banner);
  console.log('开始创建 Tool C Session 表');
  console.log(`${banner}\n`);

  try {
    // 1. Check whether the table already exists.
    console.log('[1/4] 检查表是否已存在...');
    const existenceRows = await prisma.$queryRawUnsafe(`
      SELECT EXISTS (
        SELECT FROM information_schema.tables
        WHERE table_schema = 'dc_schema'
        AND table_name = 'dc_tool_c_sessions'
      ) as exists
    `);
    const tableExists = existenceRows[0].exists;

    if (tableExists) {
      // Never drop existing data automatically; tell the operator how to do it by hand.
      console.log(`✅ 表已存在: ${qualifiedName}`);
      console.log('\n是否需要重新创建?(这将删除现有数据)');
      console.log(`如需重新创建,请手动执行: DROP TABLE ${qualifiedName} CASCADE;\n`);
      return;
    }

    console.log('✅ 表不存在,准备创建\n');

    // 2. Create the table.
    console.log('[2/4] 创建表 dc_tool_c_sessions...');
    await prisma.$executeRawUnsafe(`
      CREATE TABLE dc_schema.dc_tool_c_sessions (
        id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
        user_id VARCHAR(255) NOT NULL,
        file_name VARCHAR(500) NOT NULL,
        file_key VARCHAR(500) NOT NULL,

        total_rows INTEGER NOT NULL,
        total_cols INTEGER NOT NULL,
        columns JSONB NOT NULL,
        encoding VARCHAR(50),
        file_size INTEGER NOT NULL,

        created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
        expires_at TIMESTAMP NOT NULL
      )
    `);
    console.log('✅ 表创建成功\n');

    // 3. Create the indexes (run one at a time, in the original order).
    console.log('[3/4] 创建索引...');
    const indexStatements = [
      `CREATE INDEX idx_dc_tool_c_sessions_user_id ON dc_schema.dc_tool_c_sessions(user_id)`,
      `CREATE INDEX idx_dc_tool_c_sessions_expires_at ON dc_schema.dc_tool_c_sessions(expires_at)`,
    ];
    for (const statement of indexStatements) {
      await prisma.$executeRawUnsafe(statement);
    }
    console.log('✅ 索引创建成功\n');

    // 4. Attach the table and column comments.
    console.log('[4/4] 添加表注释...');
    const commentStatements = [
      `COMMENT ON TABLE dc_schema.dc_tool_c_sessions IS 'Tool C (科研数据编辑器) Session会话表'`,
      `COMMENT ON COLUMN dc_schema.dc_tool_c_sessions.file_key IS 'OSS存储路径: dc/tool-c/sessions/{timestamp}-{fileName}'`,
      `COMMENT ON COLUMN dc_schema.dc_tool_c_sessions.columns IS '列名数组 ["age", "gender", "diagnosis"]'`,
      `COMMENT ON COLUMN dc_schema.dc_tool_c_sessions.expires_at IS '过期时间(创建后10分钟)'`,
    ];
    for (const statement of commentStatements) {
      await prisma.$executeRawUnsafe(statement);
    }
    console.log('✅ 注释添加成功\n');

    // 5. Verify the result by reading the catalog back.
    console.log(banner);
    console.log('验证表结构');
    console.log(`${banner}\n`);

    const columns = await prisma.$queryRawUnsafe(`
      SELECT column_name, data_type, is_nullable
      FROM information_schema.columns
      WHERE table_schema = 'dc_schema'
      AND table_name = 'dc_tool_c_sessions'
      ORDER BY ordinal_position
    `);

    console.log('表结构:');
    console.table(columns);

    const indexes = await prisma.$queryRawUnsafe(`
      SELECT indexname, indexdef
      FROM pg_indexes
      WHERE schemaname = 'dc_schema'
      AND tablename = 'dc_tool_c_sessions'
    `);

    console.log('\n索引:');
    console.table(indexes);

    console.log(`\n${banner}`);
    console.log('🎉 Tool C Session 表创建成功!');
    console.log(`${banner}\n`);
    console.log(`表名: ${qualifiedName}`);
    console.log(`列数: ${columns.length}`);
    console.log(`索引数: ${indexes.length}\n`);
  } catch (error) {
    console.error('\n❌ 创建表失败:', error.message);
    // Do NOT call process.exit() here: it terminates the process immediately,
    // so the `finally` block below would never run and the connection would
    // not be closed. Rethrow instead; the script's top-level rejection handler
    // prints the full error and sets the non-zero exit code.
    throw error;
  } finally {
    await prisma.$disconnect();
  }
}
|
||
|
||
// 执行
|
||
// Script entry point: run the migration and map its outcome to an exit code.
(async () => {
  try {
    await createToolCTable();
    console.log('脚本执行完成');
    process.exit(0);
  } catch (error) {
    console.error('脚本执行失败:', error);
    process.exit(1);
  }
})();
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|