Files
AIclinicalresearch/backend/scripts/create-tool-c-ai-history-table.mjs
HaHafeng 40c2f8e148 feat(rag): Complete RAG engine implementation with pgvector
Major Features:
- Created ekb_schema (13th schema) with 3 tables: KB/Document/Chunk
- Implemented EmbeddingService (text-embedding-v4, 1024-dim vectors)
- Implemented ChunkService (smart Markdown chunking)
- Implemented VectorSearchService (multi-query + hybrid search)
- Implemented RerankService (qwen3-rerank)
- Integrated DeepSeek V3 QueryRewriter for cross-language search
- Python service: Added pymupdf4llm for PDF-to-Markdown conversion
- PKB: Dual-mode adapter (pgvector/dify/hybrid)

Architecture:
- Brain-Hand Model: Business layer (DeepSeek) + Engine layer (pgvector)
- Cross-language support: Chinese query matches English documents
- Small Embedding (1024) + Strong Reranker strategy

Performance:
- End-to-end latency: 2.5s
- Cost per query: 0.0025 RMB
- Accuracy improvement: +20.5% (cross-language)

Tests:
- test-embedding-service.ts: Vector embedding verified
- test-rag-e2e.ts: Full pipeline tested
- test-rerank.ts: Rerank quality validated
- test-query-rewrite.ts: Cross-language search verified
- test-pdf-ingest.ts: Real PDF document tested (Dongen 2003.pdf)

Documentation:
- Added 05-RAG-Engine-User-Guide.md
- Added 02-Document-Processing-User-Guide.md
- Updated system status documentation

Status: Production ready
2026-01-21 20:24:29 +08:00

218 lines
4.8 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
 * Create the Tool C AI conversation history table.
 *
 * Usage: node scripts/create-tool-c-ai-history-table.mjs
 */
import { PrismaClient } from '@prisma/client';
// Single Prisma client used by this script; createAiHistoryTable() disconnects it in its `finally` block.
const prisma = new PrismaClient();
/**
 * Create the `dc_schema.dc_tool_c_ai_history` table together with its indexes
 * and comments, then print the resulting columns and indexes.
 *
 * If the table already exists, a notice is logged and nothing is changed.
 *
 * All DDL (table, indexes, comments) runs inside a single transaction, so a
 * failure part-way through rolls everything back. Without that, a failed index
 * or comment statement would leave the table behind and the next run would
 * report "already exists" and never create the missing indexes.
 *
 * The module-level Prisma client is disconnected before the returned promise
 * settles, on success and on failure alike.
 *
 * @returns {Promise<void>} Resolves once the table was found or was created and listed.
 * @throws Rethrows any error raised by the database calls after logging it;
 *   the caller is responsible for choosing the process exit code.
 */
async function createAiHistoryTable() {
  console.log('========================================');
  console.log('开始创建 Tool C AI对话历史表');
  console.log('========================================\n');

  // SQL text is kept flush-left so the statements sent to the database are
  // exactly the ones this script has always sent.
  const createTableSql = `
CREATE TABLE dc_schema.dc_tool_c_ai_history (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
session_id VARCHAR(255) NOT NULL,
user_id VARCHAR(255) NOT NULL,
role VARCHAR(50) NOT NULL,
content TEXT NOT NULL,
-- Tool C特有字段
generated_code TEXT,
code_explanation TEXT,
execute_status VARCHAR(50),
execute_result JSONB,
execute_error TEXT,
retry_count INTEGER DEFAULT 0,
-- LLM相关
model VARCHAR(100),
created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
)
`;

  const indexStatements = [
    `
CREATE INDEX idx_dc_tool_c_ai_history_session_id
ON dc_schema.dc_tool_c_ai_history(session_id)
`,
    `
CREATE INDEX idx_dc_tool_c_ai_history_user_id
ON dc_schema.dc_tool_c_ai_history(user_id)
`,
    `
CREATE INDEX idx_dc_tool_c_ai_history_created_at
ON dc_schema.dc_tool_c_ai_history(created_at)
`,
  ];

  const commentStatements = [
    `
COMMENT ON TABLE dc_schema.dc_tool_c_ai_history
IS 'Tool C (科研数据编辑器) AI对话历史表'
`,
    `
COMMENT ON COLUMN dc_schema.dc_tool_c_ai_history.session_id
IS '关联Tool C Session ID'
`,
    `
COMMENT ON COLUMN dc_schema.dc_tool_c_ai_history.generated_code
IS 'AI生成的Pandas代码'
`,
    `
COMMENT ON COLUMN dc_schema.dc_tool_c_ai_history.execute_status
IS '执行状态: pending/success/failed'
`,
    `
COMMENT ON COLUMN dc_schema.dc_tool_c_ai_history.retry_count
IS '自我修正重试次数'
`,
  ];

  try {
    // 1. Check whether the table already exists.
    console.log('[1/4] 检查表是否已存在...');
    const checkResult = await prisma.$queryRawUnsafe(`
SELECT EXISTS (
SELECT FROM information_schema.tables
WHERE table_schema = 'dc_schema'
AND table_name = 'dc_tool_c_ai_history'
) as exists
`);
    const tableExists = checkResult[0].exists;
    if (tableExists) {
      console.log('✅ 表已存在: dc_schema.dc_tool_c_ai_history');
      console.log('\n如需重新创建请手动执行: DROP TABLE dc_schema.dc_tool_c_ai_history CASCADE;\n');
      return;
    }
    console.log('✅ 表不存在,准备创建\n');

    // Steps 2-4 share one transaction: either the table, its indexes and its
    // comments all exist afterwards, or none of them do. The per-step success
    // messages are printed before the commit; a later failure rolls the
    // earlier steps back and is reported by the catch block below.
    await prisma.$transaction(
      async (tx) => {
        // 2. Create the table.
        console.log('[2/4] 创建表 dc_tool_c_ai_history...');
        await tx.$executeRawUnsafe(createTableSql);
        console.log('✅ 表创建成功\n');

        // 3. Create the indexes.
        console.log('[3/4] 创建索引...');
        for (const sql of indexStatements) {
          await tx.$executeRawUnsafe(sql);
        }
        console.log('✅ 索引创建成功\n');

        // 4. Add table and column comments.
        console.log('[4/4] 添加表注释...');
        for (const sql of commentStatements) {
          await tx.$executeRawUnsafe(sql);
        }
        console.log('✅ 注释添加成功\n');
      },
      // Nine sequential round-trips; allow more than the default interactive
      // transaction timeout in case the database connection is slow.
      { timeout: 30000 },
    );

    // 5. Verify the result by listing the columns and indexes.
    console.log('========================================');
    console.log('验证表结构');
    console.log('========================================\n');
    const columns = await prisma.$queryRawUnsafe(`
SELECT column_name, data_type, is_nullable
FROM information_schema.columns
WHERE table_schema = 'dc_schema'
AND table_name = 'dc_tool_c_ai_history'
ORDER BY ordinal_position
`);
    console.log('表结构:');
    console.table(columns);

    const indexes = await prisma.$queryRawUnsafe(`
SELECT indexname, indexdef
FROM pg_indexes
WHERE schemaname = 'dc_schema'
AND tablename = 'dc_tool_c_ai_history'
`);
    console.log('\n索引:');
    console.table(indexes);

    console.log('\n========================================');
    console.log('🎉 Tool C AI对话历史表创建成功');
    console.log('========================================\n');
    console.log('表名: dc_schema.dc_tool_c_ai_history');
    console.log(`列数: ${columns.length}`);
    console.log(`索引数: ${indexes.length}\n`);
  } catch (error) {
    console.error('\n❌ 创建表失败:', error.message);
    console.error('\n详细错误:');
    console.error(error);
    // Rethrow rather than calling process.exit(1) here: exiting inside the
    // catch would end the process before `finally` runs, so the client would
    // never be disconnected. The caller turns the rejection into exit code 1.
    throw error;
  } finally {
    await prisma.$disconnect();
  }
}
// Entry point: run the table creation and map its outcome to the process exit code
// (0 when it resolves, 1 when it rejects).
try {
  await createAiHistoryTable();
  console.log('脚本执行完成');
  process.exit(0);
} catch (error) {
  console.error('脚本执行失败:', error);
  process.exit(1);
}