docs: complete documentation system (250+ files)

- System architecture and design documentation
- Business module docs (ASL/AIA/PKB/RVW/DC/SSA/ST)
- ASL module complete design (quality assurance, tech selection)
- Platform layer and common capabilities docs
- Development standards and API specifications
- Deployment and operations guides
- Project management and milestone tracking
- Architecture implementation reports
- Documentation templates and guides
This commit is contained in:
2025-11-16 15:43:55 +08:00
parent 0fe6821a89
commit e52020409c
173 changed files with 46227 additions and 11964 deletions

View File

@@ -0,0 +1,128 @@
-- ========================================
-- 001-create-all-10-schemas.sql
-- ========================================
-- 目的：创建10个Schema（3个详细 + 7个空）
-- 执行时间：约5秒
-- 作者：AI助手
-- 日期：2025-11-09
-- ========================================
-- 提示：请在PostgreSQL数据库中执行此脚本
-- 执行前确认DATABASE_URL指向正确的数据库
BEGIN;
-- Create all ten namespaces first, then attach their descriptions.
-- CREATE SCHEMA is guarded with IF NOT EXISTS and COMMENT simply overwrites,
-- so the whole block can be executed repeatedly.

-- Schemas 1-3: namespaces that receive tables in the follow-up migrations.
CREATE SCHEMA IF NOT EXISTS platform_schema;
CREATE SCHEMA IF NOT EXISTS aia_schema;
CREATE SCHEMA IF NOT EXISTS pkb_schema;

-- Schemas 4-10: namespaces reserved for later modules.
CREATE SCHEMA IF NOT EXISTS asl_schema;
CREATE SCHEMA IF NOT EXISTS common_schema;
CREATE SCHEMA IF NOT EXISTS dc_schema;
CREATE SCHEMA IF NOT EXISTS rvw_schema;
CREATE SCHEMA IF NOT EXISTS admin_schema;
CREATE SCHEMA IF NOT EXISTS ssa_schema;
CREATE SCHEMA IF NOT EXISTS st_schema;

-- Catalog descriptions, in the same order as the schemas above.
COMMENT ON SCHEMA platform_schema IS '平台基础层 - 用户管理、权限控制、认证服务';
COMMENT ON SCHEMA aia_schema      IS 'AI智能问答模块 - 对话管理、项目管理、通用对话';
COMMENT ON SCHEMA pkb_schema      IS '个人知识库模块 - 知识库管理、文档管理、批处理任务';
COMMENT ON SCHEMA asl_schema      IS 'AI智能文献筛选模块 - Week 3开发前再设计表结构';
COMMENT ON SCHEMA common_schema   IS '通用能力层 - LLM使用记录、Feature Flags、配额管理等';
COMMENT ON SCHEMA dc_schema       IS '数据清洗工具模块';
COMMENT ON SCHEMA rvw_schema      IS '稿件审查系统模块 - 包含review_tasks表';
COMMENT ON SCHEMA admin_schema    IS '运营管理后台模块 - 包含admin_logs表';
COMMENT ON SCHEMA ssa_schema      IS '智能统计分析模块';
COMMENT ON SCHEMA st_schema       IS '统计分析工具集模块';
-- ========================================
-- 验证查询所有Schema
-- ========================================
-- Count how many of the ten expected schemas are visible and report the result.
-- A shortfall only raises a WARNING; the transaction is still committed below.
DO $$
DECLARE
    found_schemas INTEGER;
BEGIN
    found_schemas := (
        SELECT COUNT(*)
        FROM information_schema.schemata AS s
        WHERE s.schema_name IN (
            'platform_schema', 'aia_schema', 'pkb_schema',
            'asl_schema', 'common_schema', 'dc_schema',
            'rvw_schema', 'admin_schema', 'ssa_schema', 'st_schema'
        )
    );

    RAISE NOTICE '已创建 % 个Schema', found_schemas;

    IF found_schemas >= 10 THEN
        RAISE NOTICE '✅ 成功10个Schema全部创建完成';
    ELSE
        RAISE WARNING '警告期望创建10个Schema实际只创建了 % 个', found_schemas;
    END IF;
END $$;
COMMIT;
-- ========================================
-- 执行后验证SQL可单独运行
-- ========================================
-- 查看所有新建的Schema
-- List each expected schema that exists together with its catalog comment,
-- sorted by schema name.
SELECT
    ns.nspname AS schema_name,
    pg_catalog.obj_description(ns.oid, 'pg_namespace') AS description
FROM pg_catalog.pg_namespace AS ns
WHERE ns.nspname IN (
    'platform_schema', 'aia_schema', 'pkb_schema',
    'asl_schema', 'common_schema', 'dc_schema',
    'rvw_schema', 'admin_schema', 'ssa_schema', 'st_schema'
)
ORDER BY ns.nspname;
-- ========================================
-- 预期输出:
-- ========================================
-- schema_name | description
-- -----------------|----------------------------------
-- admin_schema | 运营管理后台模块 - 包含admin_logs表
-- aia_schema | AI智能问答模块 - 对话管理...
-- asl_schema | AI智能文献筛选模块...
-- common_schema | 通用能力层...
-- dc_schema | 数据清洗工具模块
-- pkb_schema | 个人知识库模块...
-- platform_schema | 平台基础层...
-- rvw_schema | 稿件审查系统模块...
-- ssa_schema | 智能统计分析模块
-- st_schema | 统计分析工具集模块
-- ========================================

View File

@@ -0,0 +1,146 @@
-- ========================================
-- 002-migrate-platform.sql
-- ========================================
-- 目的：迁移platform_schema（用户表）
-- 迁移表：1个（users）
-- 预计时间：15分钟
-- 作者：AI助手
-- 日期：2025-11-09
-- ========================================
-- 前置条件:
-- 1. 已执行 001-create-all-10-schemas.sql
-- 2. public.users 表存在且有数据
BEGIN;
-- Step 1: target table for the user rows copied from public.users.
-- Column order mirrors the column list used by the data copy in step 3.
CREATE TABLE IF NOT EXISTS platform_schema.users (
    id            UUID         DEFAULT gen_random_uuid(),
    email         VARCHAR(255) NOT NULL,
    password      VARCHAR(255) NOT NULL,
    name          VARCHAR(255),
    avatar_url    VARCHAR(500),
    role          VARCHAR(50)  NOT NULL DEFAULT 'user',
    status        VARCHAR(50)  DEFAULT 'active',
    kb_quota      INTEGER      DEFAULT 3,
    kb_used       INTEGER      DEFAULT 0,
    trial_ends_at TIMESTAMP,
    is_trial      BOOLEAN      DEFAULT TRUE,
    last_login_at TIMESTAMP,
    created_at    TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    -- Unnamed constraints, so PostgreSQL assigns its default constraint names.
    PRIMARY KEY (id),
    UNIQUE (email)
);
-- ========================================
-- 第二步:创建索引
-- ========================================
-- Secondary indexes on platform_schema.users.
-- No separate index is created on email: the UNIQUE constraint declared on
-- platform_schema.users.email already builds a unique index on that column, so an
-- additional idx_platform_users_email would only duplicate it and add write cost.
-- (Databases that ran an earlier version of this script may still carry that
-- duplicate index; it can be dropped manually once nothing refers to it by name.)
CREATE INDEX IF NOT EXISTS idx_platform_users_role
    ON platform_schema.users (role);
CREATE INDEX IF NOT EXISTS idx_platform_users_status
    ON platform_schema.users (status);
CREATE INDEX IF NOT EXISTS idx_platform_users_created_at
    ON platform_schema.users (created_at);
-- ========================================
-- 第三步:迁移数据
-- ========================================
-- 从public.users迁移数据到platform_schema.users
-- Copy every user row from public.users. Rows whose id is already present in
-- the target are skipped, which lets the script be executed more than once.
INSERT INTO platform_schema.users
    (id, email, password, name, avatar_url,
     role, status, kb_quota, kb_used,
     trial_ends_at, is_trial, last_login_at,
     created_at, updated_at)
SELECT
    src.id,
    src.email,
    src.password,
    src.name,
    src.avatar_url,
    src.role,
    src.status,
    src.kb_quota,
    src.kb_used,
    src.trial_ends_at,
    src.is_trial,
    src.last_login_at,
    src.created_at,
    src.updated_at
FROM public.users AS src
ON CONFLICT (id) DO NOTHING;
-- ========================================
-- 第四步:数据验证
-- ========================================
-- Step 4: compare row counts between source and target and check that every
-- target row has a distinct email. Problems are reported as WARNINGs only.
DO $$
DECLARE
    src_rows        INTEGER;
    dst_rows        INTEGER;
    distinct_emails INTEGER;
BEGIN
    -- Row totals of the source and the target table.
    src_rows := (SELECT COUNT(*) FROM public.users);
    dst_rows := (SELECT COUNT(*) FROM platform_schema.users);

    RAISE NOTICE '原表 public.users 数据量：%', src_rows;
    RAISE NOTICE '新表 platform_schema.users 数据量：%', dst_rows;

    -- Row-count consistency.
    IF src_rows <> dst_rows THEN
        RAISE WARNING '⚠️ 警告：数据量不一致！预期 %，实际 %', src_rows, dst_rows;
    ELSE
        RAISE NOTICE '✅ 数据迁移成功：数据量完全一致';
    END IF;

    -- Email uniqueness: distinct emails must equal the number of rows.
    distinct_emails := (SELECT COUNT(DISTINCT email) FROM platform_schema.users);
    IF distinct_emails <> dst_rows THEN
        RAISE WARNING '⚠️ 警告Email存在重复';
    ELSE
        RAISE NOTICE '✅ Email唯一性校验通过';
    END IF;
END $$;
-- ========================================
-- 第五步:对比验证(抽样检查)
-- ========================================
-- 对比前5条数据
-- Step 5: side-by-side sample of the five most recently created users in the
-- source and in the target table.
-- id is added as a tie-breaker: without it, rows sharing the same created_at may
-- be returned in a different order by each query, making identical data look
-- different in the comparison.
SELECT 'public.users' AS source, id, email, name, role, created_at
FROM public.users
ORDER BY created_at DESC, id
LIMIT 5;

SELECT 'platform_schema.users' AS source, id, email, name, role, created_at
FROM platform_schema.users
ORDER BY created_at DESC, id
LIMIT 5;
COMMIT;
-- ========================================
-- 执行结果统计(可单独运行)
-- ========================================
-- One-row summary of platform_schema.users: totals, distinct emails, per-role
-- and active counts, and the creation-time range.
SELECT
    'platform_schema.users' AS table_name,
    COUNT(*) AS total_count,
    COUNT(DISTINCT u.email) AS unique_emails,
    COUNT(*) FILTER (WHERE u.role = 'admin') AS admin_count,
    COUNT(*) FILTER (WHERE u.role = 'user') AS user_count,
    COUNT(*) FILTER (WHERE u.status = 'active') AS active_count,
    MIN(u.created_at) AS first_user_date,
    MAX(u.created_at) AS last_user_date
FROM platform_schema.users AS u;
-- ========================================
-- 后续步骤说明
-- ========================================
-- 注意：public.users表暂时保留，不删除
-- 原因:
-- 1. 其他Schema的表（aia, pkb）会引用platform_schema.users
-- 2. 所有迁移完成并验证后，再决定是否删除public.users
-- 3. 删除前需确保所有外键已更新
-- ========================================

View File

@@ -0,0 +1,339 @@
-- ========================================
-- 003-migrate-aia.sql
-- ========================================
-- 目的：迁移aia_schema（AI智能问答模块）
-- 迁移表：5个（projects, conversations, messages, general_conversations, general_messages）
-- 预计时间：30分钟
-- 作者：AI助手
-- 日期：2025-11-09
-- ========================================
-- 前置条件:
-- 1. 已执行 001-create-all-10-schemas.sql
-- 2. 已执行 002-migrate-platform.sql（因为需要引用platform_schema.users）
-- 3. public schema中的相关表存在且有数据
BEGIN;
-- Step 1: aia_schema.projects. Each project belongs to one platform user and is
-- removed together with that user (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS aia_schema.projects (
    id                 UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id            UUID         NOT NULL
                                    REFERENCES platform_schema.users (id) ON DELETE CASCADE,
    name               VARCHAR(255) NOT NULL,
    background         TEXT         DEFAULT '',
    research_type      VARCHAR(50)  DEFAULT 'observational',
    conversation_count INTEGER      DEFAULT 0,
    created_at         TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at         TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    deleted_at         TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_aia_projects_user_id
    ON aia_schema.projects (user_id);
CREATE INDEX IF NOT EXISTS idx_aia_projects_created_at
    ON aia_schema.projects (created_at);
CREATE INDEX IF NOT EXISTS idx_aia_projects_deleted_at
    ON aia_schema.projects (deleted_at);
-- ========================================
-- 第二步创建aia_schema.conversations表
-- ========================================
-- Step 2: aia_schema.conversations. A conversation always has an owning user and
-- may optionally be attached to a project; deleting either parent cascades.
CREATE TABLE IF NOT EXISTS aia_schema.conversations (
    id            UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id       UUID         NOT NULL
                               REFERENCES platform_schema.users (id) ON DELETE CASCADE,
    project_id    UUID         REFERENCES aia_schema.projects (id) ON DELETE CASCADE,
    agent_id      VARCHAR(100) NOT NULL,
    title         VARCHAR(255) NOT NULL,
    model_name    VARCHAR(50)  DEFAULT 'deepseek-v3',
    message_count INTEGER      DEFAULT 0,
    total_tokens  INTEGER      DEFAULT 0,
    metadata      JSONB,
    created_at    TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    deleted_at    TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_aia_conversations_user_id
    ON aia_schema.conversations (user_id);
CREATE INDEX IF NOT EXISTS idx_aia_conversations_project_id
    ON aia_schema.conversations (project_id);
CREATE INDEX IF NOT EXISTS idx_aia_conversations_agent_id
    ON aia_schema.conversations (agent_id);
CREATE INDEX IF NOT EXISTS idx_aia_conversations_created_at
    ON aia_schema.conversations (created_at);
CREATE INDEX IF NOT EXISTS idx_aia_conversations_deleted_at
    ON aia_schema.conversations (deleted_at);
-- ========================================
-- 第三步创建aia_schema.messages表
-- ========================================
-- Step 3: aia_schema.messages. Messages are children of a conversation and are
-- deleted with it.
CREATE TABLE IF NOT EXISTS aia_schema.messages (
    id              UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    conversation_id UUID        NOT NULL
                                REFERENCES aia_schema.conversations (id) ON DELETE CASCADE,
    role            VARCHAR(20) NOT NULL,
    content         TEXT        NOT NULL,
    model           VARCHAR(50),
    metadata        JSONB,
    tokens          INTEGER,
    is_pinned       BOOLEAN     DEFAULT FALSE,
    created_at      TIMESTAMP   DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_aia_messages_conversation_id
    ON aia_schema.messages (conversation_id);
CREATE INDEX IF NOT EXISTS idx_aia_messages_created_at
    ON aia_schema.messages (created_at);
CREATE INDEX IF NOT EXISTS idx_aia_messages_is_pinned
    ON aia_schema.messages (is_pinned);
-- ========================================
-- 第四步创建aia_schema.general_conversations表
-- ========================================
-- Step 4: aia_schema.general_conversations. These conversations reference only
-- a user (no project column) and are removed together with that user.
CREATE TABLE IF NOT EXISTS aia_schema.general_conversations (
    id         UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id    UUID         NOT NULL
                            REFERENCES platform_schema.users (id) ON DELETE CASCADE,
    title      VARCHAR(255) NOT NULL,
    model_name VARCHAR(50),
    created_at TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    deleted_at TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_aia_general_conversations_user_id
    ON aia_schema.general_conversations (user_id);
CREATE INDEX IF NOT EXISTS idx_aia_general_conversations_created_at
    ON aia_schema.general_conversations (created_at);
CREATE INDEX IF NOT EXISTS idx_aia_general_conversations_updated_at
    ON aia_schema.general_conversations (updated_at);
-- ========================================
-- 第五步创建aia_schema.general_messages表
-- ========================================
-- Step 5: aia_schema.general_messages. Messages of a general conversation,
-- deleted together with their parent conversation.
CREATE TABLE IF NOT EXISTS aia_schema.general_messages (
    id              UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    conversation_id UUID        NOT NULL
                                REFERENCES aia_schema.general_conversations (id) ON DELETE CASCADE,
    role            VARCHAR(20) NOT NULL,
    content         TEXT        NOT NULL,
    model           VARCHAR(50),
    metadata        JSONB,
    tokens          INTEGER,
    created_at      TIMESTAMP   DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_aia_general_messages_conversation_id
    ON aia_schema.general_messages (conversation_id);
CREATE INDEX IF NOT EXISTS idx_aia_general_messages_created_at
    ON aia_schema.general_messages (created_at);
-- ========================================
-- 第六步:迁移数据
-- ========================================
-- 6.1 迁移projects
-- Step 6: copy data from the public schema into aia_schema.
-- Parents are loaded before their children (projects -> conversations -> messages,
-- general_conversations -> general_messages) so foreign keys resolve on insert.
-- Rows whose id already exists in the target are skipped, so the copy is re-runnable.

-- 6.1 projects
INSERT INTO aia_schema.projects
    (id, user_id, name, background, research_type,
     conversation_count, created_at, updated_at, deleted_at)
SELECT
    src.id, src.user_id, src.name, src.background, src.research_type,
    src.conversation_count, src.created_at, src.updated_at, src.deleted_at
FROM public.projects AS src
ON CONFLICT (id) DO NOTHING;

-- 6.2 conversations
INSERT INTO aia_schema.conversations
    (id, user_id, project_id, agent_id, title,
     model_name, message_count, total_tokens, metadata,
     created_at, updated_at, deleted_at)
SELECT
    src.id, src.user_id, src.project_id, src.agent_id, src.title,
    src.model_name, src.message_count, src.total_tokens, src.metadata,
    src.created_at, src.updated_at, src.deleted_at
FROM public.conversations AS src
ON CONFLICT (id) DO NOTHING;

-- 6.3 messages
INSERT INTO aia_schema.messages
    (id, conversation_id, role, content,
     model, metadata, tokens, is_pinned, created_at)
SELECT
    src.id, src.conversation_id, src.role, src.content,
    src.model, src.metadata, src.tokens, src.is_pinned, src.created_at
FROM public.messages AS src
ON CONFLICT (id) DO NOTHING;

-- 6.4 general_conversations
INSERT INTO aia_schema.general_conversations
    (id, user_id, title, model_name,
     created_at, updated_at, deleted_at)
SELECT
    src.id, src.user_id, src.title, src.model_name,
    src.created_at, src.updated_at, src.deleted_at
FROM public.general_conversations AS src
ON CONFLICT (id) DO NOTHING;

-- 6.5 general_messages
INSERT INTO aia_schema.general_messages
    (id, conversation_id, role, content,
     model, metadata, tokens, created_at)
SELECT
    src.id, src.conversation_id, src.role, src.content,
    src.model, src.metadata, src.tokens, src.created_at
FROM public.general_messages AS src
ON CONFLICT (id) DO NOTHING;
-- ========================================
-- 第七步:数据验证
-- ========================================
-- Step 7: compare per-table row counts between public and aia_schema.
-- All counts are printed first; each mismatch then raises its own WARNING and a
-- final message summarises the outcome. Nothing here aborts the transaction.
DO $$
DECLARE
    src_projects              INTEGER;
    src_conversations         INTEGER;
    src_messages              INTEGER;
    src_general_conversations INTEGER;
    src_general_messages      INTEGER;
    dst_projects              INTEGER;
    dst_conversations         INTEGER;
    dst_messages              INTEGER;
    dst_general_conversations INTEGER;
    dst_general_messages      INTEGER;
    mismatch_found            BOOLEAN := FALSE;
BEGIN
    -- Source (public) counts.
    src_projects              := (SELECT COUNT(*) FROM public.projects);
    src_conversations         := (SELECT COUNT(*) FROM public.conversations);
    src_messages              := (SELECT COUNT(*) FROM public.messages);
    src_general_conversations := (SELECT COUNT(*) FROM public.general_conversations);
    src_general_messages      := (SELECT COUNT(*) FROM public.general_messages);

    -- Target (aia_schema) counts.
    dst_projects              := (SELECT COUNT(*) FROM aia_schema.projects);
    dst_conversations         := (SELECT COUNT(*) FROM aia_schema.conversations);
    dst_messages              := (SELECT COUNT(*) FROM aia_schema.messages);
    dst_general_conversations := (SELECT COUNT(*) FROM aia_schema.general_conversations);
    dst_general_messages      := (SELECT COUNT(*) FROM aia_schema.general_messages);

    -- Report.
    RAISE NOTICE '==================== 数据迁移统计 ====================';
    RAISE NOTICE 'projects: public.% -> aia_schema.%', src_projects, dst_projects;
    RAISE NOTICE 'conversations: public.% -> aia_schema.%', src_conversations, dst_conversations;
    RAISE NOTICE 'messages: public.% -> aia_schema.%', src_messages, dst_messages;
    RAISE NOTICE 'general_conversations: public.% -> aia_schema.%', src_general_conversations, dst_general_conversations;
    RAISE NOTICE 'general_messages: public.% -> aia_schema.%', src_general_messages, dst_general_messages;
    RAISE NOTICE '=====================================================';

    -- Per-table comparison.
    IF src_projects <> dst_projects THEN
        RAISE WARNING '⚠️ projects 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_conversations <> dst_conversations THEN
        RAISE WARNING '⚠️ conversations 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_messages <> dst_messages THEN
        RAISE WARNING '⚠️ messages 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_general_conversations <> dst_general_conversations THEN
        RAISE WARNING '⚠️ general_conversations 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_general_messages <> dst_general_messages THEN
        RAISE WARNING '⚠️ general_messages 数据量不一致';
        mismatch_found := TRUE;
    END IF;

    -- Overall verdict.
    IF mismatch_found THEN
        RAISE WARNING '⚠️ 部分表数据迁移存在问题，请检查';
    ELSE
        RAISE NOTICE '✅ AIA Schema 所有表数据迁移成功！';
    END IF;
END $$;
-- ========================================
-- 第八步:外键完整性验证
-- ========================================
-- 验证所有project的user_id都存在于platform_schema.users中
-- Step 8: count projects whose user_id has no matching row in
-- platform_schema.users and report the result, then commit the migration.
DO $$
DECLARE
    orphan_projects INTEGER;
BEGIN
    SELECT COUNT(*)
    INTO orphan_projects
    FROM aia_schema.projects AS p
    WHERE NOT EXISTS (
        SELECT 1
        FROM platform_schema.users AS u
        WHERE u.id = p.user_id
    );

    IF orphan_projects <= 0 THEN
        RAISE NOTICE '✅ projects表外键完整性验证通过';
    ELSE
        RAISE WARNING '⚠️ projects表中有 % 条记录的user_id无效', orphan_projects;
    END IF;
END $$;
COMMIT;
-- ========================================
-- 执行结果统计(可单独运行)
-- ========================================
-- Post-migration summary: one row per table migrated into aia_schema.
-- All five migrated tables are reported (the earlier version listed only three,
-- leaving general_conversations and general_messages unchecked).
-- unique_users is NULL for tables that have no user_id column.
SELECT
    'aia_schema' AS schema_name,
    'projects' AS table_name,
    COUNT(*) AS row_count,
    COUNT(DISTINCT user_id) AS unique_users,
    MIN(created_at) AS earliest_record,
    MAX(created_at) AS latest_record
FROM aia_schema.projects
UNION ALL
SELECT
    'aia_schema',
    'conversations',
    COUNT(*),
    COUNT(DISTINCT user_id),
    MIN(created_at),
    MAX(created_at)
FROM aia_schema.conversations
UNION ALL
SELECT
    'aia_schema',
    'messages',
    COUNT(*),
    NULL,
    MIN(created_at),
    MAX(created_at)
FROM aia_schema.messages
UNION ALL
SELECT
    'aia_schema',
    'general_conversations',
    COUNT(*),
    COUNT(DISTINCT user_id),
    MIN(created_at),
    MAX(created_at)
FROM aia_schema.general_conversations
UNION ALL
SELECT
    'aia_schema',
    'general_messages',
    COUNT(*),
    NULL,
    MIN(created_at),
    MAX(created_at)
FROM aia_schema.general_messages;
-- ========================================
-- 完成提示
-- ========================================
-- ✅ AIA Schema 迁移完成
-- 包含5个表：projects, conversations, messages,
-- general_conversations, general_messages
-- 下一步:执行 004-migrate-pkb.sql
-- ========================================

View File

@@ -0,0 +1,412 @@
-- ========================================
-- 004-migrate-pkb.sql
-- ========================================
-- 目的：迁移pkb_schema（个人知识库模块）
-- 迁移表：5个（knowledge_bases, documents, batch_tasks, batch_results, task_templates）
-- 预计时间：30分钟
-- 作者：AI助手
-- 日期：2025-11-09
-- ========================================
-- 前置条件:
-- 1. 已执行 001-create-all-10-schemas.sql
-- 2. 已执行 002-migrate-platform.sql（因为需要引用platform_schema.users）
-- 3. public schema中的相关表存在且有数据
BEGIN;
-- Step 1: pkb_schema.knowledge_bases. Each knowledge base belongs to one
-- platform user and is removed together with that user.
CREATE TABLE IF NOT EXISTS pkb_schema.knowledge_bases (
    id               UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id          UUID         NOT NULL
                                  REFERENCES platform_schema.users (id) ON DELETE CASCADE,
    name             VARCHAR(255) NOT NULL,
    description      TEXT,
    dify_dataset_id  VARCHAR(255) NOT NULL,
    file_count       INTEGER      DEFAULT 0,
    total_size_bytes BIGINT       DEFAULT 0,
    created_at       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_pkb_knowledge_bases_user_id
    ON pkb_schema.knowledge_bases (user_id);
CREATE INDEX IF NOT EXISTS idx_pkb_knowledge_bases_dify_dataset_id
    ON pkb_schema.knowledge_bases (dify_dataset_id);
-- ========================================
-- 第二步创建pkb_schema.documents表包含Phase 2字段
-- ========================================
-- Step 2: pkb_schema.documents, including the Phase 2 text-extraction columns.
-- A document references both its knowledge base and its owning user; deleting
-- either parent cascades to the document.
CREATE TABLE IF NOT EXISTS pkb_schema.documents (
    id                 UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    kb_id              UUID         NOT NULL
                                    REFERENCES pkb_schema.knowledge_bases (id) ON DELETE CASCADE,
    user_id            UUID         NOT NULL
                                    REFERENCES platform_schema.users (id) ON DELETE CASCADE,
    filename           VARCHAR(255) NOT NULL,
    file_type          VARCHAR(50)  NOT NULL,
    file_size_bytes    BIGINT       NOT NULL,
    file_url           TEXT         NOT NULL,
    dify_document_id   VARCHAR(255) NOT NULL,
    status             VARCHAR(50)  DEFAULT 'uploading',
    progress           INTEGER      DEFAULT 0,
    error_message      TEXT,
    segments_count     INTEGER,
    tokens_count       INTEGER,
    -- Phase 2: full-text reading mode columns (all nullable).
    extraction_method  VARCHAR(50),
    extraction_quality FLOAT,
    char_count         INTEGER,
    language           VARCHAR(20),
    extracted_text     TEXT,
    uploaded_at        TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    processed_at       TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_pkb_documents_kb_id
    ON pkb_schema.documents (kb_id);
CREATE INDEX IF NOT EXISTS idx_pkb_documents_user_id
    ON pkb_schema.documents (user_id);
CREATE INDEX IF NOT EXISTS idx_pkb_documents_status
    ON pkb_schema.documents (status);
CREATE INDEX IF NOT EXISTS idx_pkb_documents_dify_document_id
    ON pkb_schema.documents (dify_document_id);
CREATE INDEX IF NOT EXISTS idx_pkb_documents_extraction_method
    ON pkb_schema.documents (extraction_method);
-- ========================================
-- 第三步创建pkb_schema.batch_tasks表
-- ========================================
-- Step 3: pkb_schema.batch_tasks. A batch task is tied to a user and to a
-- knowledge base; deleting either parent cascades to the task.
CREATE TABLE IF NOT EXISTS pkb_schema.batch_tasks (
    id               UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id          UUID         NOT NULL
                                  REFERENCES platform_schema.users (id) ON DELETE CASCADE,
    kb_id            UUID         NOT NULL
                                  REFERENCES pkb_schema.knowledge_bases (id) ON DELETE CASCADE,
    name             VARCHAR(255) NOT NULL,
    template_type    VARCHAR(50)  NOT NULL,
    template_id      VARCHAR(100),
    prompt           TEXT         NOT NULL,
    status           VARCHAR(50)  NOT NULL,
    total_documents  INTEGER      NOT NULL,
    completed_count  INTEGER      DEFAULT 0,
    failed_count     INTEGER      DEFAULT 0,
    model_type       VARCHAR(50)  NOT NULL,
    concurrency      INTEGER      DEFAULT 3,
    started_at       TIMESTAMP,
    completed_at     TIMESTAMP,
    duration_seconds INTEGER,
    created_at       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at       TIMESTAMP    DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_pkb_batch_tasks_user_id
    ON pkb_schema.batch_tasks (user_id);
CREATE INDEX IF NOT EXISTS idx_pkb_batch_tasks_kb_id
    ON pkb_schema.batch_tasks (kb_id);
CREATE INDEX IF NOT EXISTS idx_pkb_batch_tasks_status
    ON pkb_schema.batch_tasks (status);
CREATE INDEX IF NOT EXISTS idx_pkb_batch_tasks_created_at
    ON pkb_schema.batch_tasks (created_at);
-- ========================================
-- 第四步创建pkb_schema.batch_results表
-- ========================================
-- Step 4: pkb_schema.batch_results. One result row links a batch task to a
-- document; it is removed when either the task or the document is deleted.
CREATE TABLE IF NOT EXISTS pkb_schema.batch_results (
    id                 UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    task_id            UUID        NOT NULL
                                   REFERENCES pkb_schema.batch_tasks (id) ON DELETE CASCADE,
    document_id        UUID        NOT NULL
                                   REFERENCES pkb_schema.documents (id) ON DELETE CASCADE,
    status             VARCHAR(50) NOT NULL,
    data               JSONB,
    raw_output         TEXT,
    error_message      TEXT,
    processing_time_ms INTEGER,
    tokens_used        INTEGER,
    created_at         TIMESTAMP   DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_pkb_batch_results_task_id
    ON pkb_schema.batch_results (task_id);
CREATE INDEX IF NOT EXISTS idx_pkb_batch_results_document_id
    ON pkb_schema.batch_results (document_id);
CREATE INDEX IF NOT EXISTS idx_pkb_batch_results_status
    ON pkb_schema.batch_results (status);
-- ========================================
-- 第五步创建pkb_schema.task_templates表
-- ========================================
-- Step 5: pkb_schema.task_templates. Templates are owned by a user and are
-- removed together with that user.
CREATE TABLE IF NOT EXISTS pkb_schema.task_templates (
    id            UUID         PRIMARY KEY DEFAULT gen_random_uuid(),
    user_id       UUID         NOT NULL
                               REFERENCES platform_schema.users (id) ON DELETE CASCADE,
    name          VARCHAR(255) NOT NULL,
    description   TEXT,
    prompt        TEXT         NOT NULL,
    output_fields JSONB        NOT NULL,
    is_public     BOOLEAN      DEFAULT FALSE,
    created_at    TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP    DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_pkb_task_templates_user_id
    ON pkb_schema.task_templates (user_id);
-- ========================================
-- 第六步:迁移数据
-- ========================================
-- 6.1 迁移knowledge_bases
-- Step 6: copy data from the public schema into pkb_schema.
-- Tables are loaded so that referenced rows exist before the rows that point at
-- them (knowledge_bases -> documents -> batch_tasks -> batch_results).
-- Rows whose id already exists in the target are skipped, so the copy is re-runnable.

-- 6.1 knowledge_bases
INSERT INTO pkb_schema.knowledge_bases
    (id, user_id, name, description, dify_dataset_id,
     file_count, total_size_bytes, created_at, updated_at)
SELECT
    src.id, src.user_id, src.name, src.description, src.dify_dataset_id,
    src.file_count, src.total_size_bytes, src.created_at, src.updated_at
FROM public.knowledge_bases AS src
ON CONFLICT (id) DO NOTHING;

-- 6.2 documents (including the Phase 2 columns)
INSERT INTO pkb_schema.documents
    (id, kb_id, user_id, filename, file_type,
     file_size_bytes, file_url, dify_document_id,
     status, progress, error_message, segments_count, tokens_count,
     extraction_method, extraction_quality, char_count,
     language, extracted_text, uploaded_at, processed_at)
SELECT
    src.id, src.kb_id, src.user_id, src.filename, src.file_type,
    src.file_size_bytes, src.file_url, src.dify_document_id,
    src.status, src.progress, src.error_message, src.segments_count, src.tokens_count,
    src.extraction_method, src.extraction_quality, src.char_count,
    src.language, src.extracted_text, src.uploaded_at, src.processed_at
FROM public.documents AS src
ON CONFLICT (id) DO NOTHING;

-- 6.3 batch_tasks
INSERT INTO pkb_schema.batch_tasks
    (id, user_id, kb_id, name, template_type, template_id,
     prompt, status, total_documents, completed_count, failed_count,
     model_type, concurrency, started_at, completed_at, duration_seconds,
     created_at, updated_at)
SELECT
    src.id, src.user_id, src.kb_id, src.name, src.template_type, src.template_id,
    src.prompt, src.status, src.total_documents, src.completed_count, src.failed_count,
    src.model_type, src.concurrency, src.started_at, src.completed_at, src.duration_seconds,
    src.created_at, src.updated_at
FROM public.batch_tasks AS src
ON CONFLICT (id) DO NOTHING;

-- 6.4 batch_results
INSERT INTO pkb_schema.batch_results
    (id, task_id, document_id, status, data, raw_output,
     error_message, processing_time_ms, tokens_used, created_at)
SELECT
    src.id, src.task_id, src.document_id, src.status, src.data, src.raw_output,
    src.error_message, src.processing_time_ms, src.tokens_used, src.created_at
FROM public.batch_results AS src
ON CONFLICT (id) DO NOTHING;

-- 6.5 task_templates
INSERT INTO pkb_schema.task_templates
    (id, user_id, name, description, prompt,
     output_fields, is_public, created_at, updated_at)
SELECT
    src.id, src.user_id, src.name, src.description, src.prompt,
    src.output_fields, src.is_public, src.created_at, src.updated_at
FROM public.task_templates AS src
ON CONFLICT (id) DO NOTHING;
-- ========================================
-- 第七步:数据验证
-- ========================================
-- Step 7: compare per-table row counts between public and pkb_schema.
-- All counts are printed first; each mismatch then raises its own WARNING and a
-- final message summarises the outcome. Nothing here aborts the transaction.
DO $$
DECLARE
    src_knowledge_bases INTEGER;
    src_documents       INTEGER;
    src_batch_tasks     INTEGER;
    src_batch_results   INTEGER;
    src_task_templates  INTEGER;
    dst_knowledge_bases INTEGER;
    dst_documents       INTEGER;
    dst_batch_tasks     INTEGER;
    dst_batch_results   INTEGER;
    dst_task_templates  INTEGER;
    mismatch_found      BOOLEAN := FALSE;
BEGIN
    -- Source (public) counts.
    src_knowledge_bases := (SELECT COUNT(*) FROM public.knowledge_bases);
    src_documents       := (SELECT COUNT(*) FROM public.documents);
    src_batch_tasks     := (SELECT COUNT(*) FROM public.batch_tasks);
    src_batch_results   := (SELECT COUNT(*) FROM public.batch_results);
    src_task_templates  := (SELECT COUNT(*) FROM public.task_templates);

    -- Target (pkb_schema) counts.
    dst_knowledge_bases := (SELECT COUNT(*) FROM pkb_schema.knowledge_bases);
    dst_documents       := (SELECT COUNT(*) FROM pkb_schema.documents);
    dst_batch_tasks     := (SELECT COUNT(*) FROM pkb_schema.batch_tasks);
    dst_batch_results   := (SELECT COUNT(*) FROM pkb_schema.batch_results);
    dst_task_templates  := (SELECT COUNT(*) FROM pkb_schema.task_templates);

    -- Report.
    RAISE NOTICE '==================== 数据迁移统计 ====================';
    RAISE NOTICE 'knowledge_bases: public.% -> pkb_schema.%', src_knowledge_bases, dst_knowledge_bases;
    RAISE NOTICE 'documents: public.% -> pkb_schema.%', src_documents, dst_documents;
    RAISE NOTICE 'batch_tasks: public.% -> pkb_schema.%', src_batch_tasks, dst_batch_tasks;
    RAISE NOTICE 'batch_results: public.% -> pkb_schema.%', src_batch_results, dst_batch_results;
    RAISE NOTICE 'task_templates: public.% -> pkb_schema.%', src_task_templates, dst_task_templates;
    RAISE NOTICE '=====================================================';

    -- Per-table comparison.
    IF src_knowledge_bases <> dst_knowledge_bases THEN
        RAISE WARNING '⚠️ knowledge_bases 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_documents <> dst_documents THEN
        RAISE WARNING '⚠️ documents 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_batch_tasks <> dst_batch_tasks THEN
        RAISE WARNING '⚠️ batch_tasks 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_batch_results <> dst_batch_results THEN
        RAISE WARNING '⚠️ batch_results 数据量不一致';
        mismatch_found := TRUE;
    END IF;
    IF src_task_templates <> dst_task_templates THEN
        RAISE WARNING '⚠️ task_templates 数据量不一致';
        mismatch_found := TRUE;
    END IF;

    -- Overall verdict.
    IF mismatch_found THEN
        RAISE WARNING '⚠️ 部分表数据迁移存在问题，请检查';
    ELSE
        RAISE NOTICE '✅ PKB Schema 所有表数据迁移成功！';
    END IF;
END $$;
-- ========================================
-- 第八步:外键完整性验证
-- ========================================
-- 验证knowledge_bases的user_id
-- Step 8: look for rows whose parent reference cannot be resolved:
--   knowledge_bases.user_id -> platform_schema.users
--   documents.user_id       -> platform_schema.users
--   documents.kb_id         -> pkb_schema.knowledge_bases
-- Each check reports either a NOTICE (pass) or a WARNING with the orphan count.
DO $$
DECLARE
    kb_without_user  INTEGER;
    doc_without_user INTEGER;
    doc_without_kb   INTEGER;
BEGIN
    -- knowledge_bases.user_id
    SELECT COUNT(*)
    INTO kb_without_user
    FROM pkb_schema.knowledge_bases AS kb
    WHERE NOT EXISTS (
        SELECT 1 FROM platform_schema.users AS u WHERE u.id = kb.user_id
    );
    IF kb_without_user <= 0 THEN
        RAISE NOTICE '✅ knowledge_bases表user_id外键完整性验证通过';
    ELSE
        RAISE WARNING '⚠️ knowledge_bases表中有 % 条记录的user_id无效', kb_without_user;
    END IF;

    -- documents.user_id
    SELECT COUNT(*)
    INTO doc_without_user
    FROM pkb_schema.documents AS d
    WHERE NOT EXISTS (
        SELECT 1 FROM platform_schema.users AS u WHERE u.id = d.user_id
    );
    IF doc_without_user <= 0 THEN
        RAISE NOTICE '✅ documents表user_id外键完整性验证通过';
    ELSE
        RAISE WARNING '⚠️ documents表中有 % 条记录的user_id无效', doc_without_user;
    END IF;

    -- documents.kb_id
    SELECT COUNT(*)
    INTO doc_without_kb
    FROM pkb_schema.documents AS d
    WHERE NOT EXISTS (
        SELECT 1 FROM pkb_schema.knowledge_bases AS kb WHERE kb.id = d.kb_id
    );
    IF doc_without_kb <= 0 THEN
        RAISE NOTICE '✅ documents表kb_id外键完整性验证通过';
    ELSE
        RAISE WARNING '⚠️ documents表中有 % 条记录的kb_id无效', doc_without_kb;
    END IF;
END $$;
-- ========================================
-- 第九步Phase 2字段统计
-- ========================================
-- Step 9: one-row summary of the Phase 2 extraction columns on
-- pkb_schema.documents, then commit the migration.
SELECT
    '全文阅读字段统计' AS category,
    COUNT(*) AS total_documents,
    COUNT(d.extracted_text) AS has_extracted_text,
    COUNT(d.extraction_method) AS has_extraction_method,
    COUNT(*) FILTER (WHERE d.extraction_method = 'pymupdf') AS pymupdf_count,
    COUNT(*) FILTER (WHERE d.extraction_method = 'nougat') AS nougat_count,
    AVG(d.extraction_quality) AS avg_quality,
    AVG(d.char_count) AS avg_char_count
FROM pkb_schema.documents AS d;
COMMIT;
-- ========================================
-- 执行结果统计(可单独运行)
-- ========================================
-- Post-migration summary: one row per table migrated into pkb_schema.
-- All five migrated tables are reported (the earlier version listed only three,
-- leaving batch_results and task_templates unchecked).
-- Columns that do not apply to a table are NULL: batch_results has no user_id,
-- and only knowledge_bases / documents carry size information.
SELECT
    'pkb_schema' AS schema_name,
    'knowledge_bases' AS table_name,
    COUNT(*) AS row_count,
    COUNT(DISTINCT user_id) AS unique_users,
    SUM(file_count) AS total_files,
    pg_size_pretty(SUM(total_size_bytes)) AS total_size
FROM pkb_schema.knowledge_bases
UNION ALL
SELECT
    'pkb_schema',
    'documents',
    COUNT(*),
    COUNT(DISTINCT user_id),
    NULL,
    pg_size_pretty(SUM(file_size_bytes))
FROM pkb_schema.documents
UNION ALL
SELECT
    'pkb_schema',
    'batch_tasks',
    COUNT(*),
    COUNT(DISTINCT user_id),
    NULL,
    NULL
FROM pkb_schema.batch_tasks
UNION ALL
SELECT
    'pkb_schema',
    'batch_results',
    COUNT(*),
    NULL,
    NULL,
    NULL
FROM pkb_schema.batch_results
UNION ALL
SELECT
    'pkb_schema',
    'task_templates',
    COUNT(*),
    COUNT(DISTINCT user_id),
    NULL,
    NULL
FROM pkb_schema.task_templates;
-- ========================================
-- 完成提示
-- ========================================
-- ✅ PKB Schema 迁移完成
-- 包含5个表：knowledge_bases, documents, batch_tasks,
-- batch_results, task_templates
-- 下一步:执行 005-validate-all.sql 进行全局验证
-- ========================================

View File

@@ -0,0 +1,544 @@
-- ========================================
-- 005-validate-all.sql
-- ========================================
-- 目的全局验证10个Schema和数据迁移的完整性
-- 预计时间10分钟
-- 作者AI助手
-- 日期2025-11-09
-- ========================================
-- 前置条件:
-- 1. 已执行 001-create-all-10-schemas.sql
-- 2. 已执行 002-migrate-platform.sql
-- 3. 已执行 003-migrate-aia.sql
-- 4. 已执行 004-migrate-pkb.sql
-- ========================================
-- 第一部分Schema完整性验证
-- ========================================
\echo '========================================';
\echo '第一部分Schema完整性验证';
\echo '========================================';
-- Report the status of each of the 10 expected schemas.
-- The query is driven from the expected list and LEFT JOINed to the catalog,
-- so a schema that was never created still produces a row (marked as
-- missing) instead of silently disappearing from the report.
SELECT
    'Schema检查' AS check_type,
    expected.schema_name,
    CASE
        WHEN ns.oid IS NOT NULL THEN '✅ 存在'
        ELSE '❌ 不存在'
    END AS status,
    -- obj_description returns NULL for a missing schema (NULL oid).
    pg_catalog.obj_description(ns.oid, 'pg_namespace') AS description
FROM (
    VALUES
        ('platform_schema'),
        ('aia_schema'),
        ('pkb_schema'),
        ('asl_schema'),
        ('common_schema'),
        ('dc_schema'),
        ('rvw_schema'),
        ('admin_schema'),
        ('ssa_schema'),
        ('st_schema')
) AS expected (schema_name)
LEFT JOIN pg_catalog.pg_namespace AS ns
    ON ns.nspname = expected.schema_name
ORDER BY expected.schema_name;
-- Count how many of the 10 expected schemas exist and report pass/fail.
DO $$
DECLARE
    found_schemas INTEGER;
BEGIN
    SELECT COUNT(*)
    INTO found_schemas
    FROM pg_namespace AS ns
    WHERE ns.nspname IN (
        'platform_schema', 'aia_schema', 'pkb_schema',
        'asl_schema', 'common_schema', 'dc_schema',
        'rvw_schema', 'admin_schema', 'ssa_schema', 'st_schema'
    );

    RAISE NOTICE '========================================';
    -- Anything other than exactly 10 matches is reported as a failure.
    IF found_schemas <> 10 THEN
        RAISE WARNING '❌ Schema完整性检查失败预期10个实际%个', found_schemas;
    ELSE
        RAISE NOTICE '✅ Schema完整性检查通过10个Schema全部创建成功';
    END IF;
    RAISE NOTICE '========================================';
END $$;
-- ========================================
-- 第二部分:表结构验证
-- ========================================
\echo '';
\echo '========================================';
\echo '第二部分:表结构验证';
\echo '========================================';
-- List every table found in the three migrated schemas, ordered by schema
-- and table name. The status column is an empty placeholder.
SELECT
    t.schemaname::text AS schema_name,
    t.tablename AS table_name,
    '' AS status
FROM pg_tables AS t
WHERE t.schemaname IN ('platform_schema', 'aia_schema', 'pkb_schema')
ORDER BY schema_name, table_name;
-- Count the tables in each migrated schema and compare with the expected
-- numbers (platform: 1, aia: 5, pkb: 5).
DO $$
DECLARE
    platform_tables INTEGER;
    aia_tables INTEGER;
    pkb_tables INTEGER;
    counts_as_expected BOOLEAN := true;
BEGIN
    -- A single catalog scan yields all three per-schema counts.
    SELECT
        COUNT(*) FILTER (WHERE t.schemaname = 'platform_schema'),
        COUNT(*) FILTER (WHERE t.schemaname = 'aia_schema'),
        COUNT(*) FILTER (WHERE t.schemaname = 'pkb_schema')
    INTO platform_tables, aia_tables, pkb_tables
    FROM pg_tables AS t
    WHERE t.schemaname IN ('platform_schema', 'aia_schema', 'pkb_schema');

    RAISE NOTICE '========================================';
    RAISE NOTICE 'platform_schema: % 个表 (预期1个)', platform_tables;
    RAISE NOTICE 'aia_schema: % 个表 (预期5个)', aia_tables;
    RAISE NOTICE 'pkb_schema: % 个表 (预期5个)', pkb_tables;
    RAISE NOTICE '========================================';

    IF platform_tables <> 1 THEN
        RAISE WARNING '❌ platform_schema表数量异常';
        counts_as_expected := false;
    END IF;

    IF aia_tables <> 5 THEN
        RAISE WARNING '❌ aia_schema表数量异常';
        counts_as_expected := false;
    END IF;

    IF pkb_tables <> 5 THEN
        RAISE WARNING '❌ pkb_schema表数量异常';
        counts_as_expected := false;
    END IF;

    -- The success line is printed only when no warning was raised above.
    IF counts_as_expected THEN
        RAISE NOTICE '✅ 表结构验证通过所有表创建成功';
    END IF;
    RAISE NOTICE '========================================';
END $$;
-- ========================================
-- 第三部分:数据量对比验证
-- ========================================
\echo '';
\echo '========================================';
\echo '第三部分:数据量对比验证';
\echo '========================================';
-- 3.1 Platform Schema
-- Compare the row count of public.users with platform_schema.users.
-- No row is returned at all when public.users is empty.
WITH user_counts AS (
    SELECT
        (SELECT COUNT(*) FROM public.users) AS public_count,
        (SELECT COUNT(*) FROM platform_schema.users) AS migrated_count
)
SELECT
    'users' AS table_name,
    uc.public_count,
    uc.migrated_count,
    CASE
        WHEN uc.public_count <> uc.migrated_count THEN '❌ 不一致'
        ELSE '✅ 一致'
    END AS status
FROM user_counts AS uc
WHERE uc.public_count > 0;
-- 3.2 AIA Schema
-- Compare row counts between each public table and its aia_schema copy.
-- Each count is computed once in the CTE and then compared.
WITH aia_counts (table_name, public_count, migrated_count) AS (
    SELECT
        'projects',
        (SELECT COUNT(*) FROM public.projects),
        (SELECT COUNT(*) FROM aia_schema.projects)
    UNION ALL
    SELECT
        'conversations',
        (SELECT COUNT(*) FROM public.conversations),
        (SELECT COUNT(*) FROM aia_schema.conversations)
    UNION ALL
    SELECT
        'messages',
        (SELECT COUNT(*) FROM public.messages),
        (SELECT COUNT(*) FROM aia_schema.messages)
    UNION ALL
    SELECT
        'general_conversations',
        (SELECT COUNT(*) FROM public.general_conversations),
        (SELECT COUNT(*) FROM aia_schema.general_conversations)
    UNION ALL
    SELECT
        'general_messages',
        (SELECT COUNT(*) FROM public.general_messages),
        (SELECT COUNT(*) FROM aia_schema.general_messages)
)
SELECT
    c.table_name,
    c.public_count,
    c.migrated_count,
    CASE
        WHEN c.public_count <> c.migrated_count THEN '❌ 不一致'
        ELSE '✅ 一致'
    END AS status
FROM aia_counts AS c;
-- 3.3 PKB Schema
-- Compare row counts between each public table and its pkb_schema copy.
-- Each count is computed once in the CTE and then compared.
WITH pkb_counts (table_name, public_count, migrated_count) AS (
    SELECT
        'knowledge_bases',
        (SELECT COUNT(*) FROM public.knowledge_bases),
        (SELECT COUNT(*) FROM pkb_schema.knowledge_bases)
    UNION ALL
    SELECT
        'documents',
        (SELECT COUNT(*) FROM public.documents),
        (SELECT COUNT(*) FROM pkb_schema.documents)
    UNION ALL
    SELECT
        'batch_tasks',
        (SELECT COUNT(*) FROM public.batch_tasks),
        (SELECT COUNT(*) FROM pkb_schema.batch_tasks)
    UNION ALL
    SELECT
        'batch_results',
        (SELECT COUNT(*) FROM public.batch_results),
        (SELECT COUNT(*) FROM pkb_schema.batch_results)
    UNION ALL
    SELECT
        'task_templates',
        (SELECT COUNT(*) FROM public.task_templates),
        (SELECT COUNT(*) FROM pkb_schema.task_templates)
)
SELECT
    c.table_name,
    c.public_count,
    c.migrated_count,
    CASE
        WHEN c.public_count <> c.migrated_count THEN '❌ 不一致'
        ELSE '✅ 一致'
    END AS status
FROM pkb_counts AS c;
-- ========================================
-- 第四部分:外键约束验证
-- ========================================
\echo '';
\echo '========================================';
\echo '第四部分:外键约束验证';
\echo '========================================';
-- List every foreign key declared on tables in the three migrated schemas,
-- together with the table and column it references.
--
-- In constraint_column_usage the table_* columns describe the REFERENCED
-- table, which for a cross-schema foreign key lives in a different schema
-- than the referencing table. The joins therefore match on
-- constraint_schema + constraint_name; matching on table_schema would drop
-- exactly the cross-schema references this report is meant to show.
SELECT
    tc.table_schema AS schema_name,
    tc.table_name,
    tc.constraint_name,
    kcu.column_name,
    ccu.table_schema AS foreign_schema,
    ccu.table_name AS foreign_table,
    ccu.column_name AS foreign_column,
    '' AS status
FROM information_schema.table_constraints AS tc
INNER JOIN information_schema.key_column_usage AS kcu
    ON kcu.constraint_schema = tc.constraint_schema
    AND kcu.constraint_name = tc.constraint_name
    -- Constraint names are only unique per table, so pin the table as well.
    AND kcu.table_schema = tc.table_schema
    AND kcu.table_name = tc.table_name
INNER JOIN information_schema.constraint_column_usage AS ccu
    ON ccu.constraint_schema = tc.constraint_schema
    AND ccu.constraint_name = tc.constraint_name
WHERE tc.constraint_type = 'FOREIGN KEY'
    AND tc.table_schema IN ('platform_schema', 'aia_schema', 'pkb_schema')
ORDER BY tc.table_schema, tc.table_name, tc.constraint_name;
-- Report the total number of foreign-key constraints declared on tables in
-- the three migrated schemas.
DO $$
DECLARE
    total_fks INTEGER;
BEGIN
    SELECT COUNT(*)
    INTO total_fks
    FROM information_schema.table_constraints AS tc
    WHERE tc.table_schema IN ('platform_schema', 'aia_schema', 'pkb_schema')
        AND tc.constraint_type = 'FOREIGN KEY';

    RAISE NOTICE '========================================';
    RAISE NOTICE '✅ 外键约束总数%', total_fks;
    RAISE NOTICE '========================================';
END $$;
-- ========================================
-- 第五部分跨Schema引用验证
-- ========================================
\echo '';
\echo '========================================';
\echo '第五部分跨Schema引用验证';
\echo '========================================';
-- Check that every user_id in the migrated AIA/PKB tables points at an
-- existing row in platform_schema.users. A row whose user_id has no matching
-- users.id (including a NULL user_id) is counted as invalid.
DO $$
DECLARE
    orphan_projects INTEGER;
    orphan_conversations INTEGER;
    orphan_general_conversations INTEGER;
    orphan_knowledge_bases INTEGER;
    orphan_documents INTEGER;
    found_orphans BOOLEAN := false;
BEGIN
    -- aia_schema.projects
    SELECT COUNT(*)
    INTO orphan_projects
    FROM aia_schema.projects AS p
    WHERE NOT EXISTS (
        SELECT 1 FROM platform_schema.users AS u WHERE u.id = p.user_id
    );

    -- aia_schema.conversations
    SELECT COUNT(*)
    INTO orphan_conversations
    FROM aia_schema.conversations AS c
    WHERE NOT EXISTS (
        SELECT 1 FROM platform_schema.users AS u WHERE u.id = c.user_id
    );

    -- aia_schema.general_conversations
    SELECT COUNT(*)
    INTO orphan_general_conversations
    FROM aia_schema.general_conversations AS gc
    WHERE NOT EXISTS (
        SELECT 1 FROM platform_schema.users AS u WHERE u.id = gc.user_id
    );

    -- pkb_schema.knowledge_bases
    SELECT COUNT(*)
    INTO orphan_knowledge_bases
    FROM pkb_schema.knowledge_bases AS kb
    WHERE NOT EXISTS (
        SELECT 1 FROM platform_schema.users AS u WHERE u.id = kb.user_id
    );

    -- pkb_schema.documents
    SELECT COUNT(*)
    INTO orphan_documents
    FROM pkb_schema.documents AS d
    WHERE NOT EXISTS (
        SELECT 1 FROM platform_schema.users AS u WHERE u.id = d.user_id
    );

    RAISE NOTICE '========================================';
    RAISE NOTICE '跨Schema引用验证结果';
    RAISE NOTICE '----------------------------------------';

    -- One pass/fail line per table, in the same order as the counts above.
    IF orphan_projects = 0 THEN
        RAISE NOTICE '✅ aia_schema.projects外键全部有效';
    ELSE
        RAISE WARNING '❌ aia_schema.projects有%条无效user_id', orphan_projects;
        found_orphans := true;
    END IF;

    IF orphan_conversations = 0 THEN
        RAISE NOTICE '✅ aia_schema.conversations外键全部有效';
    ELSE
        RAISE WARNING '❌ aia_schema.conversations有%条无效user_id', orphan_conversations;
        found_orphans := true;
    END IF;

    IF orphan_general_conversations = 0 THEN
        RAISE NOTICE '✅ aia_schema.general_conversations外键全部有效';
    ELSE
        RAISE WARNING '❌ aia_schema.general_conversations有%条无效user_id', orphan_general_conversations;
        found_orphans := true;
    END IF;

    IF orphan_knowledge_bases = 0 THEN
        RAISE NOTICE '✅ pkb_schema.knowledge_bases外键全部有效';
    ELSE
        RAISE WARNING '❌ pkb_schema.knowledge_bases有%条无效user_id', orphan_knowledge_bases;
        found_orphans := true;
    END IF;

    IF orphan_documents = 0 THEN
        RAISE NOTICE '✅ pkb_schema.documents外键全部有效';
    ELSE
        RAISE WARNING '❌ pkb_schema.documents有%条无效user_id', orphan_documents;
        found_orphans := true;
    END IF;

    RAISE NOTICE '========================================';
    IF found_orphans THEN
        RAISE WARNING '❌ 存在无效的跨Schema引用';
    ELSE
        RAISE NOTICE '✅ 所有跨Schema引用验证通过';
    END IF;
    RAISE NOTICE '========================================';
END $$;
-- ========================================
-- 第六部分:索引验证
-- ========================================
\echo '';
\echo '========================================';
\echo '第六部分:索引验证';
\echo '========================================';
-- List every index defined in the three migrated schemas.
-- The status column is an empty placeholder.
SELECT
    idx.schemaname AS schema_name,
    idx.tablename AS table_name,
    idx.indexname AS index_name,
    '' AS status
FROM pg_indexes AS idx
WHERE idx.schemaname IN ('platform_schema', 'aia_schema', 'pkb_schema')
ORDER BY idx.schemaname, idx.tablename, idx.indexname;
-- Report how many indexes exist in the three migrated schemas, excluding
-- those whose name ends in "pkey" (the primary-key index naming pattern).
DO $$
DECLARE
    secondary_indexes INTEGER;
BEGIN
    SELECT COUNT(*)
    INTO secondary_indexes
    FROM pg_indexes AS idx
    WHERE idx.indexname NOT LIKE '%pkey'
        AND idx.schemaname IN ('platform_schema', 'aia_schema', 'pkb_schema');

    RAISE NOTICE '========================================';
    RAISE NOTICE '✅ 非主键索引总数%', secondary_indexes;
    RAISE NOTICE '========================================';
END $$;
-- ========================================
-- 第七部分:数据采样验证(抽样检查)
-- ========================================
\echo '';
\echo '========================================';
\echo '第七部分:数据采样验证';
\echo '========================================';
-- Verify that the set of ids is identical between source and migrated
-- tables for users and projects. Despite the section title this compares
-- the full id sets in both directions, not a sample.
DO $$
DECLARE
    users_identical BOOLEAN;
    projects_identical BOOLEAN;
BEGIN
    -- users: no id may exist on one side only.
    users_identical :=
        NOT EXISTS (
            SELECT id FROM public.users
            EXCEPT
            SELECT id FROM platform_schema.users
        )
        AND NOT EXISTS (
            SELECT id FROM platform_schema.users
            EXCEPT
            SELECT id FROM public.users
        );

    -- projects: same two-way comparison.
    projects_identical :=
        NOT EXISTS (
            SELECT id FROM public.projects
            EXCEPT
            SELECT id FROM aia_schema.projects
        )
        AND NOT EXISTS (
            SELECT id FROM aia_schema.projects
            EXCEPT
            SELECT id FROM public.projects
        );

    RAISE NOTICE '========================================';
    IF NOT users_identical THEN
        RAISE WARNING '❌ users表ID存在差异';
    ELSE
        RAISE NOTICE '✅ users表ID完全一致';
    END IF;

    IF NOT projects_identical THEN
        RAISE WARNING '❌ projects表ID存在差异';
    ELSE
        RAISE NOTICE '✅ projects表ID完全一致';
    END IF;
    RAISE NOTICE '========================================';
END $$;
-- ========================================
-- 第八部分:最终总结报告
-- ========================================
\echo '';
\echo '========================================';
\echo '最终总结报告';
\echo '========================================';
-- One-row summary of the migrated row counts per schema.
SELECT
    '✅ Schema隔离迁移验证完成' AS summary,
    format('%s users', (SELECT COUNT(*) FROM platform_schema.users)) AS platform,
    format(
        '%s projects, %s conversations',
        (SELECT COUNT(*) FROM aia_schema.projects),
        (SELECT COUNT(*) FROM aia_schema.conversations)
    ) AS aia,
    format(
        '%s knowledge_bases, %s documents',
        (SELECT COUNT(*) FROM pkb_schema.knowledge_bases),
        (SELECT COUNT(*) FROM pkb_schema.documents)
    ) AS pkb;
\echo '';
\echo '========================================';
\echo '✅ 所有验证完成!';
\echo '========================================';
\echo '下一步:';
\echo '1. 更新Prisma配置schema.prisma';
\echo '2. 生成Prisma Client';
\echo '3. 更新代码以使用新Schema';
\echo '4. 运行应用测试';
\echo '========================================';
-- ========================================
-- 完成
-- ========================================

View File

@@ -0,0 +1,325 @@
# Schema迁移脚本使用指南
> **版本:** V1.0
> **创建日期:** 2025-11-09
> **迁移目标:** 从public schema迁移到10个隔离Schema
---
## 📋 脚本清单
| # | 脚本名称 | 说明 | 预计时间 | 前置依赖 |
|---|---------|------|---------|---------|
| 1 | `001-create-all-10-schemas.sql` | 创建10个Schema（3详细+7空） | 5秒 | 无 |
| 2 | `002-migrate-platform.sql` | 迁移platform_schema（1个表：users） | 15分钟 | 001 |
| 3 | `003-migrate-aia.sql` | 迁移aia_schema（5个表：对话相关） | 30分钟 | 001, 002 |
| 4 | `004-migrate-pkb.sql` | 迁移pkb_schema（5个表：知识库相关） | 30分钟 | 001, 002 |
| 5 | `005-validate-all.sql` | 全局验证和数据完整性检查 | 10分钟 | 001-004 |
**总计:** 约1.5小时
---
## 🚀 执行步骤
### 前置准备
1. **备份数据库(强烈建议)**
```bash
pg_dump -U postgres -d your_database > backup_$(date +%Y%m%d_%H%M%S).sql
```
2. **确认数据库连接**
```bash
# 确保DATABASE_URL环境变量正确
echo $DATABASE_URL
# 或查看 .env 文件
```
3. **确认当前表结构**
```sql
SELECT tablename FROM pg_tables WHERE schemaname = 'public';
```
---
### 执行迁移
#### 方法1：使用psql命令（推荐）
```bash
# 进入脚本目录
cd AIclinicalresearch/docs/09-架构实施/migration-scripts
# 依次执行脚本
psql $DATABASE_URL -f 001-create-all-10-schemas.sql
psql $DATABASE_URL -f 002-migrate-platform.sql
psql $DATABASE_URL -f 003-migrate-aia.sql
psql $DATABASE_URL -f 004-migrate-pkb.sql
psql $DATABASE_URL -f 005-validate-all.sql
```
#### 方法2：一次性执行所有脚本
```bash
# 创建执行脚本
cat 001-create-all-10-schemas.sql \
002-migrate-platform.sql \
003-migrate-aia.sql \
004-migrate-pkb.sql \
005-validate-all.sql \
| psql $DATABASE_URL
```
#### 方法3：使用数据库客户端（如DBeaver、pgAdmin）
1. 打开数据库客户端
2. 连接到目标数据库
3. 依次打开并执行每个SQL文件
---
## ✅ 验证清单
### 执行001后
- [ ] 10个Schema全部创建成功
- [ ] 每个Schema都有注释说明
```sql
-- 验证SQL
SELECT nspname, pg_catalog.obj_description(oid, 'pg_namespace')
FROM pg_namespace
WHERE nspname LIKE '%_schema'
ORDER BY nspname;
```
### 执行002后
- [ ] platform_schema.users表创建成功
- [ ] 数据从public.users完整迁移
- [ ] 4个索引创建成功
```sql
-- 验证SQL
SELECT COUNT(*) AS public_count FROM public.users;
SELECT COUNT(*) AS platform_count FROM platform_schema.users;
```
### 执行003后
- [ ] aia_schema的5个表创建成功
- [ ] 数据完整迁移
- [ ] 外键约束正确建立
```sql
-- 验证SQL
SELECT COUNT(*) FROM aia_schema.projects;
SELECT COUNT(*) FROM aia_schema.conversations;
```
### 执行004后
- [ ] pkb_schema的5个表创建成功
- [ ] 包含Phase 2全文阅读字段
- [ ] 数据完整迁移
```sql
-- 验证SQL
SELECT COUNT(*) FROM pkb_schema.knowledge_bases;
SELECT COUNT(*) FROM pkb_schema.documents;
```
### 执行005后
- [ ] 所有数据量对比一致
- [ ] 跨Schema外键引用有效
- [ ] 无数据丢失
---
## 📊 迁移后数据分布
### Platform Schema
```
platform_schema
└── users (1表)
```
### AIA Schema
```
aia_schema
├── projects
├── conversations
├── messages
├── general_conversations
└── general_messages (5表)
```
### PKB Schema
```
pkb_schema
├── knowledge_bases
├── documents
├── batch_tasks
├── batch_results
└── task_templates (5表)
```
### 空Schema7个
```
asl_schema (AI智能文献 - Week 3设计)
common_schema (通用能力层)
dc_schema (数据清洗)
rvw_schema (审稿系统)
admin_schema (运营管理)
ssa_schema (智能统计分析)
st_schema (统计分析工具)
```
---
## ⚠️ 注意事项
### 1. 事务保护
所有迁移脚本都使用了事务（BEGIN/COMMIT）：
- 成功:全部提交
- 失败:自动回滚,无部分迁移
### 2. 幂等性
所有脚本支持重复执行:
- 使用 `IF NOT EXISTS` 创建对象
- 使用 `ON CONFLICT DO NOTHING` 插入数据
### 3. public schema保留
迁移后**不会删除** public schema中的原表
- 原因:方便回滚和对比验证
- 清理:待所有验证通过后,再决定是否删除
### 4. 外键约束
支持跨Schema外键
- 所有业务表引用 `platform_schema.users(id)`
- PostgreSQL原生支持无需特殊配置
---
## 🔧 故障排查
### 问题1连接被拒绝
**错误:** `connection refused`
**解决:**
```bash
# 检查PostgreSQL服务
sudo systemctl status postgresql
# 启动服务
sudo systemctl start postgresql
```
### 问题2权限不足
**错误:** `permission denied to create schema`
**解决:**
```sql
-- 授予权限
GRANT CREATE ON DATABASE your_database TO your_user;
```
### 问题3外键约束失败
**错误:** `violates foreign key constraint`
**解决:**
- 确保先执行 002（platform），再执行 003/004（aia/pkb）
- 检查是否有孤立的user_id
### 问题4数据量不一致
**错误:** 验证脚本报告数据量不一致
**解决:**
1. 检查是否有迁移过程中新增的数据
2. 使用ID对比检查具体差异
```sql
-- 找出差异的ID
SELECT id FROM public.users
EXCEPT
SELECT id FROM platform_schema.users;
```
---
## 📝 回滚方案
### 快速回滚(推荐)
```sql
-- 删除所有新建的Schema会级联删除所有表和数据
DROP SCHEMA IF EXISTS platform_schema CASCADE;
DROP SCHEMA IF EXISTS aia_schema CASCADE;
DROP SCHEMA IF EXISTS pkb_schema CASCADE;
DROP SCHEMA IF EXISTS asl_schema CASCADE;
DROP SCHEMA IF EXISTS common_schema CASCADE;
DROP SCHEMA IF EXISTS dc_schema CASCADE;
DROP SCHEMA IF EXISTS rvw_schema CASCADE;
DROP SCHEMA IF EXISTS admin_schema CASCADE;
DROP SCHEMA IF EXISTS ssa_schema CASCADE;
DROP SCHEMA IF EXISTS st_schema CASCADE;
```
### 从备份恢复
```bash
# 恢复备份
psql $DATABASE_URL < backup_20251109_100000.sql
```
---
## 📂 后续步骤
迁移完成后,需要:
1. **更新Prisma配置** → 见任务9
- 更新 `backend/prisma/schema.prisma`
- 添加 `multiSchema` 预览特性
- 为3个Schema定义模型
2. **生成Prisma Client**
```bash
cd backend
npx prisma generate
```
3. **更新代码** → 见任务12
- 所有数据库查询使用新Schema
- 测试现有功能
4. **运行测试** → 见任务8
- 测试AI智能问答
- 测试知识库功能
---
## 📞 获取帮助
如果遇到问题:
1. **查看日志** - psql会输出详细的执行信息和错误
2. **检查文档** - 参考 `09-架构实施/01-Schema隔离架构设计10个.md`
3. **验证数据** - 运行 `005-validate-all.sql`
---
**创建人:** AI助手
**最后更新:** 2025-11-09
**版本:** V1.0
**核心理念:可重复执行 + 事务保护 + 完整验证 = 安全迁移** ⭐⭐⭐

View File

@@ -0,0 +1,268 @@
# ========================================
# execute-migration.ps1
# Schema迁移执行脚本Windows PowerShell
# ========================================
# 功能:
# 1. 备份当前数据库
# 2. 依次执行5个迁移脚本
# 3. 验证迁移结果
#
# 使用方法:
# .\execute-migration.ps1
# ========================================
# Treat PowerShell errors as terminating for the rest of the script.
$ErrorActionPreference = "Stop"

# Database connection settings used by every psql / pg_dump call below.
$DB_HOST = "localhost"
$DB_PORT = "5432"
$DB_NAME = "ai_clinical_research"
$DB_USER = "postgres"
$DB_PASS = "postgres"

# Export the password through PGPASSWORD for the client tools.
$env:PGPASSWORD = $DB_PASS

# Directory containing this script; the SQL files are resolved against it.
$SCRIPT_DIR = Split-Path -Path $MyInvocation.MyCommand.Path -Parent

# Startup banner.
$bannerRule = "========================================"
Write-Host $bannerRule -ForegroundColor Cyan
Write-Host "Schema迁移执行脚本 - V1.0" -ForegroundColor Cyan
Write-Host $bannerRule -ForegroundColor Cyan
Write-Host ""
# ========================================
# Step 1: check the PostgreSQL connection
# ========================================
Write-Host "[1/6] 检查PostgreSQL连接..." -ForegroundColor Yellow
try {
    # Only the exit code matters; the query output itself is discarded.
    $null = psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -c "SELECT version();" 2>&1
    if ($LASTEXITCODE -ne 0) {
        throw "PostgreSQL连接失败"
    }
    Write-Host "✅ PostgreSQL连接成功" -ForegroundColor Green
} catch {
    Write-Host "❌ 无法连接到PostgreSQL数据库" -ForegroundColor Red
    Write-Host "错误信息: $_" -ForegroundColor Red
    Write-Host ""
    # Troubleshooting hints, then abort with a non-zero exit code.
    $connectionHints = @(
        "请检查:",
        "1. PostgreSQL服务是否正在运行",
        "2. 数据库连接信息是否正确",
        "3. psql命令是否在PATH中"
    )
    foreach ($hint in $connectionHints) {
        Write-Host $hint -ForegroundColor Yellow
    }
    exit 1
}
Write-Host ""
# ========================================
# Step 2: back up the current database
# ========================================
# Writes a plain-SQL dump next to this script and aborts the whole run if
# the dump cannot be confirmed. Success requires BOTH a zero pg_dump exit
# code and a non-empty output file: pg_dump -f creates the file before it
# starts dumping, so the file existing proves nothing on its own.
Write-Host "[2/6] 备份当前数据库..." -ForegroundColor Yellow
$timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$backupFile = Join-Path $SCRIPT_DIR "backup_before_migration_$timestamp.sql"
Write-Host "备份文件: $backupFile" -ForegroundColor Gray
try {
    # With $ErrorActionPreference = "Stop", redirecting a native command's
    # stderr (2>&1) can turn harmless warning lines into terminating errors
    # in Windows PowerShell, so relax the preference for the native call only.
    $previousPreference = $ErrorActionPreference
    $ErrorActionPreference = "Continue"
    try {
        $output = pg_dump -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -f $backupFile 2>&1
        $dumpExitCode = $LASTEXITCODE
    } finally {
        $ErrorActionPreference = $previousPreference
    }

    if ($dumpExitCode -ne 0) {
        throw "pg_dump 返回错误代码: $dumpExitCode`n$($output | Out-String)"
    }
    if (-not (Test-Path $backupFile)) {
        throw "备份文件未生成"
    }
    $backupBytes = (Get-Item $backupFile).Length
    if ($backupBytes -le 0) {
        throw "备份文件为空"
    }

    $fileSize = $backupBytes / 1KB
    Write-Host "✅ 数据库备份成功 (大小: $([math]::Round($fileSize, 2)) KB)" -ForegroundColor Green
} catch {
    Write-Host "❌ 数据库备份失败" -ForegroundColor Red
    Write-Host "错误信息: $_" -ForegroundColor Red
    exit 1
}
Write-Host ""
# ========================================
# Step 3: ask the operator to confirm
# ========================================
Write-Host "[3/6] 迁移确认" -ForegroundColor Yellow
Write-Host ""
Write-Host "即将执行以下操作:" -ForegroundColor White
# The planned steps, printed one per line.
$plannedSteps = @(
    " 1. 创建10个Schema",
    " 2. 迁移platform_schema1个表",
    " 3. 迁移aia_schema5个表",
    " 4. 迁移pkb_schema5个表",
    " 5. 全局验证"
)
foreach ($step in $plannedSteps) {
    Write-Host $step -ForegroundColor Gray
}
Write-Host ""
Write-Host "⚠️ 这将修改数据库结构!" -ForegroundColor Red
Write-Host "✅ 数据库已备份到: $backupFile" -ForegroundColor Green
Write-Host ""
# Any answer other than YES cancels the run (-ne is case-insensitive here).
$confirmation = Read-Host "确认执行迁移?(输入 YES 继续,其他键取消)"
if ($confirmation -ne "YES") {
    Write-Host "❌ 迁移已取消" -ForegroundColor Yellow
    exit 0
}
Write-Host ""
# ========================================
# Step 4: run the migration scripts
# ========================================
# Runs the SQL files in order and records the outcome in $successCount and
# $failedScripts, which the later steps read.
#
# - psql is run with ON_ERROR_STOP=1: without it psql keeps going after a
#   failed statement and still exits with 0, so a broken migration would be
#   reported as successful.
# - The loop stops at the first failure, because each script depends on the
#   ones before it.
# - The captured psql output is printed on failure so the actual SQL error
#   is visible.
Write-Host "[4/6] 执行迁移脚本..." -ForegroundColor Yellow
Write-Host ""
# Ordered list of migration scripts.
$migrationScripts = @(
    @{Name="001-create-all-10-schemas.sql"; Description="创建10个Schema"},
    @{Name="002-migrate-platform.sql"; Description="迁移platform_schema"},
    @{Name="003-migrate-aia.sql"; Description="迁移aia_schema"},
    @{Name="004-migrate-pkb.sql"; Description="迁移pkb_schema"},
    @{Name="005-validate-all.sql"; Description="全局验证"}
)
$successCount = 0
$failedScripts = @()
foreach ($script in $migrationScripts) {
    $scriptPath = Join-Path $SCRIPT_DIR $script.Name
    Write-Host " 执行: $($script.Name) - $($script.Description)..." -ForegroundColor Cyan
    if (-not (Test-Path $scriptPath)) {
        Write-Host " ❌ 脚本文件不存在: $scriptPath" -ForegroundColor Red
        $failedScripts += $script.Name
        Write-Host ""
        # Later scripts depend on this one; do not continue.
        break
    }
    $output = $null
    try {
        # RAISE NOTICE / WARNING messages arrive on stderr. Under
        # $ErrorActionPreference = "Stop", Windows PowerShell can turn
        # redirected native stderr (2>&1) into a terminating error, so relax
        # the preference for the native call only.
        $previousPreference = $ErrorActionPreference
        $ErrorActionPreference = "Continue"
        try {
            $output = psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -v ON_ERROR_STOP=1 -f $scriptPath 2>&1
            $psqlExitCode = $LASTEXITCODE
        } finally {
            $ErrorActionPreference = $previousPreference
        }
        if ($psqlExitCode -ne 0) {
            throw "脚本执行返回错误代码: $psqlExitCode"
        }
        Write-Host " ✅ 成功" -ForegroundColor Green
        $successCount++
    } catch {
        Write-Host " ❌ 失败" -ForegroundColor Red
        Write-Host " 错误: $_" -ForegroundColor Red
        if ($output) {
            # Show what psql printed so the failing statement can be found.
            Write-Host ($output | Out-String) -ForegroundColor Gray
        }
        $failedScripts += $script.Name
        Write-Host ""
        break
    }
    Write-Host ""
}
# ========================================
# Step 5: summarize the migration result
# ========================================
Write-Host "[5/6] 迁移结果总结" -ForegroundColor Yellow
Write-Host ""
$failureCount = $failedScripts.Count
# The failure line is red when anything failed, green otherwise.
$failureColor = "Green"
if ($failureCount -gt 0) {
    $failureColor = "Red"
}
Write-Host "总计: $($migrationScripts.Count) 个脚本" -ForegroundColor White
Write-Host "成功: $successCount" -ForegroundColor Green
Write-Host "失败: $failureCount" -ForegroundColor $failureColor
Write-Host ""
if ($failureCount -eq 0) {
    Write-Host "✅ 所有迁移脚本执行成功!" -ForegroundColor Green
} else {
    # List each failed script, then point the operator at the backup.
    Write-Host "失败的脚本:" -ForegroundColor Red
    $failedScripts | ForEach-Object {
        Write-Host " - $_" -ForegroundColor Red
    }
    Write-Host ""
    Write-Host "⚠️ 迁移未完全成功!" -ForegroundColor Red
    Write-Host "建议:检查错误日志,修复问题后重新执行" -ForegroundColor Yellow
    Write-Host "回滚:可使用备份文件恢复 -> $backupFile" -ForegroundColor Yellow
}
Write-Host ""
# ========================================
# Step 6: verify the migration result
# ========================================
# Runs only when every migration script was counted as successful; otherwise
# the verification is skipped. PGPASSWORD is removed at the end either way.
if ($successCount -ne $migrationScripts.Count) {
    Write-Host "[6/6] 跳过验证(因为迁移未完全成功)" -ForegroundColor Yellow
    Write-Host ""
} else {
    Write-Host "[6/6] 验证迁移结果..." -ForegroundColor Yellow
    Write-Host ""

    # Check that all 10 schemas exist.
    Write-Host "验证Schema创建..." -ForegroundColor Cyan
    $schemaQuery = @"
SELECT COUNT(*) FROM information_schema.schemata
WHERE schema_name IN (
'platform_schema', 'aia_schema', 'pkb_schema',
'asl_schema', 'common_schema', 'dc_schema',
'rvw_schema', 'admin_schema', 'ssa_schema', 'st_schema'
);
"@
    try {
        # -t prints tuples only; the result is trimmed before comparing.
        $foundSchemas = psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -t -c $schemaQuery 2>&1
        $foundSchemas = $foundSchemas.Trim()
        if ($foundSchemas -eq "10") {
            Write-Host "✅ 10个Schema全部创建成功" -ForegroundColor Green
        } else {
            Write-Host "⚠️ Schema数量异常: 预期10个实际${foundSchemas}" -ForegroundColor Yellow
        }
    } catch {
        Write-Host "⚠️ 无法验证Schema数量" -ForegroundColor Yellow
    }
    Write-Host ""

    # Print the row count of a few key migrated tables.
    Write-Host "验证数据迁移..." -ForegroundColor Cyan
    $qualifiedTables = @(
        "platform_schema.users",
        "aia_schema.projects",
        "aia_schema.conversations",
        "pkb_schema.knowledge_bases",
        "pkb_schema.documents"
    )
    foreach ($qualifiedName in $qualifiedTables) {
        try {
            $countQuery = "SELECT COUNT(*) FROM $qualifiedName;"
            $rowCount = psql -h $DB_HOST -p $DB_PORT -U $DB_USER -d $DB_NAME -t -c $countQuery 2>&1
            $rowCount = $rowCount.Trim()
            Write-Host " ${qualifiedName}: $rowCount 条记录" -ForegroundColor Gray
        } catch {
            Write-Host " ⚠️ 无法查询 $qualifiedName" -ForegroundColor Yellow
        }
    }
    Write-Host ""

    Write-Host "========================================" -ForegroundColor Cyan
    Write-Host "✅ Schema迁移执行完成!" -ForegroundColor Green
    Write-Host "========================================" -ForegroundColor Cyan
    Write-Host ""

    # Follow-up work for the operator.
    Write-Host "下一步操作:" -ForegroundColor Yellow
    $nextSteps = @(
        "1. 更新Prisma配置 (backend/prisma/schema.prisma)",
        "2. 生成Prisma Client (npx prisma generate)",
        "3. 更新代码以使用新Schema",
        "4. 测试现有功能"
    )
    foreach ($nextStep in $nextSteps) {
        Write-Host $nextStep -ForegroundColor Gray
    }
    Write-Host ""
    Write-Host "备份文件保存在: $backupFile" -ForegroundColor Cyan
    Write-Host ""
}

# Remove the password from the environment.
Remove-Item Env:\PGPASSWORD
Write-Host "脚本执行完成!" -ForegroundColor Green