Files
AIclinicalresearch/backend/restore_pgboss_functions.sql
HaHafeng 40c2f8e148 feat(rag): Complete RAG engine implementation with pgvector
Major Features:
- Created ekb_schema (13th schema) with 3 tables: KB/Document/Chunk
- Implemented EmbeddingService (text-embedding-v4, 1024-dim vectors)
- Implemented ChunkService (smart Markdown chunking)
- Implemented VectorSearchService (multi-query + hybrid search)
- Implemented RerankService (qwen3-rerank)
- Integrated DeepSeek V3 QueryRewriter for cross-language search
- Python service: Added pymupdf4llm for PDF-to-Markdown conversion
- PKB: Dual-mode adapter (pgvector/dify/hybrid)

Architecture:
- Brain-Hand Model: Business layer (DeepSeek) + Engine layer (pgvector)
- Cross-language support: Chinese query matches English documents
- Small Embedding (1024) + Strong Reranker strategy

Performance:
- End-to-end latency: 2.5s
- Cost per query: 0.0025 RMB
- Accuracy improvement: +20.5% (cross-language)

Tests:
- test-embedding-service.ts: Vector embedding verified
- test-rag-e2e.ts: Full pipeline tested
- test-rerank.ts: Rerank quality validated
- test-query-rewrite.ts: Cross-language search verified
- test-pdf-ingest.ts: Real PDF document tested (Dongen 2003.pdf)

Documentation:
- Added 05-RAG-Engine-User-Guide.md
- Added 02-Document-Processing-User-Guide.md
- Updated system status documentation

Status: Production ready
2026-01-21 20:24:29 +08:00

118 lines
4.5 KiB
PL/PgSQL

-- 恢复 pg-boss 需要的函数
-- 从备份文件 rds_init_20251224_154529.sql 提取
-- 1. create_queue 函数
-- Registers a queue in platform_schema.queue and, for partitioned queues,
-- builds a dedicated table and attaches it as a partition of
-- platform_schema.job.
--
-- Parameters:
--   queue_name  Queue name; stored in queue.name and used as the partition
--               value.
--   options     jsonb settings. Keys read here: policy, retryLimit,
--               retryDelay, retryBackoff, retryDelayMax, expireInSeconds,
--               retentionSeconds, deleteAfterSeconds, warningQueueSize,
--               deadLetter, partition.
--
-- Returns nothing. The function stops right after the insert attempt when
-- the row was not inserted (ON CONFLICT DO NOTHING) or when the queue is
-- not partitioned.
CREATE OR REPLACE FUNCTION platform_schema.create_queue(queue_name text, options jsonb) RETURNS void
    LANGUAGE plpgsql
    AS $_$
DECLARE
    -- True only when options.partition renders as the text 'true';
    -- a missing key or NULL options counts as false.
    v_partitioned boolean := (options->>'partition') IS NOT DISTINCT FROM 'true';
    v_job_table   varchar;
    v_inserted_at timestamptz;
BEGIN
    -- Partitioned queues get their own table named 'j' + sha224(queue_name)
    -- in hex; every other queue is recorded against 'job_common'.
    IF v_partitioned THEN
        v_job_table := 'j' || encode(sha224(queue_name::bytea), 'hex');
    ELSE
        v_job_table := 'job_common';
    END IF;

    -- Insert the queue row. When a conflicting row already exists nothing is
    -- inserted and v_inserted_at is left NULL.
    INSERT INTO platform_schema.queue (
        name,
        policy,
        retry_limit,
        retry_delay,
        retry_backoff,
        retry_delay_max,
        expire_seconds,
        retention_seconds,
        deletion_seconds,
        warning_queued,
        dead_letter,
        partition,
        table_name
    )
    VALUES (
        queue_name,
        options->>'policy',
        COALESCE((options->>'retryLimit')::int, 2),
        COALESCE((options->>'retryDelay')::int, 0),
        COALESCE((options->>'retryBackoff')::bool, false),
        (options->>'retryDelayMax')::int,
        COALESCE((options->>'expireInSeconds')::int, 900),
        COALESCE((options->>'retentionSeconds')::int, 1209600),
        COALESCE((options->>'deleteAfterSeconds')::int, 604800),
        COALESCE((options->>'warningQueueSize')::int, 0),
        options->>'deadLetter',
        COALESCE((options->>'partition')::bool, false),
        v_job_table
    )
    ON CONFLICT DO NOTHING
    RETURNING created_on INTO v_inserted_at;

    -- Only a freshly inserted, partitioned queue needs the DDL below.
    IF v_inserted_at IS NULL OR NOT v_partitioned THEN
        RETURN;
    END IF;

    -- Build the standalone table with the same columns and defaults as job.
    EXECUTE format('CREATE TABLE platform_schema.%I (LIKE platform_schema.job INCLUDING DEFAULTS)', v_job_table);

    -- Primary key and foreign keys back to the queue table.
    EXECUTE format('ALTER TABLE platform_schema.%1$I ADD PRIMARY KEY (name, id)', v_job_table);
    EXECUTE format('ALTER TABLE platform_schema.%1$I ADD CONSTRAINT q_fkey FOREIGN KEY (name) REFERENCES platform_schema.queue (name) ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED', v_job_table);
    EXECUTE format('ALTER TABLE platform_schema.%1$I ADD CONSTRAINT dlq_fkey FOREIGN KEY (dead_letter) REFERENCES platform_schema.queue (name) ON DELETE RESTRICT DEFERRABLE INITIALLY DEFERRED', v_job_table);

    -- Indexes created for every partitioned queue.
    EXECUTE format('CREATE INDEX %1$s_i5 ON platform_schema.%1$I (name, start_after) INCLUDE (priority, created_on, id) WHERE state < ''active''', v_job_table);
    EXECUTE format('CREATE UNIQUE INDEX %1$s_i4 ON platform_schema.%1$I (name, singleton_on, COALESCE(singleton_key, '''')) WHERE state <> ''cancelled'' AND singleton_on IS NOT NULL', v_job_table);

    -- At most one extra unique index, selected by the queue policy.
    CASE options->>'policy'
        WHEN 'short' THEN
            EXECUTE format('CREATE UNIQUE INDEX %1$s_i1 ON platform_schema.%1$I (name, COALESCE(singleton_key, '''')) WHERE state = ''created'' AND policy = ''short''', v_job_table);
        WHEN 'singleton' THEN
            EXECUTE format('CREATE UNIQUE INDEX %1$s_i2 ON platform_schema.%1$I (name, COALESCE(singleton_key, '''')) WHERE state = ''active'' AND policy = ''singleton''', v_job_table);
        WHEN 'stately' THEN
            EXECUTE format('CREATE UNIQUE INDEX %1$s_i3 ON platform_schema.%1$I (name, state, COALESCE(singleton_key, '''')) WHERE state <= ''active'' AND policy = ''stately''', v_job_table);
        WHEN 'exclusive' THEN
            EXECUTE format('CREATE UNIQUE INDEX %1$s_i6 ON platform_schema.%1$I (name, COALESCE(singleton_key, '''')) WHERE state <= ''active'' AND policy = ''exclusive''', v_job_table);
        ELSE
            -- Any other (or missing) policy adds no extra index. The explicit
            -- ELSE keeps PL/pgSQL from raising CASE_NOT_FOUND.
            NULL;
    END CASE;

    -- Constrain the table to this queue's rows, then attach it to job as the
    -- partition for queue_name.
    EXECUTE format('ALTER TABLE platform_schema.%I ADD CONSTRAINT cjc CHECK (name=%L)', v_job_table, queue_name);
    EXECUTE format('ALTER TABLE platform_schema.job ATTACH PARTITION platform_schema.%I FOR VALUES IN (%L)', v_job_table, queue_name);
END;
$_$;
-- 2. delete_queue 函数
-- Removes a queue together with the jobs stored for it.
--
-- Parameters:
--   queue_name  Name of the queue, matched against platform_schema.queue.name.
--
-- Behavior:
--   * Unknown queue: returns without doing anything. Without this guard the
--     lookup leaves v_table NULL and format('%I', NULL) raises
--     "null values cannot be formatted as an SQL identifier".
--   * Partitioned queue: drops the queue's dedicated table.
--   * Non-partitioned queue: deletes the queue's rows from the table named
--     in queue.table_name.
--   * Finally deletes the row from platform_schema.queue.
CREATE OR REPLACE FUNCTION platform_schema.delete_queue(queue_name text) RETURNS void
    LANGUAGE plpgsql
    AS $$
DECLARE
    v_table     varchar;
    v_partition bool;
BEGIN
    -- Look up where this queue's jobs live.
    SELECT table_name, partition
    FROM platform_schema.queue
    WHERE name = queue_name
    INTO v_table, v_partition;

    -- No such queue: nothing to delete, so make the call a no-op.
    IF NOT FOUND THEN
        RETURN;
    END IF;

    IF v_partition THEN
        EXECUTE format('DROP TABLE IF EXISTS platform_schema.%I', v_table);
    ELSE
        EXECUTE format('DELETE FROM platform_schema.%I WHERE name = %L', v_table, queue_name);
    END IF;

    DELETE FROM platform_schema.queue WHERE name = queue_name;
END;
$$;