feat(iit): harden QC pipeline consistency and release artifacts
Implement IIT quality workflow hardening across eQuery deduplication, guard metadata validation, timeline/readability improvements, and chat evidence fallbacks, then synchronize release and development documentation for deployment handoff. Includes migration/scripts for open eQuery dedupe guards, orchestration/status semantics, report/tool readability fixes, and updated module status plus deployment checklist.
Made-with: Cursor
This commit is contained in:
126
backend/scripts/iit_chat_regression.py
Normal file
126
backend/scripts/iit_chat_regression.py
Normal file
@@ -0,0 +1,126 @@
|
||||
import json
|
||||
import sys
|
||||
from typing import List, Dict
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# Root URL that every request in this script is sent to.
BASE = "http://localhost:3001"
# Sent as "projectId" in each chat request.
PROJECT_ID = "1d80f270-6a02-4b58-9db3-6af176e91f3c"
# Sent as "userId" in each chat request.
USER_ID = "diag-user-p1-regression"
|
||||
|
||||
|
||||
def login() -> str:
    """Log in with the fixed phone/password pair and return the access token.

    Returns:
        The value found at ``data.tokens.accessToken`` in the JSON response.

    Raises:
        requests.HTTPError: If the login endpoint answers with an error status.
        KeyError: If the response JSON lacks the expected nesting.
    """
    resp = requests.post(
        f"{BASE}/api/v1/auth/login/password",
        json={"phone": "13800000001", "password": "123456"},
        timeout=10,
    )
    # Surface 4xx/5xx as an exception instead of parsing an error payload.
    resp.raise_for_status()
    return resp.json()["data"]["tokens"]["accessToken"]
|
||||
|
||||
|
||||
def ask(question: str) -> Dict:
    """Send one question to the IIT chat endpoint and return the parsed JSON.

    Args:
        question: Text sent as the ``message`` field of the request body.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    resp = requests.post(
        f"{BASE}/api/v1/iit/chat",
        json={"message": question, "projectId": PROJECT_ID, "userId": USER_ID},
        # Much longer than the login timeout; presumably the chat endpoint is
        # slow to answer -- confirm against the backend.
        timeout=80,
    )
    resp.raise_for_status()
    return resp.json()
|
||||
|
||||
|
||||
def must_contain(text: str, keywords: List[str]) -> bool:
    """Return True when every keyword occurs as a substring of *text*.

    An empty *keywords* list yields True.
    """
    return all(k in text for k in keywords)
|
||||
|
||||
def must_not_contain(text: str, keywords: List[str]) -> bool:
    """Return True when none of the keywords occurs as a substring of *text*.

    An empty *keywords* list yields True.
    """
    return all(k not in text for k in keywords)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the chat regression cases and report PASS/FAIL for each.

    Each case sends its question through ``ask`` and checks the ``reply``
    field of the response: every ``must`` keyword has to be present and,
    when the case defines ``must_not``, none of those keywords may appear.
    A case whose request or evaluation raises is counted as failed.

    Returns:
        1 if at least one case failed, otherwise 0.
    """
    # Ensure backend is alive and creds are valid before running chat checks.
    # The token is discarded; an exception here aborts the run.
    _ = login()

    # NOTE(review): expectations such as "12" look tied to the data of the
    # project under test -- confirm before pointing this at another project.
    cases = [
        {
            "name": "知情统计",
            "q": "目前已经有几个患者签署知情了?",
            "must": ["结论", "证据", "签署", "12"],
        },
        {
            "name": "纳排合规",
            "q": "3号患者的纳入排除标准都符合要求吗?",
            "must": ["结论", "证据", "3号", "规则"],
        },
        {
            "name": "项目总览",
            "q": "最新质控报告怎么样?",
            "must": ["结论", "通过率", "严重问题"],
        },
        {
            "name": "患者明细",
            "q": "查询一下患者ID为2的患者数据",
            "must": ["结论", "证据", "2"],
        },
        {
            "name": "访视进度",
            "q": "4号患者到第几次访视了",
            "must": ["结论", "证据", "4号", "访视"],
        },
        {
            "name": "eQuery状态",
            "q": "目前eQuery总体状态如何?",
            "must": ["结论", "待处理", "证据"],
        },
        {
            "name": "通过率口径",
            "q": "现在通过率是多少,怎么算出来的?",
            "must": ["结论", "证据", "通过率", "计算方法"],
        },
        {
            "name": "D6风险",
            "q": "现在方案偏离风险大吗?",
            "must": ["结论", "D6"],
            "must_not": ["156条严重问题", "284条警告问题"],
        },
        {
            "name": "D1维度风险",
            "q": "D1数据一致性风险现在怎么样?",
            "must": ["结论", "D1", "证据"],
            "must_not": ["D6问题总数"],
        },
        {
            "name": "D2维度风险",
            "q": "D2数据完整性现在风险大吗?",
            "must": ["结论", "D2", "证据"],
            "must_not": ["D1问题总数", "D6问题总数"],
        },
    ]

    failed = 0
    print("IIT Chat Regression Start\n")
    for idx, c in enumerate(cases, 1):
        # One broken case must not stop the rest, so request, evaluation and
        # reporting all sit inside the same try.
        try:
            out = ask(c["q"])
            reply = out.get("reply", "")
            ok = must_contain(reply, c["must"])
            # Forbidden keywords only matter once the required ones are there.
            if ok and c.get("must_not"):
                ok = must_not_contain(reply, c["must_not"])
            status = "PASS" if ok else "FAIL"
            print(f"[{idx}] {status} {c['name']}")
            print(f"Q: {c['q']}")
            # Show a one-line preview: first 220 chars, newlines as " | ".
            print(f"A: {reply[:220].replace(chr(10), ' | ')}")
            print("")
            if not ok:
                failed += 1
        except Exception as exc:
            failed += 1
            print(f"[{idx}] FAIL {c['name']}: {exc}\n")

    print(f"Done. total={len(cases)} failed={failed}")
    return 1 if failed else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|
||||
|
||||
Reference in New Issue
Block a user