# AIclinicalresearch/backend/scripts/iit_chat_regression.py

import json
import sys
from typing import List, Dict

import requests


# Base URL of the backend; login() and ask() build their endpoint URLs from it.
BASE = "http://localhost:3001"
# Sent as "projectId" in every chat request made by ask().
PROJECT_ID = "1d80f270-6a02-4b58-9db3-6af176e91f3c"
# Sent as "userId" in every chat request made by ask().
USER_ID = "diag-user-p1-regression"


def login() -> str:
    """Log in with the fixed phone/password pair and return the access token.

    Posts the credentials to the password-login endpoint under ``BASE`` with
    a 10 second timeout.

    Returns:
        The value found at ``data.tokens.accessToken`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON response lacks the expected nested keys.
    """
    url = f"{BASE}/api/v1/auth/login/password"
    credentials = {"phone": "13800000001", "password": "123456"}
    response = requests.post(url, json=credentials, timeout=10)
    response.raise_for_status()
    body = response.json()
    tokens = body["data"]["tokens"]
    return tokens["accessToken"]


def ask(question: str) -> Dict:
    """Send one question to the IIT chat endpoint and return the decoded JSON.

    The request carries the question as ``message`` together with the
    module-level ``PROJECT_ID`` and ``USER_ID``, and uses an 80 second
    timeout.

    Args:
        question: The text to send as the chat message.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    url = f"{BASE}/api/v1/iit/chat"
    payload = {"message": question, "projectId": PROJECT_ID, "userId": USER_ID}
    response = requests.post(url, json=payload, timeout=80)
    response.raise_for_status()
    return response.json()


def must_contain(text: str, keywords: List[str]) -> bool:
    """Return True when every keyword occurs as a substring of ``text``.

    An empty ``keywords`` list yields True.
    """
    for keyword in keywords:
        if keyword not in text:
            return False
    return True

def must_not_contain(text: str, keywords: List[str]) -> bool:
    """Return True when none of the keywords occurs as a substring of ``text``.

    An empty ``keywords`` list yields True.
    """
    return not any(keyword in text for keyword in keywords)


def main() -> int:
    """Run the IIT chat regression cases and return a process exit code.

    Logs in once as a preflight check, then sends each case's question to
    the chat endpoint via ``ask()`` and checks the ``reply`` field for the
    case's required ("must") and forbidden ("must_not") keywords. A case
    whose request or checks raise is counted as failed and the run
    continues with the next case.

    For a failing case the missing required keywords and the forbidden
    keywords that were found are printed, because only the first 220
    characters of the reply are echoed.

    Returns:
        0 when every case passes, 1 when at least one case fails.

    Raises:
        Exception: Whatever ``login()`` raises; the preflight call is not
            guarded, so the run stops before any chat check is made.
    """
    # Ensure backend is alive and creds are valid before running chat checks.
    # The returned token is not used afterwards.
    _ = login()

    cases = [
        {
            "name": "知情统计",
            "q": "目前已经有几个患者签署知情了？",
            "must": ["结论", "证据", "签署", "12"],
        },
        {
            "name": "纳排合规",
            "q": "3号患者的纳入排除标准都符合要求吗？",
            "must": ["结论", "证据", "3号", "规则"],
        },
        {
            "name": "项目总览",
            "q": "最新质控报告怎么样？",
            "must": ["结论", "通过率", "严重问题"],
        },
        {
            "name": "患者明细",
            "q": "查询一下患者ID为2的患者数据",
            "must": ["结论", "证据", "2"],
        },
        {
            "name": "访视进度",
            "q": "4号患者到第几次访视了",
            "must": ["结论", "证据", "4号", "访视"],
        },
        {
            "name": "eQuery状态",
            "q": "目前eQuery总体状态如何？",
            "must": ["结论", "待处理", "证据"],
        },
        {
            "name": "通过率口径",
            "q": "现在通过率是多少，怎么算出来的？",
            "must": ["结论", "证据", "通过率", "计算方法"],
        },
        {
            "name": "D6风险",
            "q": "现在方案偏离风险大吗？",
            "must": ["结论", "D6"],
            "must_not": ["156条严重问题", "284条警告问题"],
        },
        {
            "name": "D1维度风险",
            "q": "D1数据一致性风险现在怎么样？",
            "must": ["结论", "D1", "证据"],
            "must_not": ["D6问题总数"],
        },
        {
            "name": "D2维度风险",
            "q": "D2数据完整性现在风险大吗？",
            "must": ["结论", "D2", "证据"],
            "must_not": ["D1问题总数", "D6问题总数"],
        },
    ]

    failed = 0
    print("IIT Chat Regression Start\n")
    for idx, c in enumerate(cases, 1):
        try:
            out = ask(c["q"])
            reply = out.get("reply", "")
            # A non-string reply (e.g. JSON null) cannot be keyword-checked;
            # fail the case with an explicit message instead of an
            # incidental TypeError/AttributeError further down.
            if not isinstance(reply, str):
                raise TypeError(
                    f"'reply' is {type(reply).__name__}, expected str"
                )

            required = c["must"]
            banned = c.get("must_not") or []
            ok = must_contain(reply, required) and must_not_contain(reply, banned)

            status = "PASS" if ok else "FAIL"
            print(f"[{idx}] {status} {c['name']}")
            print(f"Q: {c['q']}")
            # chr(10) is "\n": flatten the preview onto a single line.
            print(f"A: {reply[:220].replace(chr(10), ' | ')}")
            if not ok:
                # The preview above is truncated, so spell out exactly which
                # keyword checks failed.
                missing = [k for k in required if k not in reply]
                present = [k for k in banned if k in reply]
                if missing:
                    print(f"Missing required keywords: {missing}")
                if present:
                    print(f"Forbidden keywords present: {present}")
            print("")
            if not ok:
                failed += 1
        except Exception as exc:
            # Boundary of a single case: record the failure and keep going so
            # one broken request does not hide the results of the others.
            failed += 1
            print(f"[{idx}] FAIL {c['name']}: {exc}\n")

    print(f"Done. total={len(cases)} failed={failed}")
    return 1 if failed else 0


# Script entry point: the return value of main() becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main())