feat(dc/tool-c): 完成AI代码生成服务（Day 3 MVP）

核心功能： - 新增AICodeService（550行）：AI代码生成核心服务 - 新增AIController（257行）：4个API端点 - 新增dc_tool_c_ai_history表：存储对话历史 - 实现自我修正机制：最多3次智能重试 - 集成LLMFactory：复用通用能力层 - 10个Few-shot示例：覆盖Level 1-4场景 技术优化： - 修复NaN序列化问题（Python端转None） - 修复数据传递问题（从Session获取真实数据） - 优化System Prompt（明确环境信息） - 调整Few-shot示例（移除import语句） 测试结果： - 通过率：9/11（81.8%）达到MVP标准 - 成功场景：缺失值处理、编码、分箱、BMI、筛选、填补、统计、分类 - 待优化：数值清洗、智能去重（已记录技术债务TD-C-006） API端点： - POST /api/v1/dc/tool-c/ai/generate（生成代码） - POST /api/v1/dc/tool-c/ai/execute（执行代码） - POST /api/v1/dc/tool-c/ai/process（生成并执行，一步到位） - GET /api/v1/dc/tool-c/ai/history/:sessionId（对话历史） 文档更新： - 新增Day 3开发完成总结（770行） - 新增复杂场景优化技术债务（TD-C-006） - 更新工具C当前状态文档 - 更新技术债务清单 影响范围： - backend/src/modules/dc/tool-c/*（新增2个文件，更新1个文件） - backend/scripts/create-tool-c-ai-history-table.mjs（新增） - backend/prisma/schema.prisma（新增DcToolCAiHistory模型） - extraction_service/services/dc_executor.py（NaN序列化修复） - docs/03-业务模块/DC-数据清洗整理/*（5份文档更新） Breaking Changes: 无 总代码行数：+950行 Refs: #Tool-C-Day3
2025-12-07 16:21:32 +08:00
parent 2348234013
commit f01981bf78
68 changed files with 6257 additions and 17 deletions
--- a/extraction_service/test_dc_api.py
+++ b/extraction_service/test_dc_api.py
@@ -0,0 +1,281 @@
+"""
+DC工具C - API测试脚本
+
+测试项：
+1. 健康检查 (GET /api/health)
+2. AST安全检查 - 正常代码
+3. AST安全检查 - 危险代码
+4. Pandas代码执行 - 简单场景
+5. Pandas代码执行 - 医疗数据清洗场景
+"""
+
+import requests
+import json
+from typing import Dict, Any
+
+BASE_URL = "http://localhost:8000"  # Root URL that every request in this script is sent to.
+
+def print_test_header(title: str):
+    """Print a banner: a blank line, 70 '=' signs, the indented title, 70 '=' signs."""
+    rule = "=" * 70
+    print("\n" + rule)
+    print(f"  {title}\n{rule}")
+
+def print_result(response: requests.Response):
+    """Print the HTTP status code, then the body (pretty-printed when it parses as JSON)."""
+    print(f"\n状态码: {response.status_code}")
+    print("响应内容:")
+    try:
+        body = json.dumps(response.json(), indent=2, ensure_ascii=False)
+    except ValueError:  # body is not JSON; every JSON decode error is a ValueError
+        body = response.text
+    print(body)
+
+def test_health_check():
+    """Test 1: GET /api/health and report whether it answered with HTTP 200.
+
+    Returns:
+        True on a 200 response; False on any other status or on any exception
+        (the exception text is printed instead of being raised).
+    """
+    print_test_header("测试1: 健康检查")
+    try:
+        response = requests.get(f"{BASE_URL}/api/health", timeout=5)
+        print_result(response)
+        healthy = response.status_code == 200
+        print("\n✅ 健康检查通过" if healthy else "\n❌ 健康检查失败")
+        return healthy
+    except Exception as exc:
+        print(f"\n❌ 健康检查异常: {exc}")
+        return False
+
+def test_validate_safe_code():
+    """Test 2: the AST safety check must accept harmless pandas code.
+
+    POSTs a small snippet to /api/dc/validate and expects HTTP 200 with a
+    truthy "valid" field in the JSON body.
+
+    Returns:
+        True when the snippet is accepted; False on a non-200 status, a falsy
+        "valid" field, or any exception (which is printed, not raised).
+    """
+    print_test_header("测试2: AST安全检查 - 正常代码")
+    snippet = """
+import pandas as pd
+df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
+print(df['age_group'].value_counts())
+"""
+    try:
+        payload = {"code": snippet}
+        response = requests.post(f"{BASE_URL}/api/dc/validate", json=payload, timeout=5)
+        print_result(response)
+        # Guard clauses: stop at the first expectation that does not hold.
+        if response.status_code != 200:
+            print("\n❌ API调用失败")
+            return False
+        if not response.json().get("valid"):
+            print("\n❌ 正常代码被误判为危险")
+            return False
+        print("\n✅ 正常代码验证通过（valid=True）")
+        return True
+    except Exception as exc:
+        print(f"\n❌ 测试异常: {exc}")
+        return False
+
+def test_validate_dangerous_code():
+    """Test 3: the AST safety check must reject code using os.system and eval.
+
+    POSTs the snippet to /api/dc/validate and expects HTTP 200 with a falsy
+    "valid" field and a non-empty "errors" field in the JSON body.
+
+    Returns:
+        True when the snippet is rejected with error details; False on a
+        non-200 status, an accepted snippet, or any exception (printed).
+    """
+    print_test_header("测试3: AST安全检查 - 危险代码（应该被拦截）")
+    snippet = """
+import os
+import sys
+os.system('echo "危险操作"')
+eval('print("evil code")')
+"""
+    try:
+        payload = {"code": snippet}
+        response = requests.post(f"{BASE_URL}/api/dc/validate", json=payload, timeout=5)
+        print_result(response)
+        if response.status_code != 200:
+            print("\n❌ API调用失败")
+            return False
+        verdict = response.json()
+        if not verdict.get("valid") and len(verdict.get("errors", [])) > 0:
+            print("\n✅ 危险代码成功拦截（valid=False, 有错误信息）")
+            return True
+        print("\n❌ 危险代码未被拦截！")
+        return False
+    except Exception as exc:
+        print(f"\n❌ 测试异常: {exc}")
+        return False
+
+def test_execute_simple_code():
+    """Test 4: run a one-line pandas transformation through /api/dc/execute.
+
+    Sends four patient rows plus code that derives an 'age_group' column and
+    expects HTTP 200, a truthy "success" field, and 'age_group' present in
+    the first row of "result_data".
+
+    Returns:
+        True when all expectations hold; False otherwise, including on any
+        exception (which is printed, not raised).
+    """
+    print_test_header("测试4: Pandas代码执行 - 简单场景")
+    # Input rows, sent as the "data" field of the request.
+    rows = [
+        {"patient_id": "P001", "age": 25, "gender": "男"},
+        {"patient_id": "P002", "age": 65, "gender": "女"},
+        {"patient_id": "P003", "age": 45, "gender": "男"},
+        {"patient_id": "P004", "age": 70, "gender": "女"},
+    ]
+    # Code sent as the "code" field; it refers to the data as `df`.
+    snippet = """
+df['age_group'] = df['age'].apply(lambda x: '老年' if x > 60 else '非老年')
+print(f"数据处理完成，共 {len(df)} 行")
+print(df['age_group'].value_counts())
+"""
+    try:
+        payload = {"data": rows, "code": snippet}
+        response = requests.post(f"{BASE_URL}/api/dc/execute", json=payload, timeout=10)
+        print_result(response)
+        if response.status_code != 200:
+            print("\n❌ API调用失败")
+            return False
+        outcome = response.json()
+        if not outcome.get("success"):
+            print(f"\n❌ 代码执行失败: {outcome.get('error')}")
+            return False
+        result_rows = outcome.get("result_data", [])
+        print(f"\n结果数据行数: {len(result_rows)}")
+        print(f"执行时间: {outcome.get('execution_time', 0):.3f}秒")
+        # The transformation must have added the new column to the output rows.
+        if len(result_rows) > 0 and 'age_group' in result_rows[0]:
+            print("\n✅ 简单代码执行成功（新增列 age_group）")
+            return True
+        print("\n❌ 代码执行成功但结果不正确")
+        return False
+    except Exception as exc:
+        print(f"\n❌ 测试异常: {exc}")
+        return False
+
+def test_execute_medical_cleaning():
+    """Test 5: run a multi-step medical cleaning script through /api/dc/execute.
+
+    The submitted code blanks out ages above 120 and adds a 'hypertension'
+    column ('高血压' when sbp >= 140 or dbp >= 90, otherwise '正常'). The
+    returned rows are checked against the sample input: exactly two of the
+    five patients (P002 and P004) are hypertensive, and no age above 120 may
+    remain.
+
+    Returns:
+        True when the service answers HTTP 200 with a truthy "success" field
+        and the returned rows match those expectations; False otherwise,
+        including on any exception (which is printed, not raised).
+    """
+    print_test_header("测试5: Pandas代码执行 - 医疗数据清洗场景")
+    medical_data = [
+        {"patient_id": "P001", "age": 25, "gender": "男", "sbp": 120, "dbp": 80},
+        {"patient_id": "P002", "age": 65, "gender": "女", "sbp": 150, "dbp": 95},
+        {"patient_id": "P003", "age": 45, "gender": "男", "sbp": 135, "dbp": 85},
+        {"patient_id": "P004", "age": None, "gender": "女", "sbp": 160, "dbp": 100},
+        {"patient_id": "P005", "age": 200, "gender": "男", "sbp": 110, "dbp": 70},
+    ]
+    medical_code = """
+import numpy as np
+
+# 1. 清理异常年龄值（>120视为异常）
+df['age'] = df['age'].apply(lambda x: np.nan if x is None or x > 120 else x)
+
+# 2. 计算血压状态（收缩压 >= 140 或舒张压 >= 90 为高血压）
+df['hypertension'] = df.apply(
+    lambda row: '高血压' if row['sbp'] >= 140 or row['dbp'] >= 90 else '正常',
+    axis=1
+)
+
+# 3. 统计结果
+print(f"总样本数: {len(df)}")
+print(f"年龄缺失数: {df['age'].isna().sum()}")
+print(f"高血压人数: {(df['hypertension'] == '高血压').sum()}")
+"""
+    try:
+        payload = {"data": medical_data, "code": medical_code}
+        response = requests.post(f"{BASE_URL}/api/dc/execute", json=payload, timeout=10)
+        print_result(response)
+        if response.status_code != 200:
+            print("\n❌ API调用失败")
+            return False
+        result = response.json()
+        if not result.get("success"):
+            print(f"\n❌ 代码执行失败: {result.get('error')}")
+            return False
+        result_data = result.get("result_data", [])
+        print(f"\n结果数据行数: {len(result_data)}")
+        print(f"执行时间: {result.get('execution_time', 0):.3f}秒")
+        if not result_data or 'hypertension' not in result_data[0]:
+            print("\n❌ 代码执行成功但结果不正确")
+            return False
+        hypertension_count = sum(row.get('hypertension') == '高血压' for row in result_data)
+        print(f"高血压人数: {hypertension_count}")
+        # Verify against the sample input: P002 and P004 are hypertensive, and the
+        # age of 200 must be gone; non-numeric or NaN ages never count as > 120.
+        ages = [row.get('age') for row in result_data]
+        age_leak = any(isinstance(age, (int, float)) and age > 120 for age in ages)
+        if hypertension_count != 2 or age_leak:
+            print("\n❌ 代码执行成功但结果不正确")
+            return False
+        print("\n✅ 医疗数据清洗场景执行成功")
+        return True
+    except Exception as e:
+        print(f"\n❌ 测试异常: {str(e)}")
+        return False
+
+def main():
+    """Run the five API checks in order and print a pass/fail summary.
+
+    Every check runs even if an earlier one fails. Nothing is returned and the
+    process exit status is not affected by failures.
+    """
+    rocket_line = "🚀" * 35
+    print("\n" + rocket_line)
+    print("   DC工具C - Python微服务API测试")
+    print(rocket_line)
+
+    # Dict displays evaluate their entries in order, so the checks run top to bottom.
+    outcomes = {
+        "健康检查": test_health_check(),
+        "AST检查-正常代码": test_validate_safe_code(),
+        "AST检查-危险代码": test_validate_dangerous_code(),
+        "代码执行-简单场景": test_execute_simple_code(),
+        "代码执行-医疗清洗": test_execute_medical_cleaning(),
+    }
+
+    print("\n" + "=" * 70)
+    print("   测试结果汇总")
+    print("=" * 70)
+    for label, ok in outcomes.items():
+        print(f"{label:20s}: {'✅ 通过' if ok else '❌ 失败'}")
+
+    total = len(outcomes)
+    passed_count = sum(map(bool, outcomes.values()))
+    success_rate = (passed_count / total * 100) if total > 0 else 0
+    dashes = "-" * 70
+    print("\n" + dashes)
+    print(f"总计: {passed_count}/{total} 通过 ({success_rate:.1f}%)")
+    print(dashes)
+    if passed_count == total:
+        print("\n🎉 所有测试通过！Day 1 Python服务开发完成！")
+    else:
+        print(f"\n⚠️  有 {total - passed_count} 个测试失败，请检查")
+    print("\n")
+
+if __name__ == "__main__":  # Run the checks only when executed as a script, not on import.
+    main()
+
+