feat(dc/tool-c): 完成AI代码生成服务(Day 3 MVP)
核心功能:
- 新增AICodeService(550行):AI代码生成核心服务
- 新增AIController(257行):4个API端点
- 新增dc_tool_c_ai_history表:存储对话历史
- 实现自我修正机制:最多3次智能重试
- 集成LLMFactory:复用通用能力层
- 10个Few-shot示例:覆盖Level 1-4场景

技术优化:
- 修复NaN序列化问题(Python端转None)
- 修复数据传递问题(从Session获取真实数据)
- 优化System Prompt(明确环境信息)
- 调整Few-shot示例(移除import语句)

测试结果:
- 通过率:9/11(81.8%) 达到MVP标准
- 成功场景:缺失值处理、编码、分箱、BMI、筛选、填补、统计、分类
- 待优化:数值清洗、智能去重(已记录技术债务TD-C-006)

API端点:
- POST /api/v1/dc/tool-c/ai/generate(生成代码)
- POST /api/v1/dc/tool-c/ai/execute(执行代码)
- POST /api/v1/dc/tool-c/ai/process(生成并执行,一步到位)
- GET /api/v1/dc/tool-c/ai/history/:sessionId(对话历史)

文档更新:
- 新增Day 3开发完成总结(770行)
- 新增复杂场景优化技术债务(TD-C-006)
- 更新工具C当前状态文档
- 更新技术债务清单

影响范围:
- backend/src/modules/dc/tool-c/*(新增2个文件,更新1个文件)
- backend/scripts/create-tool-c-ai-history-table.mjs(新增)
- backend/prisma/schema.prisma(新增DcToolCAiHistory模型)
- extraction_service/services/dc_executor.py(NaN序列化修复)
- docs/03-业务模块/DC-数据清洗整理/*(5份文档更新)

Breaking Changes: 无

总代码行数:+950行

Refs: #Tool-C-Day3
This commit is contained in:
@@ -12,6 +12,8 @@
|
||||
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Dict, Any
|
||||
from loguru import logger
|
||||
from pathlib import Path
|
||||
import os
|
||||
@@ -58,6 +60,19 @@ from services.nougat_extractor import check_nougat_available, get_nougat_info
|
||||
from services.file_utils import detect_file_type, cleanup_temp_file
|
||||
from services.docx_extractor import extract_docx_mammoth, validate_docx_file
|
||||
from services.txt_extractor import extract_txt, validate_txt_file
|
||||
from services.dc_executor import validate_code, execute_pandas_code
|
||||
|
||||
|
||||
# ==================== Pydantic Models ====================
|
||||
|
||||
class ValidateCodeRequest(BaseModel):
    """Request body for the code-validation endpoint."""

    # Source text of the Pandas code to be validated.
    code: str
|
||||
|
||||
class ExecuteCodeRequest(BaseModel):
    """Request body for the code-execution endpoint."""

    # Input records: a list of dicts keyed by string.
    data: List[Dict[str, Any]]
    # Source text of the Pandas code to execute against the data.
    code: str
|
||||
|
||||
|
||||
# ==================== API路由 ====================
|
||||
@@ -484,6 +499,99 @@ async def extract_document(
|
||||
)
|
||||
|
||||
|
||||
# ==================== DC工具C - 代码执行接口 ====================
|
||||
|
||||
@app.post("/api/dc/validate")
async def validate_pandas_code(request: ValidateCodeRequest):
    """
    DC Tool C - safety-validation endpoint for Pandas code.

    Args:
        request: ValidateCodeRequest
            - code: str  # Pandas code to validate

    Returns:
        A JSONResponse wrapping the dict returned by ``validate_code``:
        {
            "valid": bool,
            "errors": List[str],
            "warnings": List[str]
        }

    Raises:
        HTTPException: re-raised unchanged if one is raised inside the
            handler; otherwise any unexpected error is converted into a
            500 response whose detail carries the error message.
    """
    try:
        logger.info(f"开始验证Pandas代码,长度: {len(request.code)} 字符")

        # Run the AST safety check
        result = validate_code(request.code)

        logger.info(
            f"代码验证完成: valid={result['valid']}, "
            f"errors={len(result['errors'])}, warnings={len(result['warnings'])}"
        )

        return JSONResponse(content=result)

    except HTTPException:
        # Already a well-formed HTTP error; let FastAPI handle it as-is.
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message so
        # the root cause is not lost when the error is turned into a 500.
        logger.exception(f"代码验证失败: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"验证失败: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
@app.post("/api/dc/execute")
async def execute_pandas_code_endpoint(request: ExecuteCodeRequest):
    """
    DC Tool C - Pandas code execution endpoint.

    Args:
        request: ExecuteCodeRequest
            - data: List[Dict]  # data in JSON form (array of objects)
            - code: str         # Pandas code (operates on the ``df`` variable)

    Returns:
        A JSONResponse wrapping the dict returned by ``execute_pandas_code``:
        {
            "success": bool,
            "result_data": List[Dict],    # data after execution
            "output": str,                # printed output
            "error": str,                 # error message (on failure)
            "execution_time": float,      # execution time in seconds
            "result_shape": [rows, cols]  # shape of the result
        }

    Raises:
        HTTPException: re-raised unchanged if one is raised inside the
            handler; otherwise any unexpected error is converted into a
            500 response whose detail carries the error message.
    """
    try:
        logger.info(
            f"开始执行Pandas代码: "
            f"数据行数={len(request.data)}, 代码长度={len(request.code)} 字符"
        )

        # Execute the code.
        # NOTE(review): this is a synchronous call inside an async endpoint;
        # if the executor is long-running it will block the event loop.
        # Consider offloading to a threadpool once the executor is confirmed
        # to be safe to run off the main thread.
        result = execute_pandas_code(request.data, request.code)

        if result["success"]:
            logger.info(
                f"代码执行成功: "
                f"结果shape={result.get('result_shape')}, "
                f"耗时={result['execution_time']:.3f}秒"
            )
        else:
            # A failed user script is reported in the payload, not as an
            # HTTP error, so it is only logged at warning level here.
            logger.warning(
                f"代码执行失败: {result.get('error', 'Unknown error')}"
            )

        return JSONResponse(content=result)

    except HTTPException:
        # Already a well-formed HTTP error; let FastAPI handle it as-is.
        raise
    except Exception as e:
        # logger.exception records the traceback alongside the message so
        # the root cause is not lost when the error is turned into a 500.
        logger.exception(f"代码执行接口失败: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"处理失败: {str(e)}"
        ) from e
|
||||
|
||||
|
||||
# ==================== 启动配置 ====================
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user