feat(ssa): Complete Phase 2A frontend integration - multi-step workflow end-to-end
Phase 2A: WorkflowPlannerService, WorkflowExecutorService, Python data quality, 6 bug fixes, DescriptiveResultView, multi-step R code/Word export, MVP UI reuse. V11 UI: Gemini-style, multi-task, single-page scroll, Word export. Architecture: Block-based rendering consensus (4 block types). New R tools: chi_square, correlation, descriptive, logistic_binary, mann_whitney, t_test_paired. Docs: dev summary, block-based plan, status updates, task list v2.0. Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
@@ -94,6 +94,8 @@ from operations.metric_time_transform import (
|
||||
preview_multi_metric_to_matrix # ✨ 多指标转换预览(方向2)
|
||||
)
|
||||
from operations.fillna import fillna_simple, fillna_mice, get_column_missing_stats
|
||||
# ✨ SSA Phase 2A: 数据画像
|
||||
from operations.data_profile import generate_data_profile, get_quality_score
|
||||
|
||||
|
||||
# ==================== Pydantic Models ====================
|
||||
@@ -231,6 +233,21 @@ class FillnaStatsRequest(BaseModel):
|
||||
column: str
|
||||
|
||||
|
||||
# ✨ SSA Phase 2A: DataProfile 请求模型
|
||||
class DataProfileRequest(BaseModel):
    """Request body for the JSON data-profile endpoint (SSA Phase 2A)."""

    # Dataset rows, one dict per record.
    data: List[Dict[str, Any]]

    # Maximum number of unique values shown for a categorical variable.
    max_unique_values: int = 20

    # Whether the response should also contain a quality score.
    include_quality_score: bool = True
|
||||
|
||||
|
||||
class DataProfileCSVRequest(BaseModel):
    """Request body for the direct-CSV data-profile endpoint (SSA Phase 2A)."""

    # Full content of the CSV file, passed as a single string.
    csv_content: str

    # Maximum number of unique values shown for a categorical variable.
    max_unique_values: int = 20

    # Whether the response should also contain a quality score.
    include_quality_score: bool = True
|
||||
|
||||
|
||||
class FillnaSimpleRequest(BaseModel):
|
||||
"""简单填补请求模型"""
|
||||
data: List[Dict[str, Any]]
|
||||
@@ -2125,6 +2142,129 @@ async def operation_fillna_mice(request: FillnaMiceRequest):
|
||||
}, status_code=400)
|
||||
|
||||
|
||||
# ==================== SSA Phase 2A: DataProfile API ====================
|
||||
|
||||
@app.post("/api/ssa/data-profile")
async def ssa_data_profile(request: DataProfileRequest):
    """
    Generate a data profile from JSON records (SSA Phase 2A).

    Used by the SSA module to quickly profile a dataset right after the
    user uploads it; the profile is then fed to an LLM to generate the
    analysis plan (SAP).

    Args:
        request: DataProfileRequest
            - data: the dataset as a list of JSON records
            - max_unique_values: maximum number of unique values shown
              for a categorical variable
            - include_quality_score: whether to include the quality score

    Returns:
        JSONResponse. On success (HTTP 200):
        {
            "success": True,
            "profile": {...},
            "quality": {...} (only when include_quality_score is set),
            "execution_time": float (seconds, rounded to 3 decimals)
        }
        On any exception (HTTP 400):
        {
            "success": False,
            "error": str,
            "execution_time": float (seconds, rounded to 3 decimals)
        }
    """
    import time

    # Start the timer before the try block so the error path can always
    # report a duration. perf_counter() is monotonic, so the measured
    # duration is unaffected by wall-clock adjustments.
    start_time = time.perf_counter()

    try:
        # Imported inside the try so that a missing pandas installation is
        # reported through the same JSON error response as any other failure.
        import pandas as pd

        df = pd.DataFrame(request.data)

        logger.info(f"[SSA] 开始生成数据画像: {df.shape}")

        profile = generate_data_profile(df, request.max_unique_values)

        result = {
            "success": True,
            "profile": profile
        }

        if request.include_quality_score:
            result["quality"] = get_quality_score(profile)

        execution_time = time.perf_counter() - start_time
        result["execution_time"] = round(execution_time, 3)

        logger.info(f"[SSA] 数据画像生成完成: {execution_time:.3f}s")

        return JSONResponse(content=result)

    except Exception as e:
        # Top-level boundary of the endpoint: log with the traceback and
        # convert the failure into a structured error response.
        logger.exception(f"[SSA] 数据画像生成失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            # Rounded the same way as on the success path.
            "execution_time": round(time.perf_counter() - start_time, 3)
        }, status_code=400)
|
||||
|
||||
|
||||
@app.post("/api/ssa/data-profile-csv")
async def ssa_data_profile_csv(request: DataProfileCSVRequest):
    """
    Generate a data profile from raw CSV text (SSA Phase 2A).

    Receives the CSV content as a string and lets pandas parse it
    directly, which is more efficient and more reliable than parsing in
    Node.js and converting the result to JSON first.

    Args:
        request: DataProfileCSVRequest
            - csv_content: content of the CSV file (string)
            - max_unique_values: maximum number of unique values shown
              for a categorical variable
            - include_quality_score: whether to include the quality score

    Returns:
        JSONResponse. On success (HTTP 200):
        {
            "success": True,
            "profile": {...},
            "quality": {...} (only when include_quality_score is set),
            "execution_time": float (seconds, rounded to 3 decimals)
        }
        On any exception (HTTP 400):
        {
            "success": False,
            "error": str,
            "execution_time": float (seconds, rounded to 3 decimals)
        }
    """
    import time
    from io import StringIO

    # Start the timer before the try block so the error path can always
    # report a duration. perf_counter() is monotonic, so the measured
    # duration is unaffected by wall-clock adjustments.
    start_time = time.perf_counter()

    try:
        # Imported inside the try so that a missing pandas installation is
        # reported through the same JSON error response as any other failure.
        import pandas as pd

        # pandas parses the CSV string directly and infers column types.
        # NOTE(review): parsing runs synchronously inside an async handler;
        # for large uploads this presumably blocks the event loop - confirm
        # whether offloading to a worker thread is needed.
        df = pd.read_csv(StringIO(request.csv_content))

        logger.info(f"[SSA] CSV 解析完成,开始生成数据画像: {df.shape}")

        profile = generate_data_profile(df, request.max_unique_values)

        result = {
            "success": True,
            "profile": profile
        }

        if request.include_quality_score:
            result["quality"] = get_quality_score(profile)

        execution_time = time.perf_counter() - start_time
        result["execution_time"] = round(execution_time, 3)

        logger.info(f"[SSA] 数据画像生成完成 (CSV): {execution_time:.3f}s")

        return JSONResponse(content=result)

    except Exception as e:
        # Top-level boundary of the endpoint: log with the traceback and
        # convert the failure into a structured error response.
        logger.exception(f"[SSA] CSV 数据画像生成失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            # Rounded the same way as on the success path.
            "execution_time": round(time.perf_counter() - start_time, 3)
        }, status_code=400)
|
||||
|
||||
|
||||
# ==================== Word 导出 API ====================
|
||||
|
||||
@app.get("/api/pandoc/status")
|
||||
|
||||
Reference in New Issue
Block a user