feat(ssa): Complete Phase 2A frontend integration - multi-step workflow end-to-end

Phase 2A: WorkflowPlannerService, WorkflowExecutorService, Python data quality, 6 bug fixes, DescriptiveResultView, multi-step R code/Word export, MVP UI reuse. V11 UI: Gemini-style, multi-task, single-page scroll, Word export. Architecture: Block-based rendering consensus (4 block types). New R tools: chi_square, correlation, descriptive, logistic_binary, mann_whitney, t_test_paired. Docs: dev summary, block-based plan, status updates, task list v2.0.

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
2026-02-20 23:09:27 +08:00
parent 23b422f758
commit 428a22adf2
62 changed files with 15416 additions and 299 deletions

View File

@@ -94,6 +94,8 @@ from operations.metric_time_transform import (
preview_multi_metric_to_matrix # ✨ 多指标转换预览方向2
)
from operations.fillna import fillna_simple, fillna_mice, get_column_missing_stats
# ✨ SSA Phase 2A: 数据画像
from operations.data_profile import generate_data_profile, get_quality_score
# ==================== Pydantic Models ====================
@@ -231,6 +233,21 @@ class FillnaStatsRequest(BaseModel):
column: str
# ✨ SSA Phase 2A: DataProfile 请求模型
class DataProfileRequest(BaseModel):
    """Request body for the JSON-based data-profile endpoint (SSA Phase 2A)."""

    # Dataset supplied as JSON: a list of dicts.
    data: List[Dict[str, Any]]
    # Maximum number of unique values shown for a categorical variable.
    max_unique_values: int = 20
    # Whether the response should include the quality score.
    include_quality_score: bool = True
class DataProfileCSVRequest(BaseModel):
    """Request body for the data-profile endpoint that takes raw CSV text (SSA Phase 2A)."""

    # Full content of the CSV file, as a string.
    csv_content: str
    # Maximum number of unique values shown for a categorical variable.
    max_unique_values: int = 20
    # Whether the response should include the quality score.
    include_quality_score: bool = True
class FillnaSimpleRequest(BaseModel):
"""简单填补请求模型"""
data: List[Dict[str, Any]]
@@ -2125,6 +2142,129 @@ async def operation_fillna_mice(request: FillnaMiceRequest):
}, status_code=400)
# ==================== SSA Phase 2A: DataProfile API ====================
@app.post("/api/ssa/data-profile")
async def ssa_data_profile(request: DataProfileRequest):
    """
    Generate a data profile from JSON rows (SSA Phase 2A).

    Used by the SSA module to quickly profile a dataset when the user
    uploads data; the profile is fed to the LLM to produce the analysis
    plan (SAP).

    Args:
        request: DataProfileRequest
            - data: the dataset (JSON format)
            - max_unique_values: maximum number of unique values shown for
              a categorical variable
            - include_quality_score: whether to include the quality score

    Returns:
        A JSONResponse. On success:
        {
            "success": True,
            "profile": {
                "columns": [...],
                "summary": {...}
            },
            "quality": {...} (optional),
            "execution_time": float
        }
        On any exception, HTTP 400 with:
        {
            "success": False,
            "error": str,
            "execution_time": float
        }
    """
    import time

    # Start the clock before the try block so the error path can always
    # report an elapsed time without probing locals().
    start_time = time.time()
    try:
        # Imported inside the try so an import failure is also reported
        # through the JSON error response below.
        import pandas as pd

        df = pd.DataFrame(request.data)
        logger.info(f"[SSA] 开始生成数据画像: {df.shape}")

        profile = generate_data_profile(df, request.max_unique_values)
        result = {
            "success": True,
            "profile": profile,
        }
        if request.include_quality_score:
            result["quality"] = get_quality_score(profile)

        execution_time = time.time() - start_time
        result["execution_time"] = round(execution_time, 3)
        logger.info(f"[SSA] 数据画像生成完成: {execution_time:.3f}s")
        return JSONResponse(content=result)
    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback, which logger.error(str(e)) discarded.
        logger.exception(f"[SSA] 数据画像生成失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            # Rounded to 3 decimals to match the success path.
            "execution_time": round(time.time() - start_time, 3),
        }, status_code=400)
@app.post("/api/ssa/data-profile-csv")
async def ssa_data_profile_csv(request: DataProfileCSVRequest):
    """
    Generate a data profile from raw CSV text (SSA Phase 2A).

    Accepts the CSV content as a string and lets pandas parse it, which the
    original author notes is more efficient and reliable than parsing in
    Node.js and converting to JSON first.

    Args:
        request: DataProfileCSVRequest
            - csv_content: CSV file content (string)
            - max_unique_values: maximum number of unique values shown for
              a categorical variable
            - include_quality_score: whether to include the quality score

    Returns:
        A JSONResponse. On success:
        {
            "success": True,
            "profile": {...},
            "quality": {...} (optional),
            "execution_time": float
        }
        On any exception (including CSV parse errors), HTTP 400 with:
        {
            "success": False,
            "error": str,
            "execution_time": float
        }
    """
    import time

    # Start the clock before the try block so the error path can always
    # report an elapsed time without probing locals().
    start_time = time.time()
    try:
        # Imported inside the try so an import failure is also reported
        # through the JSON error response below.
        import pandas as pd
        from io import StringIO

        # pandas parses the CSV string directly and infers column types.
        df = pd.read_csv(StringIO(request.csv_content))
        logger.info(f"[SSA] CSV 解析完成,开始生成数据画像: {df.shape}")

        profile = generate_data_profile(df, request.max_unique_values)
        result = {
            "success": True,
            "profile": profile,
        }
        if request.include_quality_score:
            result["quality"] = get_quality_score(profile)

        execution_time = time.time() - start_time
        result["execution_time"] = round(execution_time, 3)
        logger.info(f"[SSA] 数据画像生成完成 (CSV): {execution_time:.3f}s")
        return JSONResponse(content=result)
    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback, which logger.error(str(e)) discarded.
        logger.exception(f"[SSA] CSV 数据画像生成失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            # Rounded to 3 decimals to match the success path.
            "execution_time": round(time.time() - start_time, 3),
        }, status_code=400)
# ==================== Word 导出 API ====================
@app.get("/api/pandoc/status")