feat(dc): Complete Tool C quick action buttons Phase 1-2 - 7 functions
Summary: - Implement 7 quick action functions (filter, recode, binning, conditional, dropna, compute, pivot) - Refactor to pre-written Python functions architecture (stable and secure) - Add 7 Python operations modules with full type hints - Add 7 frontend Dialog components with user-friendly UI - Fix NaN serialization issues and auto type conversion - Update all related documentation Technical Details: - Python: operations/ module (filter.py, recode.py, binning.py, conditional.py, dropna.py, compute.py, pivot.py) - Backend: QuickActionService.ts with 7 execute methods - Frontend: 7 Dialog components with complete validation - Toolbar: Enable 7 quick action buttons Status: Phase 1-2 completed, basic testing passed, ready for further testing
This commit is contained in:
@@ -62,6 +62,15 @@ from services.docx_extractor import extract_docx_mammoth, validate_docx_file
|
||||
from typing import Optional

from services.txt_extractor import extract_txt, validate_txt_file
from services.dc_executor import validate_code, execute_pandas_code

# ✨ Import the pre-written data-operation functions
from operations.filter import apply_filter
from operations.recode import apply_recode
from operations.binning import apply_binning
from operations.conditional import apply_conditional_column, apply_simple_binning
from operations.dropna import drop_missing_values, get_missing_summary
from operations.compute import compute_column, get_formula_examples
from operations.pivot import pivot_long_to_wide, get_pivot_preview
# ==================== Pydantic Models ====================
|
||||
|
||||
@@ -74,6 +83,59 @@ class ExecuteCodeRequest(BaseModel):
|
||||
data: List[Dict[str, Any]]
|
||||
code: str
|
||||
|
||||
# ✨ Request models for the pre-written operation functions
|
||||
class FilterRequest(BaseModel):
    """Request model for the advanced filter operation."""
    # Input rows, one dict per record.
    data: List[Dict[str, Any]]
    # Filter conditions passed through to operations.filter.apply_filter.
    conditions: List[Dict[str, Any]]
    # How multiple conditions combine: 'and' or 'or'.
    logic: str = 'and'
class RecodeRequest(BaseModel):
    """Request model for the recode (value-mapping) operation."""
    # Input rows, one dict per record.
    data: List[Dict[str, Any]]
    # Name of the column whose values are remapped.
    column: str
    # Mapping of old value -> new value.
    mapping: Dict[Any, Any]
    # When True, write the result to a new column instead of in place.
    create_new_column: bool = True
    # Target column name; None means "derive automatically".
    # (Was `str = None`, which mis-declares the type of the default.)
    new_column_name: Optional[str] = None
class BinningRequest(BaseModel):
    """Request model for the binning (categorical bucketing) operation."""
    # Input rows, one dict per record.
    data: List[Dict[str, Any]]
    # Numeric column to bin.
    column: str
    # Binning method understood by operations.binning.apply_binning.
    method: str
    # Name of the new categorical column.
    new_column_name: str
    # Explicit bin edges; None lets the chosen method decide.
    # (Was `List[Any] = None`, which mis-declares the type of the default.)
    bins: Optional[List[Any]] = None
    # Labels for the produced bins; None keeps the pandas defaults.
    labels: Optional[List[Any]] = None
    # Number of equal-width/-size bins when no explicit edges are given.
    num_bins: int = 3
class ConditionalRequest(BaseModel):
    """Request model for the conditional column-generation operation."""
    # Input rows, one dict per record.
    data: List[Dict[str, Any]]
    # Name of the column to create.
    new_column_name: str
    # IF-THEN rules; each rule holds conditions, logic and the value to assign.
    rules: List[Dict[str, Any]]
    # Value assigned when no rule matches.
    else_value: Any = None
class DropnaRequest(BaseModel):
    """Request model for the drop-missing-values operation."""
    # Input rows, one dict per record.
    data: List[Dict[str, Any]]
    # Drop strategy: 'row', 'column' or 'both'.
    method: str  # 'row', 'column', 'both'
    # Missing-rate threshold (0-1) above which a row/column is dropped.
    threshold: float = 0.5
    # Restrict the missing-value check to these columns; None means all.
    # (Was `List[str] = None`, which mis-declares the type of the default.)
    subset: Optional[List[str]] = None
class ComputeRequest(BaseModel):
    """Request model for the computed-column operation."""
    # Input rows, one dict per record.
    data: List[Dict[str, Any]]
    # Name of the column to create.
    new_column_name: str
    # Formula string evaluated by operations.compute.compute_column.
    formula: str
class PivotRequest(BaseModel):
    """Request model for the pivot (long-to-wide) operation."""
    # Input rows, one dict per record.
    data: List[Dict[str, Any]]
    # Column whose values become the row index of the wide table.
    index_column: str
    # Column whose distinct values become the new columns.
    pivot_column: str
    # Columns providing the cell values.
    value_columns: List[str]
    # Aggregation applied when several rows collapse into one cell.
    aggfunc: str = 'first'
# ==================== API Routes ====================
|
||||
|
||||
@@ -592,6 +654,577 @@ async def execute_pandas_code_endpoint(request: ExecuteCodeRequest):
|
||||
)
|
||||
|
||||
|
||||
# ==================== ✨ Pre-written Function API Endpoints ====================
|
||||
|
||||
@app.post("/api/operations/filter")
async def operation_filter(request: FilterRequest):
    """
    Advanced filter operation (pre-written function).

    Converts the posted rows to a DataFrame, applies `apply_filter`,
    and returns the surviving rows in a JSON-safe form.

    Args:
        request: FilterRequest
            - data: List[Dict]       input rows
            - conditions: List[Dict] filter conditions
            - logic: str             'and' or 'or'

    Returns:
        200 {"success": True, "result_data": List[Dict], "output": str,
             "execution_time": float, "result_shape": [rows, cols]}
        400 {"success": False, "error": str, "execution_time": float}
    """
    import contextlib
    import io
    import time

    import numpy as np
    import pandas as pd

    start_time = time.time()
    captured_output = io.StringIO()
    try:
        # redirect_stdout scopes the capture to this call; the previous
        # `sys.stdout = ...` swap leaked across concurrent async requests
        # and could leave stdout redirected if restoration was skipped.
        with contextlib.redirect_stdout(captured_output):
            df = pd.DataFrame(request.data)
            result_df = apply_filter(df, request.conditions, request.logic)

        # Make the frame JSON-serializable: +/-inf and NaN/NaT all become None.
        result_df = result_df.replace([np.inf, -np.inf], np.nan)
        cleaned = result_df.astype(object).where(result_df.notna(), None)
        result_data = cleaned.to_dict('records')

        execution_time = time.time() - start_time
        logger.info(f"筛选成功: {len(request.data)} → {len(result_data)} 行")

        return JSONResponse(content={
            "success": True,
            "result_data": result_data,
            "output": captured_output.getvalue(),
            "execution_time": execution_time,
            "result_shape": [len(result_data), len(result_df.columns)],
        })

    except Exception as e:
        # start_time is always bound before the try, so no locals() guard
        # (or risk of NameError, as in the original) remains.
        logger.error(f"筛选操作失败: {str(e)}")
        # HTTP 400 on failure, for consistency with the sibling endpoints.
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            "execution_time": time.time() - start_time,
        }, status_code=400)
@app.post("/api/operations/recode")
async def operation_recode(request: RecodeRequest):
    """
    Recode (value-mapping) operation (pre-written function).

    Args:
        request: RecodeRequest — data, column, mapping,
            create_new_column, new_column_name.

    Returns:
        200 {"success": True, "result_data": List[Dict], "output": str,
             "execution_time": float, "result_shape": [rows, cols]}
        400 {"success": False, "error": str, "execution_time": float}
    """
    import contextlib
    import io
    import time

    import numpy as np
    import pandas as pd

    start_time = time.time()
    captured_output = io.StringIO()
    try:
        # Scoped stdout capture — safe under concurrent requests, unlike
        # the previous global `sys.stdout` reassignment.
        with contextlib.redirect_stdout(captured_output):
            df = pd.DataFrame(request.data)
            result_df = apply_recode(
                df,
                request.column,
                request.mapping,
                request.create_new_column,
                request.new_column_name,
            )

        # Make the frame JSON-serializable: +/-inf and NaN all become None.
        result_df = result_df.replace([np.inf, -np.inf], np.nan)
        cleaned = result_df.astype(object).where(result_df.notna(), None)
        result_data = cleaned.to_dict('records')

        execution_time = time.time() - start_time
        logger.info(f"重编码成功: {request.column}")

        return JSONResponse(content={
            "success": True,
            "result_data": result_data,
            "output": captured_output.getvalue(),
            "execution_time": execution_time,
            "result_shape": [len(result_data), len(result_df.columns)],
        })

    except Exception as e:
        logger.error(f"重编码操作失败: {str(e)}")
        # HTTP 400 on failure, for consistency with the sibling endpoints.
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            "execution_time": time.time() - start_time,
        }, status_code=400)
@app.post("/api/operations/binning")
async def operation_binning(request: BinningRequest):
    """
    Binning (categorical bucketing) operation (pre-written function).

    Args:
        request: BinningRequest — data, column, method, new_column_name,
            bins, labels, num_bins.

    Returns:
        200 {"success": True, "result_data": List[Dict], "output": str,
             "execution_time": float, "result_shape": [rows, cols]}
        400 {"success": False, "error": str, "execution_time": float}
    """
    import contextlib
    import io
    import time

    import numpy as np
    import pandas as pd

    start_time = time.time()
    captured_output = io.StringIO()
    try:
        # Scoped stdout capture — safe under concurrent requests.
        with contextlib.redirect_stdout(captured_output):
            df = pd.DataFrame(request.data)
            result_df = apply_binning(
                df,
                request.column,
                request.method,
                request.new_column_name,
                request.bins,
                request.labels,
                request.num_bins,
            )

        # 1. Categorical columns are not JSON-serializable — stringify them.
        #    isinstance(..., pd.CategoricalDtype) replaces the deprecated
        #    pd.api.types.is_categorical_dtype().
        for col in result_df.columns:
            if isinstance(result_df[col].dtype, pd.CategoricalDtype):
                result_df[col] = result_df[col].astype(str)

        # 2. Make the frame JSON-serializable: +/-inf and NaN become None.
        result_df = result_df.replace([np.inf, -np.inf], np.nan)
        cleaned = result_df.astype(object).where(result_df.notna(), None)
        result_data = cleaned.to_dict('records')

        execution_time = time.time() - start_time
        logger.info(f"分箱成功: {request.column} → {request.new_column_name}")

        return JSONResponse(content={
            "success": True,
            "result_data": result_data,
            "output": captured_output.getvalue(),
            "execution_time": execution_time,
            "result_shape": [len(result_data), len(result_df.columns)],
        })

    except Exception as e:
        logger.error(f"分箱操作失败: {str(e)}")
        # HTTP 400 on failure, for consistency with the sibling endpoints.
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            "execution_time": time.time() - start_time,
        }, status_code=400)
@app.post("/api/operations/conditional")
async def operation_conditional(request: ConditionalRequest):
    """
    Conditional column-generation operation (pre-written function).

    Builds a new column from IF-THEN-ELSE rules.

    Args:
        request: ConditionalRequest
            - data: input rows
            - new_column_name: name of the column to create
            - rules: rule list; each rule holds conditions, logic, result
            - else_value: value assigned when no rule matches

    Returns:
        200 {"success": True, "result_data": List[Dict], "output": str,
             "execution_time": float, "result_shape": [rows, cols]}
        400 {"success": False, "error": str, "execution_time": float}
    """
    import contextlib
    import io
    import time

    import numpy as np
    import pandas as pd

    start_time = time.time()
    captured_output = io.StringIO()
    try:
        # Scoped stdout capture — safe under concurrent requests.
        with contextlib.redirect_stdout(captured_output):
            df = pd.DataFrame(request.data)
            result_df = apply_conditional_column(
                df,
                request.new_column_name,
                request.rules,
                request.else_value,
            )

        # Make the frame JSON-serializable: +/-inf and NaN become None
        # (the inf step was missing here but present in sibling endpoints).
        result_df = result_df.replace([np.inf, -np.inf], np.nan)
        cleaned = result_df.astype(object).where(result_df.notna(), None)
        result_data = cleaned.to_dict('records')

        execution_time = time.time() - start_time
        logger.info(f"条件生成列成功: {request.new_column_name}")

        return JSONResponse(content={
            "success": True,
            "result_data": result_data,
            "output": captured_output.getvalue(),
            "execution_time": execution_time,
            "result_shape": [len(result_data), len(result_df.columns)],
        })

    except Exception as e:
        logger.error(f"条件生成列操作失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            "execution_time": time.time() - start_time,
        }, status_code=400)
@app.post("/api/operations/dropna")
async def operation_dropna(request: DropnaRequest):
    """
    Drop-missing-values operation (pre-written function).

    Args:
        request: DropnaRequest
            - data: input rows
            - method: drop strategy ('row', 'column', 'both')
            - threshold: missing-rate threshold (0-1)
            - subset: restrict the check to these columns (optional)

    Returns:
        200 {"success": True, "result_data": List[Dict], "output": str,
             "execution_time": float, "result_shape": [rows, cols]}
        400 {"success": False, "error": str, "execution_time": float}
    """
    import contextlib
    import io
    import time

    import numpy as np
    import pandas as pd

    start_time = time.time()
    captured_output = io.StringIO()
    try:
        # Scoped stdout capture — safe under concurrent requests.
        with contextlib.redirect_stdout(captured_output):
            df = pd.DataFrame(request.data)
            result_df = drop_missing_values(
                df,
                method=request.method,
                threshold=request.threshold,
                subset=request.subset,
            )

        # Make the frame JSON-serializable: +/-inf and NaN become None.
        result_df = result_df.replace([np.inf, -np.inf], np.nan)
        cleaned = result_df.astype(object).where(result_df.notna(), None)
        result_data = cleaned.to_dict('records')

        execution_time = time.time() - start_time
        logger.info(f"删除缺失值成功: {request.method}")

        return JSONResponse(content={
            "success": True,
            "result_data": result_data,
            "output": captured_output.getvalue(),
            "execution_time": execution_time,
            "result_shape": [len(result_data), len(result_df.columns)],
        })

    except Exception as e:
        logger.error(f"删除缺失值操作失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            "execution_time": time.time() - start_time,
        }, status_code=400)
@app.post("/api/operations/compute")
async def operation_compute(request: ComputeRequest):
    """
    Computed-column operation (pre-written function).

    Evaluates a formula and stores the result in a new column.

    Args:
        request: ComputeRequest
            - data: input rows
            - new_column_name: name of the column to create
            - formula: formula string for compute_column

    Returns:
        200 {"success": True, "result_data": List[Dict], "output": str,
             "execution_time": float, "result_shape": [rows, cols]}
        400 {"success": False, "error": str, "execution_time": float}
    """
    import contextlib
    import io
    import time

    import numpy as np
    import pandas as pd

    start_time = time.time()
    captured_output = io.StringIO()
    try:
        # Scoped stdout capture — safe under concurrent requests.
        with contextlib.redirect_stdout(captured_output):
            df = pd.DataFrame(request.data)
            result_df = compute_column(
                df,
                request.new_column_name,
                request.formula,
            )

        # Make the frame JSON-serializable: +/-inf and NaN become None.
        # The previous implementation additionally round-tripped through
        # json.dumps and str.replace('NaN'/'Infinity', 'null'), which
        # corrupted legitimate string values containing those substrings
        # and turned "-Infinity" into invalid JSON "-null" (the inner
        # "Infinity" was replaced first). After this cleanup no NaN/inf
        # remain, so a plain JSONResponse is correct.
        result_df = result_df.replace([np.inf, -np.inf], np.nan)
        cleaned = result_df.astype(object).where(result_df.notna(), None)
        result_data = cleaned.to_dict('records')

        execution_time = time.time() - start_time
        logger.info(f"计算列成功: {request.new_column_name}")

        return JSONResponse(content={
            "success": True,
            "result_data": result_data,
            "output": captured_output.getvalue(),
            "execution_time": execution_time,
            "result_shape": [len(result_data), len(result_df.columns)],
        })

    except Exception as e:
        logger.error(f"计算列操作失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            "execution_time": time.time() - start_time,
        }, status_code=400)
@app.post("/api/operations/pivot")
async def operation_pivot(request: PivotRequest):
    """
    Pivot operation: long table to wide table (pre-written function).

    Spreads repeated vertical records into columns.

    Args:
        request: PivotRequest
            - data: input rows
            - index_column: column forming the wide-table index
            - pivot_column: column whose values become new columns
            - value_columns: columns providing the cell values
            - aggfunc: aggregation for colliding cells

    Returns:
        200 {"success": True, "result_data": List[Dict], "output": str,
             "execution_time": float, "result_shape": [rows, cols]}
        400 {"success": False, "error": str, "execution_time": float}
    """
    import contextlib
    import io
    import time

    import numpy as np
    import pandas as pd

    start_time = time.time()
    captured_output = io.StringIO()
    try:
        # Scoped stdout capture — safe under concurrent requests.
        with contextlib.redirect_stdout(captured_output):
            df = pd.DataFrame(request.data)
            result_df = pivot_long_to_wide(
                df,
                request.index_column,
                request.pivot_column,
                request.value_columns,
                request.aggfunc,
            )

        # Make the frame JSON-serializable: +/-inf and NaN become None.
        result_df = result_df.replace([np.inf, -np.inf], np.nan)
        cleaned = result_df.astype(object).where(result_df.notna(), None)
        result_data = cleaned.to_dict('records')

        execution_time = time.time() - start_time
        logger.info(f"Pivot成功: {request.index_column} × {request.pivot_column}")

        return JSONResponse(content={
            "success": True,
            "result_data": result_data,
            "output": captured_output.getvalue(),
            "execution_time": execution_time,
            "result_shape": [len(result_data), len(result_df.columns)],
        })

    except Exception as e:
        logger.error(f"Pivot操作失败: {str(e)}")
        return JSONResponse(content={
            "success": False,
            "error": str(e),
            "execution_time": time.time() - start_time,
        }, status_code=400)
# ==================== Startup Configuration ====================
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user