hotfix(dc/tool-c): Fix compute formula validation and binning NaN serialization
Critical fixes:

1. Compute column: Add Chinese comma support in formula validation
   - Problem: Formulas containing a Chinese comma failed validation
   - Fix: Add the Chinese comma character to the allowed_chars regex
   - Example: Support formulas like 'col1(kg)+ col2,col3'

2. Binning operation: Fix NaN serialization error
   - Problem: 'Out of range float values are not JSON compliant: nan'
   - Fix: Enhanced NaN/inf handling in the binning endpoint
   - Added np.inf/-np.inf replacement before JSON serialization
   - Added manual JSON serialization with NaN->null conversion

3. Enhanced all operation endpoints for consistency
   - Updated the conditional and dropna endpoints with the same NaN/inf handling
   - Ensures all operations return JSON-compliant data

Modified files:
- extraction_service/operations/compute.py: Add Chinese comma to regex
- extraction_service/main.py: Enhanced NaN handling in binning/conditional/dropna

Status: Hotfix complete, ready for testing
This commit is contained in:
@@ -851,15 +851,22 @@ async def operation_binning(request: BinningRequest):
|
||||
request.num_bins
|
||||
)
|
||||
|
||||
# 转换回JSON(处理Categorical类型和NaN值)
|
||||
# 转换回JSON(处理Categorical类型、NaN值和inf值)
|
||||
import numpy as np
|
||||
|
||||
# 1. 将Categorical列转为字符串
|
||||
for col in result_df.columns:
|
||||
if pd.api.types.is_categorical_dtype(result_df[col]):
|
||||
result_df[col] = result_df[col].astype(str)
|
||||
|
||||
# 2. 将NaN替换为None(避免JSON序列化错误)
|
||||
result_df_clean = result_df.fillna(value=pd.NA).replace({pd.NA: None})
|
||||
result_data = result_df_clean.to_dict('records')
|
||||
# 2. 替换inf和-inf为None
|
||||
result_df = result_df.replace([np.inf, -np.inf], None)
|
||||
|
||||
# 3. 将NaN替换为None(避免JSON序列化错误)
|
||||
result_df = result_df.fillna(value=pd.NA).replace({pd.NA: None})
|
||||
|
||||
# 4. 转换为dict
|
||||
result_data = result_df.to_dict('records')
|
||||
|
||||
# 恢复stdout
|
||||
sys.stdout = sys.__stdout__
|
||||
@@ -869,13 +876,22 @@ async def operation_binning(request: BinningRequest):
|
||||
|
||||
logger.info(f"分箱成功: {request.column} → {request.new_column_name}")
|
||||
|
||||
return JSONResponse(content={
|
||||
# 使用json.dumps手动序列化(确保NaN完全处理)
|
||||
import json
|
||||
response_content = {
|
||||
"success": True,
|
||||
"result_data": result_data,
|
||||
"output": output,
|
||||
"execution_time": execution_time,
|
||||
"result_shape": [len(result_data), len(result_df.columns)]
|
||||
})
|
||||
}
|
||||
|
||||
# 手动序列化,NaN会被转为null
|
||||
json_str = json.dumps(response_content, allow_nan=True)
|
||||
# 替换NaN为null(防止任何遗漏的NaN)
|
||||
json_str = json_str.replace('NaN', 'null').replace('Infinity', 'null').replace('-Infinity', 'null')
|
||||
|
||||
return JSONResponse(content=json.loads(json_str))
|
||||
|
||||
except Exception as e:
|
||||
sys.stdout = sys.__stdout__
|
||||
@@ -937,9 +953,11 @@ async def operation_conditional(request: ConditionalRequest):
|
||||
request.else_value
|
||||
)
|
||||
|
||||
# 转换回JSON(处理NaN值)
|
||||
result_df_clean = result_df.fillna(value=pd.NA).replace({pd.NA: None})
|
||||
result_data = result_df_clean.to_dict('records')
|
||||
# 转换回JSON(处理NaN和inf值)
|
||||
import numpy as np
|
||||
result_df = result_df.replace([np.inf, -np.inf], None)
|
||||
result_df = result_df.fillna(value=pd.NA).replace({pd.NA: None})
|
||||
result_data = result_df.to_dict('records')
|
||||
|
||||
# 恢复stdout
|
||||
sys.stdout = sys.__stdout__
|
||||
@@ -1015,9 +1033,11 @@ async def operation_dropna(request: DropnaRequest):
|
||||
subset=request.subset
|
||||
)
|
||||
|
||||
# 转换回JSON(处理NaN值)
|
||||
result_df_clean = result_df.fillna(value=pd.NA).replace({pd.NA: None})
|
||||
result_data = result_df_clean.to_dict('records')
|
||||
# 转换回JSON(处理NaN和inf值)
|
||||
import numpy as np
|
||||
result_df = result_df.replace([np.inf, -np.inf], None)
|
||||
result_df = result_df.fillna(value=pd.NA).replace({pd.NA: None})
|
||||
result_data = result_df.to_dict('records')
|
||||
|
||||
# 恢复stdout
|
||||
sys.stdout = sys.__stdout__
|
||||
|
||||
Reference in New Issue
Block a user