hotfix(dc/tool-c): Fix compute formula validation and binning NaN serialization
Critical fixes:

1. Compute column: add Chinese (full-width) comma support in formula validation
   - Problem: a formula containing the Chinese comma ',' failed validation.
   - Fix: add the Chinese comma character to the allowed_chars regex.
   - Example: formulas like 'col1(kg)+ col2,col3' are now supported.

2. Binning operation: fix NaN serialization error
   - Problem: 'Out of range float values are not JSON compliant: nan'.
   - Fix: enhanced NaN/inf handling in the binning endpoint.
   - Added np.inf/-np.inf replacement before JSON serialization.
   - Added manual JSON serialization with NaN -> null conversion.

3. Enhanced all operation endpoints for consistency
   - Updated the conditional and dropna endpoints with the same NaN/inf handling.
   - Ensures all operations return JSON-compliant data.

Modified files:
- extraction_service/operations/compute.py: add Chinese comma to regex
- extraction_service/main.py: enhanced NaN handling in binning/conditional/dropna

Status: hotfix complete, ready for testing.
This commit is contained in:
@@ -15,3 +15,4 @@
|
||||
__version__ = '1.0.0'
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -60,8 +60,8 @@ def validate_formula(formula: str, available_columns: list) -> tuple[bool, str]:
|
||||
return False, f'公式包含不允许的操作: {pattern}'
|
||||
|
||||
# ✨ 增强:检查是否只包含允许的字符(放宽限制,支持更多特殊字符)
|
||||
# 允许:英文字母、数字、下划线、中文、空格、运算符、括号(中英文)、逗号、点、冒号、等号
|
||||
allowed_chars = r'[a-zA-Z0-9_\u4e00-\u9fa5\s\+\-\*/\(\)\[\]\{\}\.,:\*\*=()【】、。:;!?]'
|
||||
# 允许:英文字母、数字、下划线、中文、空格、运算符、括号(中英文)、逗号(中英文)、点、冒号、等号
|
||||
allowed_chars = r'[a-zA-Z0-9_\u4e00-\u9fa5\s\+\-\*/\(\)\[\]\{\}\.,,:\*\*=()【】、。:;!?]'
|
||||
if not re.match(f'^{allowed_chars}+$', formula):
|
||||
# 找出不允许的字符
|
||||
invalid_chars = set(re.findall(f'[^{allowed_chars}]', formula))
|
||||
|
||||
@@ -148,3 +148,4 @@ def get_missing_summary(df: pd.DataFrame) -> dict:
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -108,3 +108,4 @@ def apply_filter(
|
||||
return result
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user