feat(rvw): harden json parsing and finalize 0316 rollout

Stabilize RVW editorial and methodology JSON parsing in production with layered repair and fallback handling, then publish the paired frontend task-level language selector updates. Also reset the deployment checklist, record the 0316 deployment summary, and refresh the SAE runtime status with the latest backend/frontend IPs.

Made-with: Cursor
2026-03-16 00:24:33 +08:00
parent 707f783229
commit c3554fd61d
17 changed files with 704 additions and 239 deletions
--- a/backend/src/modules/rvw/services/utils.ts
+++ b/backend/src/modules/rvw/services/utils.ts
@@ -6,71 +6,111 @@
 import { MethodologyReview, MethodologyStatus } from '../types/index.js';
 import { jsonrepair } from 'jsonrepair';

+function tryParseJsonCandidate<T>(candidate: string): T | null { // Parses one candidate string as JSON, retrying once through jsonrepair; returns null when both attempts fail.
+  const normalized = candidate.trim().replace(/^\uFEFF/, ''); // drop surrounding whitespace and a leading BOM (U+FEFF)
+  if (!normalized) return null; // nothing left to parse
+  try {
+    return JSON.parse(normalized) as T; // unchecked cast, shape is not validated; NOTE(review): a literal JSON `null` also comes back as null and looks like a failure to callers
+  } catch {
+    try {
+      const repaired = jsonrepair(normalized); // second chance: pass the text through jsonrepair, then parse its output strictly
+      return JSON.parse(repaired) as T;
+    } catch {
+      return null; // neither strict parsing nor the repaired text yielded JSON
+    }
+  }
+}
+
+function extractBalancedJsonCandidates(content: string): string[] { // Collects every outermost balanced {...} / [...] span of `content`, in order of appearance.
+  const text = content || ''; // falsy content is scanned as an empty string
+  const candidates: string[] = [];
+  const stack: string[] = []; // currently open brackets, innermost last
+  let start = -1; // index of the outermost open bracket; -1 while none is open
+  let inString = false; // true while between two unescaped double quotes
+  let escaped = false; // true when the previous in-string character was a backslash
+  // Single left-to-right scan, one UTF-16 code unit per step.
+  for (let i = 0; i < text.length; i++) {
+    const ch = text[i];
+    // Inside a string: brackets are ignored; only escapes and the closing quote matter.
+    if (inString) {
+      if (escaped) {
+        escaped = false;
+      } else if (ch === '\\') {
+        escaped = true;
+      } else if (ch === '"') {
+        inString = false;
+      }
+      continue;
+    }
+    // NOTE(review): quotes are tracked even when no bracket is open, so a stray `"` in surrounding prose flips string mode and can hide a following JSON span — confirm the later fallbacks cover this.
+    if (ch === '"') {
+      inString = true;
+      continue;
+    }
+    // Opening bracket: remember where the outermost one starts, then push it.
+    if (ch === '{' || ch === '[') {
+      if (stack.length === 0) start = i;
+      stack.push(ch);
+      continue;
+    }
+    // Closing bracket: pop on a match and emit the span once the stack empties.
+    if (ch === '}' || ch === ']') {
+      if (stack.length === 0) continue; // stray closer with nothing open: ignore it
+      const open = stack[stack.length - 1];
+      if ((open === '{' && ch === '}') || (open === '[' && ch === ']')) {
+        stack.pop();
+        if (stack.length === 0 && start >= 0) {
+          candidates.push(text.slice(start, i + 1)); // slice includes both the opening and the closing bracket
+          start = -1;
+        }
+      } else {
+        // On a stack mismatch, reset and keep looking for the next valid fragment
+        stack.length = 0;
+        start = -1;
+      }
+    }
+  }
+  // Spans still open when the text ends are never emitted.
+  return candidates;
+}
+
 /**
 * 从LLM响应中解析JSON
 * 支持多种格式：纯JSON、```json代码块、混合文本
 */
 export function parseJSONFromLLMResponse<T>(content: string): T {
-  try {
-    // 1. 尝试直接解析
-    return JSON.parse(content) as T;
-  } catch {
-    // 1.1 先尝试 jsonrepair（处理尾逗号、引号缺失等常见脏 JSON）
-    try {
-      const repaired = jsonrepair(content);
-      return JSON.parse(repaired) as T;
-    } catch {
-      // 继续后续提取策略
-    }
+  // 1) Direct parse + jsonrepair on the whole response
+  const direct = tryParseJsonCandidate<T>(content); // NOTE(review): the whole response goes through jsonrepair before any extraction; verify it cannot coerce prose-wrapped text into an unintended value
+  if (direct !== null) return direct; // null means "not parseable", so fall through to the extraction strategies

-    // 2. 尝试提取```json代码块
-    const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)\n?```/);
-    if (jsonMatch) {
-      try {
-        return JSON.parse(jsonMatch[1].trim()) as T;
-      } catch {
-        // 尝试修复代码块 JSON
-        try {
-          const repaired = jsonrepair(jsonMatch[1].trim());
-          return JSON.parse(repaired) as T;
-        } catch {
-          // 继续尝试其他方法
-        }
-      }
-    }
-
-    // 3. 尝试提取{}或[]包裹的内容
-    const objectMatch = content.match(/(\{[\s\S]*\})/);
-    if (objectMatch) {
-      try {
-        return JSON.parse(objectMatch[1]) as T;
-      } catch {
-        try {
-          const repaired = jsonrepair(objectMatch[1]);
-          return JSON.parse(repaired) as T;
-        } catch {
-          // 继续尝试其他方法
-        }
-      }
-    }
-
-    const arrayMatch = content.match(/(\[[\s\S]*\])/);
-    if (arrayMatch) {
-      try {
-        return JSON.parse(arrayMatch[1]) as T;
-      } catch {
-        try {
-          const repaired = jsonrepair(arrayMatch[1]);
-          return JSON.parse(repaired) as T;
-        } catch {
-          // 失败
-        }
-      }
-    }
-
-    // 4. 所有尝试都失败
-    throw new Error('无法从LLM响应中解析JSON');
+  // 2) Extract Markdown code fences (```json / ```) and try each one in order
+  const fenceRegex = /```(?:json)?\s*\n?([\s\S]*?)\n?```/gi; // the "json" tag is optional and matched case-insensitively
+  for (const match of content.matchAll(fenceRegex)) {
+    const parsed = tryParseJsonCandidate<T>(match[1] || '');
+    if (parsed !== null) return parsed; // first fence that parses wins
  }
+
+  // 3) Balanced-bracket extraction; try each candidate in turn
+  const balancedCandidates = extractBalancedJsonCandidates(content);
+  for (const candidate of balancedCandidates) {
+    const parsed = tryParseJsonCandidate<T>(candidate);
+    if (parsed !== null) return parsed; // first balanced span that parses wins
+  }
+
+  // 4) Last resort: greedy regex for an object / array (covers extreme cases)
+  const objectMatch = content.match(/(\{[\s\S]*\})/); // greedy: spans from the first "{" to the last "}"
+  if (objectMatch) {
+    const parsed = tryParseJsonCandidate<T>(objectMatch[1]);
+    if (parsed !== null) return parsed;
+  }
+  const arrayMatch = content.match(/(\[[\s\S]*\])/); // greedy: spans from the first "[" to the last "]"
+  if (arrayMatch) {
+    const parsed = tryParseJsonCandidate<T>(arrayMatch[1]);
+    if (parsed !== null) return parsed;
+  }
+
+  // 5) Every attempt failed
+  throw new Error('无法从LLM响应中解析JSON');
 }

 /**