|
16 | 16 | "lower_strip": lambda s: s.lower().strip(), |
17 | 17 | } |
18 | 18 |
|
def normalize(s: Optional[str], kind: Optional[str]) -> str:
    """Apply the normalizer registered under ``kind`` to ``s``.

    Args:
        s: Text to normalize. ``None`` is treated as the empty string.
        kind: Key into ``NORMALIZERS``. When no normalizer is registered
            under this key (including ``None``), the text is returned
            unchanged.

    Returns:
        The normalized text. If the normalizer raises, the un-normalized
        input is returned instead (best effort) and the failure is logged.
    """
    if s is None:
        s = ""
    fn = NORMALIZERS.get(kind)
    if not fn:
        return s
    try:
        return fn(s)
    except Exception as exc:
        # Deliberate best-effort: scoring should not crash on one bad input.
        # Log instead of swallowing silently so a broken normalizer is visible.
        import logging

        logging.getLogger(__name__).warning(
            "normalizer %r failed on %r (%s); using un-normalized value",
            kind, s, exc,
        )
        return s
22 | 30 |
|
def exact_match(pred: str, gold: str, norm: Optional[str]) -> float:
    """Return 1.0 if ``pred`` and ``gold`` are equal after normalization, else 0.0.

    Both strings are passed through ``normalize`` with the same ``norm`` key
    before being compared.
    """
    normalized_pred = normalize(pred, norm)
    normalized_gold = normalize(gold, norm)
    if normalized_pred == normalized_gold:
        return 1.0
    return 0.0
@@ -50,12 +58,17 @@ def rouge_l(pred: str, gold: str) -> Optional[float]: |
50 | 58 | scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True) |
51 | 59 | return scorer.score(gold, pred)['rougeL'].fmeasure |
52 | 60 |
|
def mc_accuracy(pred: Optional[str], answer: Optional[str], choices: List[str], norm: Optional[str]) -> float:
    """Score a multiple-choice prediction against the gold answer.

    The prediction is resolved to one of ``choices`` (all compared after
    ``normalize`` with the ``norm`` key) and the result is 1.0 when the
    resolved choice equals the normalized ``answer``, else 0.0.

    Resolution order:
      1. Exact match between the normalized prediction and a choice.
      2. Otherwise, among choices that contain the prediction or are
         contained in it, the one closest in length to the prediction
         (the earliest such choice wins ties).

    Args:
        pred: Predicted text; ``None`` is normalized to the empty string.
        answer: Gold answer text.
        choices: Candidate answers. An empty list scores 0.0.
        norm: Normalizer key forwarded to ``normalize``.

    Returns:
        1.0 if the resolved choice equals the answer, otherwise 0.0. A
        prediction that matches no choice (exactly or by containment)
        scores 0.0.
    """
    if not choices:
        return 0.0
    p = normalize(pred, norm)
    target = normalize(answer, norm)
    candidates = [normalize(c, norm) for c in choices]
    # Exact match takes priority over fuzzy containment.
    if p in candidates:
        return 1.0 if p == target else 0.0
    # An empty prediction is a substring of every choice; it must not be
    # resolved to one by the containment rule below.
    if not p:
        return 0.0
    try:
        # Only choices with a real containment relation are eligible. Empty
        # choices are excluded because "" is contained in every string.
        related = [c for c in candidates if (c and c in p) or p in c]
        if not related:
            return 0.0
        # min() keeps the earliest choice on ties, like max() on the negated key.
        best = min(related, key=lambda c: abs(len(c) - len(p)))
    except TypeError:
        # Non-string values cannot be compared by containment; score as a
        # miss rather than falling back to an arbitrary choice.
        return 0.0
    return 1.0 if best == target else 0.0
0 commit comments