diff --git a/simulstream/metrics/scorers/latency/mwersegmenter.py b/simulstream/metrics/scorers/latency/mwersegmenter.py index 3a13b8d..75fb40f 100644 --- a/simulstream/metrics/scorers/latency/mwersegmenter.py +++ b/simulstream/metrics/scorers/latency/mwersegmenter.py @@ -123,7 +123,7 @@ def _tokenize(self, text: List[str]) -> List[str]: encoded = [" ".join(self.segmenter.encode(p)) for p in pieces] tokenized_text.append(" ### ".join(encoded)) else: - tokenized_text.append(" ".join(self.segmenter.encode(text[i].strip()))) + tokenized_text.append(" ".join(self.segmenter.encode(text[i]))) return "\n".join(tokenized_text) else: return "\n".join(text)