diff --git a/slugify/slugify.py b/slugify/slugify.py
index 9b5f27f..ae6eb9b 100644
--- a/slugify/slugify.py
+++ b/slugify/slugify.py
@@ -24,6 +24,20 @@
 DEFAULT_SEPARATOR = '-'
 
 
+def _convert_decimal_reference(match: re.Match[str]) -> str:
+    try:
+        return chr(int(match.group(1)))
+    except (OverflowError, ValueError):
+        return match.group(0)
+
+
+def _convert_hexadecimal_reference(match: re.Match[str]) -> str:
+    try:
+        return chr(int(match.group(1), 16))
+    except (OverflowError, ValueError):
+        return match.group(0)
+
+
 def smart_truncate(
     string: str,
     max_length: int = 0,
@@ -134,17 +148,11 @@ def slugify(
 
     # decimal character reference
     if decimal:
-        try:
-            text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text)
-        except Exception:
-            pass
+        text = DECIMAL_PATTERN.sub(_convert_decimal_reference, text)
 
     # hexadecimal character reference
     if hexadecimal:
-        try:
-            text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text)
-        except Exception:
-            pass
+        text = HEX_PATTERN.sub(_convert_hexadecimal_reference, text)
 
     # re normalize text
     if allow_unicode:
diff --git a/test.py b/test.py
index fcec4b6..9e43934 100644
--- a/test.py
+++ b/test.py
@@ -166,6 +166,11 @@ def test_html_decimal_on(self):
         r = slugify(txt, decimal=True)
         self.assertEqual(r, 'z')
 
+    def test_html_decimal_on_with_invalid_reference(self):
+        txt = '&#381; &#9999999999;'
+        r = slugify(txt, decimal=True)
+        self.assertEqual(r, 'z-9999999999')
+
     def test_html_decimal_off(self):
         txt = '&#381;'
         r = slugify(txt, entities=False, decimal=False)
@@ -176,6 +181,11 @@ def test_html_hexadecimal_on(self):
         r = slugify(txt, hexadecimal=True)
         self.assertEqual(r, 'z')
 
+    def test_html_hexadecimal_on_with_invalid_reference(self):
+        txt = '&#x17D; &#x110000;'
+        r = slugify(txt, hexadecimal=True)
+        self.assertEqual(r, 'z-x110000')
+
     def test_html_hexadecimal_off(self):
         txt = '&#x17D;'
         r = slugify(txt, hexadecimal=False)