Skip to content

Commit 3a169cf

Browse files
authored
Refactor#22: Make LanguageCode model Pydantic-idiomatic (#85)
* refactor#22: make LanguageCode model Pydantic-idiomatic Signed-off-by: sushant-suse <[email protected]> * refactor#22: Renamed changelog file Signed-off-by: sushant-suse <[email protected]> * refactor#22: make LanguageCode model Pydantic-idiomatic and restored xpath() Signed-off-by: sushant-suse <[email protected]> * refactor #22: updated language.py file as per Tom's comments Signed-off-by: sushant-suse <[email protected]> --------- Signed-off-by: sushant-suse <[email protected]>
1 parent 7d7dd9f commit 3a169cf

7 files changed

Lines changed: 92 additions & 76 deletions

File tree

changelog.d/85.refactor.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Refactored the :class:`~docbuild.models.language.LanguageCode` model to be more idiomatic to Pydantic by removing a custom ``__init__`` initializer and using a :meth:`~docbuild.models.language.LanguageCode.model_validator` method for string parsing.

src/docbuild/models/doctype.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def coerce_langs(cls, value: str | list[str | LanguageCode]) -> list[LanguageCod
204204
value = sorted(value.split(','))
205205
return sorted(
206206
[
207-
lang if isinstance(lang, LanguageCode) else LanguageCode(lang)
207+
lang if isinstance(lang, LanguageCode) else LanguageCode(language=lang)
208208
for lang in value
209209
]
210210
)
@@ -273,4 +273,4 @@ def xpath(self) -> str:
273273
language = ' or '.join([f'@lang={lang.language!r}' for lang in self.langs])
274274
language = f'language[{language}]'
275275

276-
return f'{product}/{docset}/builddocs/{language}'
276+
return f'{product}/{docset}/builddocs/{language}'

src/docbuild/models/language.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,14 @@
11
"""Language model for representing language codes."""
22

3-
from functools import total_ordering
3+
from functools import total_ordering, cached_property
44
import re
55
from typing import Any, ClassVar
66

7-
from pydantic import BaseModel, Field, computed_field
7+
from pydantic import BaseModel, Field, computed_field, model_validator, field_validator
88
from pydantic.config import ConfigDict
9-
from pydantic.functional_validators import field_validator
109

1110
from ..constants import ALLOWED_LANGUAGES
1211

13-
# Old definition:
14-
# Language allows all the definied languages, but also "*" (=ALL).
15-
# We only define "ALL" as uppercase to denote a constant, the rest is lowercase.
16-
# Language = StrEnum(
17-
# "Language",
18-
# # The dict is mapped like "de_de": "de-de"
19-
# {"ALL": "*"} | {item.replace("-", "_"): item
20-
# for item in sorted(ALLOWED_LANGUAGES)},
21-
# )
22-
2312

2413
@total_ordering
2514
class LanguageCode(BaseModel):
@@ -52,13 +41,13 @@ class LanguageCode(BaseModel):
5241
)
5342
"""Class variable containing all allowed languages."""
5443

55-
def __init__(self, language: str, **kwargs: dict[Any, Any]) -> None:
56-
"""Initialize the LanguageCode instance."""
57-
super().__init__(language=language.replace('_', '-'), **kwargs)
58-
if language == '*':
59-
self._lang, self._country = ('*', '*')
60-
else:
61-
self._lang, self._country = re.split(r'[_-]', language)
44+
@model_validator(mode='before')
45+
@classmethod
46+
def _convert_str_to_dict(cls, data: Any) -> Any:
47+
"""Allow initializing LanguageCode from a plain string."""
48+
if isinstance(data, str):
49+
return {'language': data}
50+
return data
6251

6352
def __str__(self) -> str:
6453
"""Implement str(self)."""
@@ -129,12 +118,19 @@ def matches(self, other: 'LanguageCode | str') -> bool:
129118
return (
130119
self.language == '*' or other_value == '*' or self.language == other_value
131120
)
121+
122+
@field_validator('language', mode='before')
123+
@classmethod
124+
def _normalize_language_separator(cls, value: str) -> str:
125+
"""Normalize separator from _ to -."""
126+
if isinstance(value, str):
127+
return value.replace('_', '-')
128+
return value
132129

133130
@field_validator('language')
134131
@classmethod
135132
def validate_language(cls, value: str) -> str:
136133
"""Check if the passed language adheres to the allowed language."""
137-
# value = value.replace("_", "-")
138134
if value not in cls.ALLOWED_LANGS:
139135
raise ValueError(
140136
(
@@ -144,14 +140,32 @@ def validate_language(cls, value: str) -> str:
144140
)
145141
return value
146142

143+
@cached_property
144+
def _parts(self) -> tuple[str, str] | tuple[str]:
145+
"""Split the `language` code into language and country.
146+
147+
This method parses the :attr:`language` string into its parts
148+
and caches the result per instance to avoid redundant parsing operations.
149+
150+
:returns: A tuple containing:
151+
- ``(language, country)`` if both parts are present.
152+
- ``('*',)`` if the language code is ``"*"``
153+
"""
154+
if self.language == '*':
155+
return ('*',)
156+
157+
# Use split('-') as the separator is already normalized
158+
parts = self.language.split('-')
159+
return (parts[0], parts[1]) if len(parts) > 1 else (parts[0],)
160+
147161
@computed_field(
148162
repr=False,
149163
title='The language part of the language code',
150164
examples=['en', 'de', 'ja'],
151165
)
152166
def lang(self) -> str:
153167
"""Extract the language part of the language code (property)."""
154-
return self._lang
168+
return self._parts[0]
155169

156170
@computed_field(
157171
repr=False,
@@ -160,4 +174,4 @@ def lang(self) -> str:
160174
)
161175
def country(self) -> str:
162176
"""Extract the country part of the language code (property)."""
163-
return self._country
177+
return self._parts[1] if len(self._parts) > 1 else '*'

src/docbuild/utils/merge.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def _merge_langs(
3030
:return: Merged sorted list of LanguageCode objects.
3131
"""
3232
if '*' in langs1 or '*' in langs2:
33-
return [LanguageCode('*')]
33+
return [LanguageCode(language='*')]
3434
return sorted(set(chain(langs1, langs2))) # sorted(set(langs1 + langs2))
3535

3636

@@ -187,4 +187,4 @@ def merge_doctypes(*doctypes: Doctype) -> list[Doctype]: # noqa: C901
187187
new_result.append(dt)
188188
result = _dedup_doctypes(new_result)
189189

190-
return _dedup_doctypes(result)
190+
return _dedup_doctypes(result)

tests/models/test_doctype.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def test_valid_doctype():
1414
assert doctype.product == Product.sles
1515
assert doctype.docset == ['15-SP6']
1616
assert doctype.lifecycle == LifecycleFlag.supported
17-
assert doctype.langs == [LanguageCode('en-us')]
17+
assert doctype.langs == [LanguageCode(language='en-us')]
1818

1919

2020
def test_str_in_doctype():
@@ -46,7 +46,7 @@ def test_string_langs_in_doctype():
4646
lifecycle='supported',
4747
langs='en-us',
4848
)
49-
assert doctype.langs == [LanguageCode('en-us')]
49+
assert doctype.langs == [LanguageCode(language='en-us')]
5050

5151

5252
def test_multiplestrings_langs_in_doctype():
@@ -56,7 +56,7 @@ def test_multiplestrings_langs_in_doctype():
5656
lifecycle='supported',
5757
langs='en-us,de-de',
5858
)
59-
assert doctype.langs == [LanguageCode('de-de'), LanguageCode('en-us')]
59+
assert doctype.langs == [LanguageCode(language='de-de'), LanguageCode(language='en-us')]
6060

6161

6262
@pytest.mark.parametrize(
@@ -68,7 +68,7 @@ def test_multiplestrings_langs_in_doctype():
6868
Product.sles,
6969
['15-SP6'],
7070
LifecycleFlag.unknown,
71-
[LanguageCode('en-us')],
71+
[LanguageCode(language='en-us')],
7272
),
7373
),
7474
(
@@ -77,52 +77,52 @@ def test_multiplestrings_langs_in_doctype():
7777
Product.sles,
7878
['15-SP5', '15-SP6'],
7979
LifecycleFlag.unknown,
80-
[LanguageCode('en-us')],
80+
[LanguageCode(language='en-us')],
8181
),
8282
),
8383
(
8484
'//en-us',
85-
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode('en-us')]),
85+
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode(language='en-us')]),
8686
),
8787
(
8888
'/*/*/en-us',
89-
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode('en-us')]),
89+
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode(language='en-us')]),
9090
),
9191
(
9292
'*//en-us',
93-
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode('en-us')]),
93+
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode(language='en-us')]),
9494
),
9595
(
9696
'/*/en-us',
97-
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode('en-us')]),
97+
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode(language='en-us')]),
9898
),
9999
(
100100
'*/*/en-us',
101-
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode('en-us')]),
101+
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode(language='en-us')]),
102102
),
103103
(
104104
'*/@beta/en-us',
105-
(Product.ALL, ['*'], LifecycleFlag.beta, [LanguageCode('en-us')]),
105+
(Product.ALL, ['*'], LifecycleFlag.beta, [LanguageCode(language='en-us')]),
106106
),
107107
(
108108
'*/*@beta/en-us',
109-
(Product.ALL, ['*'], LifecycleFlag.beta, [LanguageCode('en-us')]),
109+
(Product.ALL, ['*'], LifecycleFlag.beta, [LanguageCode(language='en-us')]),
110110
),
111111
(
112112
'sles/*@beta/en-us',
113-
(Product.sles, ['*'], LifecycleFlag.beta, [LanguageCode('en-us')]),
113+
(Product.sles, ['*'], LifecycleFlag.beta, [LanguageCode(language='en-us')]),
114114
),
115115
(
116116
'/sles/*@beta/en-us',
117-
(Product.sles, ['*'], LifecycleFlag.beta, [LanguageCode('en-us')]),
117+
(Product.sles, ['*'], LifecycleFlag.beta, [LanguageCode(language='en-us')]),
118118
),
119119
(
120120
'/*/*@supported/*',
121-
(Product.ALL, ['*'], LifecycleFlag.supported, [LanguageCode('*')]),
121+
(Product.ALL, ['*'], LifecycleFlag.supported, [LanguageCode(language='*')]),
122122
),
123123
(
124124
'/*/*/*',
125-
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode('*')]),
125+
(Product.ALL, ['*'], LifecycleFlag.unknown, [LanguageCode(language='*')]),
126126
),
127127
],
128128
)
@@ -200,9 +200,9 @@ def test_sorted_docsets_in_doctype():
200200
def test_sorted_langs_in_doctype():
201201
dt1 = Doctype.from_str('sles/15-SP6/en-us,zh-cn,de-de')
202202
assert dt1.langs == [
203-
'de-de',
204-
'en-us',
205-
'zh-cn',
203+
LanguageCode(language='de-de'),
204+
LanguageCode(language='en-us'),
205+
LanguageCode(language='zh-cn'),
206206
]
207207

208208

@@ -224,7 +224,7 @@ def test_sorted_langs_in_doctype_instantiation():
224224
lifecycle=LifecycleFlag.supported,
225225
langs=langs,
226226
)
227-
assert dt1.langs == sorted([LanguageCode(lang) for lang in langs])
227+
assert dt1.langs == sorted([LanguageCode(language=lang) for lang in langs])
228228

229229

230230
@pytest.mark.parametrize(
@@ -280,4 +280,4 @@ def test_sorted_langs_in_doctype_instantiation():
280280
def test_xpath_in_doctype(string, xpath):
281281
"""Test the XPath extraction from a Doctype."""
282282
doctype = Doctype.from_str(string)
283-
assert xpath == doctype.xpath()
283+
assert xpath == doctype.xpath()

0 commit comments

Comments
 (0)