Skip to content

Commit 0ab85cc

Browse files
Integrate <desc> and <category> for JSON (#155)
* Add serialization of missing Description The `<desc>` element in the XML configuration contains language-specific descriptions. It is used in the index pages of each product release. This PR adds the missing handling of this element. * Build `Description` model for `Manifest` * In `Deliverable`, create new methods (see below). They transform the `<desc>` element * `all_categories`: Return the categories of the deliverable. * `categories`: Return the categories from the product node. * `categories_from_root`: Return the categories from the root node. * For submodels of `Manifest`, add serialization method (e.g., for `lang` attribute) * Improve test cases * Update doc sources of auto API * Make `Category.rank` set automatically * Add translations to `Category.translations` --------- Co-authored-by: Sushant Gaurav <[email protected]>
1 parent 150bfc4 commit 0ab85cc

11 files changed

Lines changed: 390 additions & 9 deletions

File tree

changelog.d/155.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add serialization of missing :class:`~docbuild.models.manifest.Description`.

docs/source/reference/_autoapi/docbuild/models/deliverable/Deliverable.rst

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,50 @@ docbuild.models.deliverable.Deliverable
1111
deliverable.
1212

1313

14+
.. py:property:: all_categories
15+
:type: collections.abc.Generator[lxml.etree._Element, None, None]
16+
17+
18+
Return the groups (formerly categories) of the deliverable.
19+
20+
Yield all elements from the product and root node.
21+
22+
:yield: The elements categories/category and category.
23+
24+
25+
26+
.. py:property:: categories
27+
:type: collections.abc.Generator[lxml.etree._Element, None, None]
28+
29+
30+
Return the groups (formerly categories) from the product node.
31+
32+
Yield all elements under the product to which this deliverable belongs.
33+
34+
:yield: The elements categories/category and category.
35+
36+
37+
38+
.. py:property:: categories_from_root
39+
:type: collections.abc.Generator[lxml.etree._Element, None, None]
40+
41+
42+
Return the groups (formerly categories) from the root node.
43+
44+
Yield all elements under the root node.
45+
46+
:yield: The elements categories/category and category.
47+
48+
49+
50+
.. py:property:: desc
51+
:type: collections.abc.Generator[lxml.etree._Element, None, None]
52+
53+
54+
Return the <desc> from the product node.
55+
56+
57+
1458
.. py:property:: productid
1559
:type: str | None
1660

docs/source/reference/_autoapi/docbuild/models/manifest/Archive.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,9 @@ docbuild.models.manifest.Archive
1919
"zip": "/en-us/sles/16.0/sles-16.0-en-us.zip"
2020
}
2121
22+
23+
.. py:method:: serialize_lang(value: docbuild.models.language.LanguageCode, info: pydantic.SerializationInfo) -> str
24+
25+
Serialize LanguageCode to a string like 'en-us'.
26+
27+

docs/source/reference/_autoapi/docbuild/models/manifest/Category.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,14 @@ docbuild.models.manifest.Category
2525
]
2626
}
2727
28+
29+
.. py:method:: from_xml_node(node: lxml.etree._Element) -> collections.abc.Generator[Self, None, None]
30+
:classmethod:
31+
32+
33+
Extract categories from a parent XML node.
34+
35+
:param node: a node pointing to ``<product>``.
36+
:yield: A :class:`Category` instance for each category found.
37+
38+

docs/source/reference/_autoapi/docbuild/models/manifest/CategoryTranslation.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,9 @@ docbuild.models.manifest.CategoryTranslation
1919
"title": "About"
2020
}
2121
22+
23+
.. py:method:: serialize_lang(value: docbuild.models.language.LanguageCode, info: pydantic.SerializationInfo) -> str
24+
25+
Serialize LanguageCode to a string like 'en-us'.
26+
27+

docs/source/reference/_autoapi/docbuild/models/manifest/Description.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,20 @@ docbuild.models.manifest.Description
1919
"description": "<p>The English description for a product.</p>"
2020
}
2121
22+
23+
.. py:method:: serialize_lang(value: docbuild.models.language.LanguageCode, info: pydantic.SerializationInfo) -> str
24+
25+
Serialize LanguageCode to a string like 'en-us'.
26+
27+
28+
29+
.. py:method:: from_xml_node(node: lxml.etree._Element) -> collections.abc.Generator[Self, None, None]
30+
:classmethod:
31+
32+
33+
Extract descriptions from a parent XML node.
34+
35+
:param node: a node pointing to ``<product>``
36+
:yield: A :class:`Description` instance for each ``<desc>`` element found.
37+
38+

src/docbuild/cli/cmd_metadata/metaprocess.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from ...constants import DEFAULT_DELIVERABLES
1717
from ...models.deliverable import Deliverable
1818
from ...models.doctype import Doctype
19-
from ...models.manifest import Document, Manifest
19+
from ...models.manifest import Category, Description, Document, Manifest
2020
from ...utils.contextmgr import PersistentOnErrorTemporaryDirectory, edit_json
2121
from ...utils.git import ManagedGitRepo
2222
from ..context import DocBuildContext
@@ -338,25 +338,31 @@ def store_productdocset_json(
338338

339339
for doctype, docset, files in collect_files_flat(doctypes, meta_cache_dir):
340340
# files: list[Path]
341+
# TODO: Create a Deliverable object?
341342
product = doctype.product.value
342-
stdout.print(f" > Processed group: {doctype} / {docset}")
343+
stdout.print(f" Processed group: {doctype} / {docset}")
343344
# The XPath logic is encapsulated within the Doctype model
344345
productxpath = f"./{doctype.product_xpath_segment()}"
345346
productnode = stitchnode.find(productxpath)
346347
docsetxpath = f"./{doctype.docset_xpath_segment(docset)}"
347348
docsetnode = productnode.find(docsetxpath)
349+
descriptions = Description.from_xml_node(productnode)
350+
351+
categories = Category.from_xml_node(productnode)
348352

349353
manifest = Manifest(
350354
productname=productnode.find("name").text,
351355
acronym=product,
352356
version=docset,
353357
lifecycle=docsetnode.attrib.get("lifecycle") or "",
358+
descriptions=descriptions,
359+
categories=categories,
354360
# * hide-productname is False by default in the Manifest model
355-
# * descriptions, categories, archives are empty lists by default
361+
# * archives is an empty list by default
356362
)
357363

358364
for f in files:
359-
stdout.print(f" {f}")
365+
stdout.print(f" | {f.stem}")
360366
try:
361367
with (meta_cache_dir / f).open(encoding="utf-8") as fh:
362368
loaded_doc_data = json.load(fh)
@@ -384,10 +390,16 @@ def store_productdocset_json(
384390
jsonfile = (
385391
jsondir / f"{docset}.json"
386392
) # e.g., /path/to/cache/product_id/docset_id.json
393+
394+
#
387395
jsonfile.write_text(manifest.model_dump_json(indent=2, by_alias=True))
388396
log.info(
389397
"Wrote merged metadata JSON for %s/%s => %s", product, docset, jsonfile
390398
)
399+
stdout.print(f" > Result: {jsonfile}")
400+
# The Category model handles the ranking logic internally,
401+
# so we need to reset the rank before processing a new product.
402+
Category.reset_rank()
391403

392404

393405
async def process(

src/docbuild/models/deliverable.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Module for defining the Deliverable model."""
22

3+
from collections.abc import Generator
34
from dataclasses import dataclass, field
45
from functools import cached_property
56
from pathlib import Path
@@ -28,6 +29,43 @@ class Deliverable:
2829
_product_node: etree._Element | None = field(repr=False, default=None)
2930
_meta: Metadata | None = None
3031

32+
@cached_property
33+
def all_categories(self) -> Generator[etree._Element, None, None]:
34+
"""Return the groups (formerly categories) of the deliverable.
35+
36+
Yield all elements from the product and root node.
37+
38+
:yield: The elements categories/category and category.
39+
"""
40+
yield from self.categories
41+
yield from self.categories_from_root
42+
43+
@cached_property
44+
def categories(self) -> Generator[etree._Element, None, None]:
45+
"""Return the groups (formerly categories) from the product node.
46+
47+
Yield all elements under the product to which this deliverable belongs.
48+
49+
:yield: The elements categories/category and category.
50+
"""
51+
yield from self.product_node.xpath("categories/category|category")
52+
53+
@cached_property
54+
def categories_from_root(self) -> Generator[etree._Element, None, None]:
55+
"""Return the groups (formerly categories) from the root node.
56+
57+
Yield all elements under the root node.
58+
59+
:yield: The elements categories/category and category.
60+
"""
61+
root = self.product_node.getroottree().getroot()
62+
yield from root.xpath("categories/category|category")
63+
64+
@cached_property
65+
def desc(self) -> Generator[etree._Element, None, None]:
66+
"""Return the <desc> from the product node."""
67+
yield from self.product_node.xpath("desc")
68+
3169
@cached_property
3270
def productid(self) -> str | None:
3371
"""Return the product ID."""

src/docbuild/models/manifest.py

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
"""Pydantic models for the metadata manifest structure."""
22

3+
from collections.abc import Generator
34
from datetime import date
4-
from typing import Self
5+
from typing import ClassVar, Self
56

7+
from lxml import etree
68
from pydantic import (
79
BaseModel,
810
Field,
911
SerializationInfo,
1012
field_serializer,
1113
field_validator,
14+
# model_validator,
1215
)
1316

1417
from ..models.language import LanguageCode
@@ -31,6 +34,36 @@ class Description(BaseModel):
3134
default: bool
3235
description: str
3336

37+
@field_serializer("lang")
38+
def serialize_lang(self: Self, value: LanguageCode, info: SerializationInfo) -> str:
39+
"""Serialize LanguageCode to a string like 'en-us'."""
40+
return str(value)
41+
42+
@classmethod
43+
def from_xml_node(
44+
cls: type[Self], node: etree._Element
45+
) -> Generator[Self, None, None]:
46+
"""Extract descriptions from a parent XML node.
47+
48+
:param node: a node pointing to ``<product>``
49+
:yield: A :class:`Description` instance for each ``<desc>`` element found.
50+
"""
51+
for n in node.xpath("desc"):
52+
text = "".join(
53+
f"<{child.tag}>{
54+
' '.join(
55+
x.strip()
56+
for t in child.itertext()
57+
for x in t.splitlines()
58+
if x.strip()
59+
)
60+
}</{child.tag}>"
61+
for child in n.iterchildren()
62+
if child.tag != "title"
63+
)
64+
65+
yield cls(**{"default": False, **n.attrib}, description=text)
66+
3467

3568
class CategoryTranslation(BaseModel):
3669
"""Represents a translation for a category title.
@@ -45,9 +78,14 @@ class CategoryTranslation(BaseModel):
4578
"""
4679

4780
lang: LanguageCode
48-
default: bool
81+
default: bool = Field(default=False)
4982
title: str
5083

84+
@field_serializer("lang")
85+
def serialize_lang(self: Self, value: LanguageCode, info: SerializationInfo) -> str:
86+
"""Serialize LanguageCode to a string like 'en-us'."""
87+
return str(value)
88+
5189

5290
class Category(BaseModel):
5391
"""Represents a category for a product/docset.
@@ -67,9 +105,45 @@ class Category(BaseModel):
67105
}
68106
"""
69107

70-
id: str = Field(alias="categoryId")
108+
_current_rank: ClassVar[int] = 0
109+
110+
@staticmethod
111+
def _increment_rank() -> int:
112+
"""Increment the counter and return the next value."""
113+
Category._current_rank += 1
114+
return Category._current_rank
115+
116+
id: str = Field(serialization_alias="categoryId")
117+
# Set automatically by the default factory; follows the order of the XML elements.
118+
rank: int = Field(default_factory=_increment_rank)
71119
translations: list[CategoryTranslation] = Field(default_factory=list)
72120

121+
@classmethod
122+
def reset_rank(cls: type[Self]) -> None:
123+
"""Reset the rank counter."""
124+
cls._current_rank = 0
125+
126+
@classmethod
127+
def from_xml_node(
128+
cls: type[Self], node: etree._Element
129+
) -> Generator[Self, None, None]:
130+
"""Extract categories from a parent XML node.
131+
132+
:param node: a node pointing to ``<product>``
133+
:yield: A :class:`Category` instance for each category found.
134+
"""
135+
for cat in node.xpath("category|categories/category"):
136+
langs = cat.xpath("language")
137+
translations = [
138+
CategoryTranslation(
139+
lang=lng.attrib.get("lang", "en-us"),
140+
default=lng.attrib.get("default", False),
141+
title=lng.attrib.get("title", ""),
142+
)
143+
for lng in langs
144+
]
145+
yield cls(id=cat.attrib.get("categoryid", ""), translations=translations)
146+
73147

74148
class Archive(BaseModel):
75149
"""Represents an archive (e.g., a ZIP file) for a product/docset.
@@ -87,6 +161,11 @@ class Archive(BaseModel):
87161
default: bool
88162
zip: str
89163

164+
@field_serializer("lang")
165+
def serialize_lang(self: Self, value: LanguageCode, info: SerializationInfo) -> str:
166+
"""Serialize LanguageCode to a string like 'en-us'."""
167+
return str(value)
168+
90169

91170
class DocumentFormat(BaseModel):
92171
"""Represents the available formats for a document.

0 commit comments

Comments
 (0)