Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/155.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add serialization of missing :class:`~docbuild.models.manifest.Description`.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,50 @@ docbuild.models.deliverable.Deliverable
deliverable.


.. py:property:: all_categories
:type: collections.abc.Generator[lxml.etree._Element, None, None]


Return the groups (formerly categories) of the deliverable.

Yield all elements from the product and root node.

:yield: The elements categories/category and category.



.. py:property:: categories
:type: collections.abc.Generator[lxml.etree._Element, None, None]


Return the groups (formerly categories) from the product node.

Yield all elements under the product where this deliverable belongs.

:yield: The elements categories/category and category.



.. py:property:: categories_from_root
:type: collections.abc.Generator[lxml.etree._Element, None, None]


Return the groups (formerly categories) from the root node.

Yield all elements under the root node.

:yield: The elements categories/category and category.



.. py:property:: desc
:type: collections.abc.Generator[lxml.etree._Element, None, None]


Return the <desc> from the product node.



.. py:property:: productid
:type: str | None

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,9 @@ docbuild.models.manifest.Archive
"zip": "/en-us/sles/16.0/sles-16.0-en-us.zip"
}


.. py:method:: serialize_lang(value: docbuild.models.language.LanguageCode, info: pydantic.SerializationInfo) -> str

Serialize LanguageCode to a string like 'en-us'.


Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,14 @@ docbuild.models.manifest.Category
]
}


.. py:method:: from_xml_node(node: lxml.etree._Element) -> collections.abc.Generator[Self, None, None]
:classmethod:


Extract categories from a parent XML node.

:param node: a node pointing to ``<product>``.
:yield: A :class:`Category` instance for each category found.


Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,9 @@ docbuild.models.manifest.CategoryTranslation
"title": "About"
}


.. py:method:: serialize_lang(value: docbuild.models.language.LanguageCode, info: pydantic.SerializationInfo) -> str

Serialize LanguageCode to a string like 'en-us'.


Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,20 @@ docbuild.models.manifest.Description
"description": "<p>The English description for a product.</p>"
}


.. py:method:: serialize_lang(value: docbuild.models.language.LanguageCode, info: pydantic.SerializationInfo) -> str

Serialize LanguageCode to a string like 'en-us'.



.. py:method:: from_xml_node(node: lxml.etree._Element) -> collections.abc.Generator[Self, None, None]
:classmethod:


Extract descriptions from a parent XML node.

:param node: a node pointing to ``<product>``
:yield: A :class:`Description` instance for each ``<desc>`` element found.


20 changes: 16 additions & 4 deletions src/docbuild/cli/cmd_metadata/metaprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from ...constants import DEFAULT_DELIVERABLES
from ...models.deliverable import Deliverable
from ...models.doctype import Doctype
from ...models.manifest import Document, Manifest
from ...models.manifest import Category, Description, Document, Manifest
from ...utils.contextmgr import PersistentOnErrorTemporaryDirectory, edit_json
from ...utils.git import ManagedGitRepo
from ..context import DocBuildContext
Expand Down Expand Up @@ -338,25 +338,31 @@ def store_productdocset_json(

for doctype, docset, files in collect_files_flat(doctypes, meta_cache_dir):
# files: list[Path]
# TODO: Create a Deliverable object?
product = doctype.product.value
stdout.print(f" > Processed group: {doctype} / {docset}")
stdout.print(f" Processed group: {doctype} / {docset}")
# The XPath logic is encapsulated within the Doctype model
productxpath = f"./{doctype.product_xpath_segment()}"
productnode = stitchnode.find(productxpath)
docsetxpath = f"./{doctype.docset_xpath_segment(docset)}"
docsetnode = productnode.find(docsetxpath)
descriptions = Description.from_xml_node(productnode)

categories = Category.from_xml_node(productnode)

manifest = Manifest(
productname=productnode.find("name").text,
acronym=product,
version=docset,
lifecycle=docsetnode.attrib.get("lifecycle") or "",
descriptions=descriptions,
categories=categories,
# * hide-productname is False by default in the Manifest model
# * descriptions, categories, archives are empty lists by default
# * archives are empty lists by default
)

for f in files:
stdout.print(f" {f}")
stdout.print(f" | {f.stem}")
try:
with (meta_cache_dir / f).open(encoding="utf-8") as fh:
loaded_doc_data = json.load(fh)
Expand Down Expand Up @@ -384,10 +390,16 @@ def store_productdocset_json(
jsonfile = (
jsondir / f"{docset}.json"
) # e.g., /path/to/cache/product_id/docset_id.json

#
jsonfile.write_text(manifest.model_dump_json(indent=2, by_alias=True))
log.info(
"Wrote merged metadata JSON for %s/%s => %s", product, docset, jsonfile
)
stdout.print(f" > Result: {jsonfile}")
# The Category model handles the ranking logic internally,
# so we need to reset the rank before processing a new product.
Category.reset_rank()


async def process(
Expand Down
38 changes: 38 additions & 0 deletions src/docbuild/models/deliverable.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Module for defining the Deliverable model."""

from collections.abc import Generator
from dataclasses import dataclass, field
from functools import cached_property
from pathlib import Path
Expand Down Expand Up @@ -28,6 +29,43 @@ class Deliverable:
_product_node: etree._Element | None = field(repr=False, default=None)
_meta: Metadata | None = None

@property
def all_categories(self) -> Generator[etree._Element, None, None]:
    """Return the groups (formerly categories) of the deliverable.

    Yield all elements from the product node first, then all elements from
    the root node.

    This is a plain ``property`` on purpose: a generator can only be
    consumed once, so caching it would hand every later access the same,
    already exhausted iterator. Each access therefore builds a fresh
    generator and re-runs the XPath queries.

    :yield: The elements categories/category and category.
    """
    product = self.product_node
    # Product-level groups come first ...
    yield from product.xpath("categories/category|category")
    # ... followed by the groups defined directly under the document root.
    root = product.getroottree().getroot()
    yield from root.xpath("categories/category|category")

@property
def categories(self) -> Generator[etree._Element, None, None]:
    """Return the groups (formerly categories) from the product node.

    Yield all elements under the product where this deliverable belongs.

    This is a plain ``property`` on purpose: a cached generator would be
    exhausted after its first iteration, so every access creates a new one.

    :yield: The elements categories/category and category.
    """
    yield from self.product_node.xpath("categories/category|category")

@property
def categories_from_root(self) -> Generator[etree._Element, None, None]:
    """Return the groups (formerly categories) from the root node.

    Yield all elements under the root node of the tree that contains the
    product node.

    This is a plain ``property`` on purpose: a cached generator would be
    exhausted after its first iteration, so every access creates a new one.

    :yield: The elements categories/category and category.
    """
    # Walk up from the product node to the root element of its tree.
    root = self.product_node.getroottree().getroot()
    yield from root.xpath("categories/category|category")

@property
def desc(self) -> Generator[etree._Element, None, None]:
    """Return the <desc> elements from the product node.

    This is a plain ``property`` on purpose: a cached generator would be
    exhausted after its first iteration, so every access creates a new one.

    :yield: Each ``desc`` child element of the product node.
    """
    yield from self.product_node.xpath("desc")

@cached_property
def productid(self) -> str | None:
"""Return the product ID."""
Expand Down
93 changes: 86 additions & 7 deletions src/docbuild/models/manifest.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
"""Pydantic models for the metadata manifest structure."""

from collections.abc import Generator, Iterable
from datetime import date
from typing import Self
from typing import ClassVar, Self

from lxml import etree
from pydantic import (
BaseModel,
Field,
SerializationInfo,
field_serializer,
field_validator,
# model_validator,
)

from ..models.language import LanguageCode
Expand All @@ -31,6 +34,36 @@ class Description(BaseModel):
default: bool
description: str

@field_serializer("lang")
def serialize_lang(self: Self, value: LanguageCode, info: SerializationInfo) -> str:
    """Render the ``lang`` field as a plain string such as 'en-us'.

    :param value: The language code stored in the ``lang`` field.
    :param info: Serialization context passed in by pydantic; unused here.
    :return: The result of ``str(value)``.
    """
    rendered = str(value)
    return rendered

@classmethod
def from_xml_node(
cls: type[Self], node: etree._Element
) -> Generator[Self, None, None]:
"""Extract descriptions from a parent XML node.

:param node: a node pointing to ``<product>``
:yield:
"""
for n in node.xpath("desc"):
text = "".join(
f"<{child.tag}>{
' '.join(
x.strip()
for t in child.itertext()
for x in t.splitlines()
if x.strip()
)
}</{child.tag}>"
for child in n.iterchildren()
if child.tag != "title"
)

yield cls(**{"default": False, **n.attrib}, description=text)


class CategoryTranslation(BaseModel):
"""Represents a translation for a category title.
Expand All @@ -45,9 +78,14 @@ class CategoryTranslation(BaseModel):
"""

lang: LanguageCode
default: bool
default: bool = Field(default=False)
title: str

@field_serializer("lang")
def serialize_lang(self: Self, value: LanguageCode, info: SerializationInfo) -> str:
    """Render the ``lang`` field as a plain string such as 'en-us'.

    :param value: The language code stored in the ``lang`` field.
    :param info: Serialization context passed in by pydantic; unused here.
    :return: The result of ``str(value)``.
    """
    rendered = str(value)
    return rendered


class Category(BaseModel):
"""Represents a category for a product/docset.
Expand All @@ -67,9 +105,45 @@ class Category(BaseModel):
}
"""

id: str = Field(alias="categoryId")
_current_rank: ClassVar[int] = 0

@staticmethod
def _increment_rank() -> int:
    """Advance the class-wide rank counter by one and return the new value."""
    next_rank = Category._current_rank + 1
    Category._current_rank = next_rank
    return next_rank

id: str = Field(serialization_alias="categoryId")
# Automatically called. Depends on the order of the XML element.
rank: int = Field(default_factory=_increment_rank)
translations: list[CategoryTranslation] = Field(default_factory=list)

@classmethod
def reset_rank(cls: type[Self]) -> None:
"""Reset the rank counter."""
cls._current_rank = 0

@classmethod
def from_xml_node(
    cls: type[Self], node: etree._Element
) -> Generator[Self, None, None]:
    """Extract categories from a parent XML node.

    Looks at ``category`` and ``categories/category`` below ``node``. Each
    ``language`` child of a category contributes one
    :class:`CategoryTranslation`, built from its ``lang``, ``default`` and
    ``title`` attributes (falling back to ``"en-us"``, ``False`` and ``""``).

    :param node: a node pointing to ``<product>``
    :yield: A :class:`Category` instance for each category found.
    """
    for category_node in node.xpath("category|categories/category"):
        titles = []
        for language_node in category_node.xpath("language"):
            attrs = language_node.attrib
            titles.append(
                CategoryTranslation(
                    lang=attrs.get("lang", "en-us"),
                    default=attrs.get("default", False),
                    title=attrs.get("title", ""),
                )
            )
        category_id = category_node.attrib.get("categoryid", "")
        yield cls(id=category_id, translations=titles)


class Archive(BaseModel):
"""Represents an archive (e.g., a ZIP file) for a product/docset.
Expand All @@ -87,6 +161,11 @@ class Archive(BaseModel):
default: bool
zip: str

@field_serializer("lang")
def serialize_lang(self: Self, value: LanguageCode, info: SerializationInfo) -> str:
    """Render the ``lang`` field as a plain string such as 'en-us'.

    :param value: The language code stored in the ``lang`` field.
    :param info: Serialization context passed in by pydantic; unused here.
    :return: The result of ``str(value)``.
    """
    rendered = str(value)
    return rendered


class DocumentFormat(BaseModel):
"""Represents the available formats for a document.
Expand Down Expand Up @@ -224,10 +303,10 @@ class Manifest(BaseModel):
version: str
lifecycle: str | LifecycleFlag = Field(default=LifecycleFlag.unknown)
hide_productname: bool = Field(default=False, alias="hide-productname")
descriptions: list[Description] = Field(default_factory=list)
categories: list[Category] = Field(default_factory=list)
documents: list[Document] = Field(default_factory=list)
archives: list[Archive] = Field(default_factory=list)
descriptions: Iterable[Description] = Field(default_factory=list)
categories: Iterable[Category] = Field(default_factory=list)
documents: Iterable[Document] = Field(default_factory=list)
archives: Iterable[Archive] = Field(default_factory=list)


if __name__ == "__main__": # pragma: nocover
Expand Down
Loading