Skip to content

Commit a69ae3e

Browse files
authored
Refactor JSON struct for robust metadata handling (#140)
* Create a Manifest Pydantic model to hold the respective JSON structure (=manifest.py) => This refactoring of mainly the store_productdocset_json function leverages Pydantic models to create a more robust, clear, and maintainable data pipeline. * Catch Pydantic validation errors & JSON decoding errors * Introduce `*_segment` methods in Doctype model * Adjust LifecycleFlag and lifecycle.py: * Fix type warnings * Use `__missing__` to make it possible to delegate a string to LifecycleFlag * Introduce `upython` in `devel/activate-aliases.sh` * Simplify DocumentFormat * Correct datemodified field, add tests * Remove field_validator for lifecycle This is already covered in #113
1 parent 4f27d39 commit a69ae3e

8 files changed

Lines changed: 460 additions & 104 deletions

File tree

changelog.d/140.refactor.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Refactor JSON structure for robust metadata handling in ``docbuild.cli.cmd_metadata.metaprocess:store_productdocset_json``. Introduce Pydantic :class:`~docbuild.models.manifest.Manifest` model to encapsulate document metadata, enhancing validation and serialization.

devel/activate-aliases.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
# For testing:
55
alias upytest="uv run --frozen pytest"
66

7+
# General Python command
8+
alias upython="uv run --frozen python"
9+
710
# For the interactive Python shell with the project's environment:
811
alias uipython="uv run --frozen ipython --ipython-dir=.ipython"
912

src/docbuild/cli/cmd_metadata/metaprocess.py

Lines changed: 34 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@
99
from typing import Any
1010

1111
from lxml import etree
12+
from pydantic import ValidationError
1213
from rich.console import Console
1314

1415
from ...config.xml.stitch import create_stitchfile
1516
from ...constants import DEFAULT_DELIVERABLES
1617
from ...models.deliverable import Deliverable
1718
from ...models.doctype import Doctype
19+
from ...models.manifest import Document, Manifest
1820
from ...utils.contextmgr import PersistentOnErrorTemporaryDirectory, edit_json
1921
from ...utils.git import ManagedGitRepo
2022
from ..context import DocBuildContext
@@ -338,76 +340,51 @@ def store_productdocset_json(
338340
# files: list[Path]
339341
product = doctype.product.value
340342
stdout.print(f" > Processed group: {doctype} / {docset}")
341-
# TODO: This XPath should be done in the Doctype model
342-
# For the time being, it doesn't add to the coverage
343-
productxpath = "./product"
344-
if product != "*": # pragma: no cover
345-
productxpath += f'[@productid="{product}"]'
346-
343+
# The XPath logic is encapsulated within the Doctype model
344+
productxpath = f"./{doctype.product_xpath_segment()}"
347345
productnode = stitchnode.find(productxpath)
348-
docsetxpath = "./docset"
349-
if docset != "*": # pragma: no cover
350-
docsetxpath += f'[@setid="{docset}"]'
351-
346+
docsetxpath = f"./{doctype.docset_xpath_segment(docset)}"
352347
docsetnode = productnode.find(docsetxpath)
353-
# TODO: end
354-
355-
# Create a new structure for each group of product/docset
356-
structure = {
357-
"productname": productnode.find("name").text,
358-
"acronym": product,
359-
"version": docset,
360-
"lifecycle": docsetnode.attrib.get("lifecycle"),
361-
"hide-productname": False,
362-
"descriptions": [
363-
# TODO
364-
# { "lang", "...",
365-
# "default": True|False,
366-
# "description": "..."
367-
# }
368-
],
369-
"categories": [
370-
# TODO
371-
# {
372-
# "categoryId": "...",
373-
# "rank": INT,
374-
# "translations": [
375-
# {
376-
# "lang", "...",
377-
# "default": True|False,
378-
# "title": "..."
379-
# }
380-
# ]
381-
],
382-
"documents": [], # Will be filled below
383-
"archives": [
384-
# TODO
385-
# {
386-
# "lang": "...",
387-
# "default": True|False,
388-
# "zip": "LANG/PRODUCT/DOCSET/PRODUCT-DOCSET-LANG.zip",
389-
# }
390-
],
391-
}
348+
349+
manifest = Manifest(
350+
productname=productnode.find("name").text,
351+
acronym=product,
352+
version=docset,
353+
lifecycle=docsetnode.attrib.get("lifecycle") or "",
354+
# * hide-productname is False by default in the Manifest model
355+
# * descriptions, categories, archives are empty lists by default
356+
)
357+
392358
for f in files:
393359
stdout.print(f" {f}")
394360
try:
395-
with (meta_cache_dir / f).open() as fh:
396-
doc = json.load(fh)
397-
if not doc:
398-
console_err.print(f"Warning: Empty metadata file {f}")
361+
with (meta_cache_dir / f).open(encoding="utf-8") as fh:
362+
loaded_doc_data = json.load(fh)
363+
if not loaded_doc_data:
364+
log.warning("Empty metadata file %s", f)
399365
continue
366+
doc_model = Document.model_validate(loaded_doc_data)
367+
manifest.documents.append(doc_model)
368+
369+
except json.JSONDecodeError as e:
370+
log.error("Error decoding metadata file %s: %s", f, e)
371+
continue
400372

401-
structure["documents"].extend(doc.get("docs", []))
373+
except ValidationError as e:
374+
log.error("Error validating metadata file %s: %s", f, e)
375+
continue
402376

403377
except Exception as e:
404-
console_err.print(f"Error reading metadata file {f}: {e}")
378+
log.error("Error reading metadata file %s: %s", f, e)
379+
continue
405380

406381
# stdout.print(json.dumps(structure, indent=2), markup=True)
407382
jsondir = meta_cache_dir / product
408383
jsondir.mkdir(parents=True, exist_ok=True)
409-
jsonfile = jsondir / f"{docset}.json"
410-
jsonfile.write_text(json.dumps(structure, indent=2))
384+
jsonfile = (
385+
jsondir / f"{docset}.json"
386+
) # e.g., /path/to/cache/product_id/docset_id.json
387+
jsonfile.write_text(manifest.model_dump_json(indent=2, by_alias=True))
411388
log.info(
412389
"Wrote merged metadata JSON for %s/%s => %s", product, docset, jsonfile
413390
)

src/docbuild/models/doctype.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ class Doctype(BaseModel):
100100
)
101101

102102
# dunder methods
103-
def __eq__(self, other: object) -> bool:
103+
def __eq__(self: Self, other: object) -> bool:
104104
"""Check equality with another Doctype, ignoring order in docset/langs."""
105105
if not isinstance(other, Doctype):
106106
return NotImplemented
@@ -112,7 +112,7 @@ def __eq__(self, other: object) -> bool:
112112
and set(self.langs) == set(other.langs)
113113
)
114114

115-
def __lt__(self, other: object) -> bool:
115+
def __lt__(self: Self, other: object) -> bool:
116116
"""Check if this Doctype is less than another Doctype."""
117117
if not isinstance(other, Doctype):
118118
return NotImplemented
@@ -125,13 +125,13 @@ def __lt__(self, other: object) -> bool:
125125
self.langs, # we rely on sorted languages
126126
) < (other.product, other.lifecycle, other.docset, other.langs)
127127

128-
def __str__(self) -> str:
128+
def __str__(self: Self) -> str:
129129
"""Implement str(self)."""
130130
langs_str = ",".join(lang.language for lang in self.langs)
131131
docset_str = ",".join(self.docset)
132132
return f"{self.product.value}/{docset_str}@{self.lifecycle.name}/{langs_str}"
133133

134-
def __repr__(self) -> str:
134+
def __repr__(self: Self) -> str:
135135
"""Implement repr(self)."""
136136
langs_str = ",".join(lang.language for lang in self.langs)
137137
docset_str = ",".join(self.docset)
@@ -143,7 +143,7 @@ def __repr__(self) -> str:
143143
f")"
144144
)
145145

146-
def __contains__(self, other: "Doctype") -> bool:
146+
def __contains__(self: Self, other: "Doctype") -> bool:
147147
"""Return if bool(other in self).
148148
149149
Every part of a Doctype is compared element-wise.
@@ -160,7 +160,7 @@ def __contains__(self, other: "Doctype") -> bool:
160160
],
161161
)
162162

163-
def __hash__(self) -> int:
163+
def __hash__(self: Self) -> int:
164164
"""Implement hash(self)."""
165165
return hash(
166166
(
@@ -183,21 +183,9 @@ def coerce_docset(cls, value: str | list[str]) -> list[str]:
183183
"""Convert a string into a list."""
184184
return sorted(value.split(",")) if isinstance(value, str) else sorted(value)
185185

186-
@field_validator("lifecycle", mode="before")
187-
@classmethod
188-
def coerce_lifecycle(cls, value: str | LifecycleFlag) -> LifecycleFlag:
189-
"""Convert a string into a LifecycleFlag."""
190-
# value = "" if value is None else value
191-
if isinstance(value, str):
192-
# Delegate it to the LifecycleFlag to deal with
193-
# the correct parsing and validation
194-
lifecycles = LifecycleFlag.from_str(value)
195-
return lifecycles
196-
return LifecycleFlag(value)
197-
198186
@field_validator("langs", mode="before")
199187
@classmethod
200-
def coerce_langs(cls, value: str | list[str | LanguageCode]) -> list[LanguageCode]:
188+
def coerce_langs(cls: type["Doctype"], value: str | list[str | LanguageCode]) -> list[LanguageCode]:
201189
"""Convert a comma-separated string or a list of strings into LanguageCode."""
202190
# Allow list of strings or Language enums
203191
if isinstance(value, str):
@@ -210,7 +198,7 @@ def coerce_langs(cls, value: str | list[str | LanguageCode]) -> list[LanguageCod
210198
)
211199

212200
@classmethod
213-
def from_str(cls, doctype_str: str) -> Self:
201+
def from_str(cls: type["Doctype"], doctype_str: str) -> Self:
214202
"""Parse a string that adheres to the doctype format."""
215203
match = cls._DOCTYPE_REGEX.match(doctype_str)
216204

@@ -229,7 +217,7 @@ def from_str(cls, doctype_str: str) -> Self:
229217
langs=langs,
230218
)
231219

232-
def xpath(self) -> str:
220+
def xpath(self: Self) -> str:
233221
"""Return an XPath expression for this Doctype to find all deliverables.
234222
235223
>>> result = Doctype.from_str("sles/15-SP6@supported/en-us,de-de").xpath()
@@ -274,3 +262,21 @@ def xpath(self) -> str:
274262
language = f"language[{language}]"
275263

276264
return f"{product}/{docset}/builddocs/{language}"
265+
266+
def product_xpath_segment(self: Self) -> str:
267+
"""Return the XPath segment for the product node.
268+
269+
Example: "product[@productid='sles']" or "product"
270+
"""
271+
if self.product != Product.ALL:
272+
return f"product[@productid={self.product.value!r}]"
273+
return "product"
274+
275+
def docset_xpath_segment(self: Self, docset: str) -> str:
276+
"""Return the XPath segment for the docset node.
277+
278+
Example: "docset[@setid='15-SP6']" or "docset"
279+
"""
280+
if docset != "*":
281+
return f"docset[@setid={docset!r}]"
282+
return "docset"

src/docbuild/models/lifecycle.py

Lines changed: 49 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,19 @@ class LifecycleFlag(Flag):
2525
unsupported = auto()
2626
"""Unsupported lifecycle state."""
2727

28-
# NOTE: Putting a compiled regex (or other helper) as a class
29-
# variable on an Enum/Flag is error-prone:
30-
# the Enum metaclass treats class attributes specially and may
31-
# convert them into members or otherwise interfere.
32-
#
33-
# Solution: This class variable will be attached after class creation.
34-
# _SEPARATOR = re.compile(r'[|,]') # Static class variable
28+
@classmethod
29+
def _missing_(cls: type[Self], value: object) -> Self | None:
30+
"""Handle missing values by attempting to parse strings."""
31+
if isinstance(value, str):
32+
try:
33+
return cls.from_str(value)
34+
except ValueError:
35+
# Let the default error handling take over for invalid strings
36+
return None
37+
return super()._missing_(value)
3538

3639
@classmethod
37-
def from_str(cls: "LifecycleFlag", value: str) -> "LifecycleFlag":
40+
def from_str(cls: type[Self], value: str) -> Self:
3841
"""Convert a string to a LifecycleFlag object.
3942
4043
The string accepts the values 'supported', 'beta', 'hidden',
@@ -53,7 +56,7 @@ def from_str(cls: "LifecycleFlag", value: str) -> "LifecycleFlag":
5356
<LifecycleFlag.unknown: 0>
5457
5558
"""
56-
separator = cls._SEPARATOR # will exist after we attach it
59+
separator = cls._SEPARATOR # type: ignore[attr-defined]
5760
try:
5861
flag = cls(0) # Start with an empty flag
5962
parts = [v.strip() for v in separator.split(value) if v.strip()]
@@ -92,4 +95,40 @@ def __contains__(self: Self, other: str | Flag) -> bool:
9295

9396

9497
# attach after class creation so EnumMeta doesn't touch it
95-
LifecycleFlag._SEPARATOR: ClassVar[re.Pattern] = re.compile(r"[|,]")
98+
# NOTE: Putting a compiled regex (or other helper) as a class
99+
# variable on an Enum/Flag is error-prone:
100+
# the Enum metaclass treats class attributes specially and may convert them into members.
101+
LifecycleFlag._SEPARATOR: ClassVar[re.Pattern] = re.compile(r"[|,]") # type: ignore[attr-defined]
102+
103+
if __name__ == "__main__": # pragma: nocover
104+
from rich import print # noqa: A004
105+
106+
# Example usage
107+
lc = LifecycleFlag("supported,beta")
108+
print(f"{lc=}")
109+
print(f"{lc.name=}")
110+
print(f"{lc.value=}")
111+
print(f"{lc.supported=}")
112+
print(f"{lc.beta=}")
113+
114+
print("=" * 20)
115+
116+
lc = LifecycleFlag.from_str("supported,beta")
117+
print(f"{lc=}")
118+
print(f"{lc.name=}")
119+
print(f"{lc.value=}")
120+
print(f"{lc.supported=}")
121+
print(f"{lc.beta=}")
122+
123+
print("=" * 20)
124+
125+
lc = LifecycleFlag.from_str("supported|beta")
126+
print(f"{lc=}")
127+
print(f"{lc.name=}")
128+
print(f"{lc.value=}")
129+
print(f"{lc.supported=}")
130+
print(f"{lc.beta=}")
131+
132+
print("=" * 20)
133+
print(f"{dir(lc)=}")
134+
# print(f"{lc.foo=}")

0 commit comments

Comments
 (0)