Skip to content

Commit 87e63ee

Browse files
authored
Merge pull request #136 from openSUSE/toms/complexity-process_file
Reduce complexity of `process_file`
2 parents c4aecc7 + 3220a82 commit 87e63ee

8 files changed

Lines changed: 183 additions & 64 deletions

File tree

changelog.d/135.refactor.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Reduce complexity of :func:`~docbuild.cli.cmd_validate.process.process_file`. Split into smaller functions.

src/docbuild/cli/cmd_validate/process.py

Lines changed: 91 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import asyncio
44
from collections.abc import Iterator
5+
from dataclasses import dataclass
56
from datetime import date
67
import logging
78
from pathlib import Path
@@ -215,6 +216,85 @@ async def run_python_checks(
215216
return check_results
216217

217218

219+
@dataclass
220+
class ValidationResult:
221+
"""Normalized result of RNG validation.
222+
223+
:ivar success: True when validation passed.
224+
:ivar exit_code: Exit code to return when validation fails (0 for success).
225+
:ivar message: Optional human-readable message describing the failure.
226+
"""
227+
228+
success: bool
229+
exit_code: int
230+
message: str = ""
231+
232+
233+
def build_shortname(filepath: Path | str) -> str:
234+
"""Return a shortened display name for ``filepath``.
235+
236+
:param filepath: Path-like object to shorten.
237+
:returns: Shortened display name (last two path components or full path).
238+
"""
239+
path_obj = Path(filepath)
240+
return "/".join(path_obj.parts[-2:]) if len(path_obj.parts) >= 2 else str(filepath)
241+
242+
243+
async def run_validation(filepath: Path | str, method: str) -> ValidationResult:
244+
"""Run RNG validation using the selected method and normalize result.
245+
246+
:param filepath: Path to the XML file to validate.
247+
:param method: Validation method name ("jing" or "lxml").
248+
:returns: A :class:`ValidationResult` describing the outcome.
249+
"""
250+
path_obj = Path(filepath)
251+
if method == "lxml":
252+
rng_success, rng_output = await asyncio.to_thread(
253+
validate_rng_lxml, path_obj, XMLDATADIR / "product-config-schema.rng"
254+
)
255+
if rng_success:
256+
return ValidationResult(True, 0, "")
257+
return ValidationResult(False, 10, rng_output or "")
258+
259+
if method == "jing":
260+
jing_result = await validate_rng(path_obj, idcheck=True)
261+
if jing_result.returncode != 0:
262+
output = (jing_result.stdout or "") + (jing_result.stderr or "")
263+
return ValidationResult(False, 10, output.strip())
264+
return ValidationResult(True, 0, "")
265+
266+
return ValidationResult(False, 11, f"Unknown validation method: {method}")
267+
268+
269+
async def parse_tree(filepath: Path | str) -> etree._ElementTree:
270+
"""Parse XML file using lxml in a background thread.
271+
272+
Exceptions from :func:`lxml.etree.parse` (for example
273+
:class:`lxml.etree.XMLSyntaxError`) are propagated to the caller.
274+
275+
:param filepath: Path to the XML file to parse.
276+
:returns: Parsed :class:`lxml.etree._ElementTree`.
277+
"""
278+
return await asyncio.to_thread(etree.parse, str(filepath), parser=None)
279+
280+
281+
async def run_checks_and_display(
282+
tree: etree._ElementTree, shortname: str, context: DocBuildContext, max_len: int
283+
) -> bool:
284+
"""Execute registered Python checks and print formatted results.
285+
286+
:param tree: Parsed XML tree to check.
287+
:param shortname: Short name used for display output.
288+
:param context: :class:`DocBuildContext` used to read verbosity.
289+
:param max_len: Maximum length used for aligned output.
290+
:returns: True when all checks succeeded (or when no checks are registered).
291+
"""
292+
check_results = await run_python_checks(tree)
293+
if check_results:
294+
display_results(shortname, check_results, context.verbose, max_len)
295+
return all(result.success for _, result in check_results)
296+
297+
218298
async def process_file(
219299
filepath: Path | str,
220300
context: DocBuildContext,
@@ -228,63 +308,23 @@ async def process_file(
228308
:param rng_schema_path: Optional path to an RNG schema for validation.
229309
:return: An exit code (0 for success, non-zero for failure).
230310
"""
231-
# Shorten the filename to last two parts for display
232-
path_obj = Path(filepath)
233-
shortname = (
234-
"/".join(path_obj.parts[-2:]) if len(path_obj.parts) >= 2 else str(filepath)
235-
)
236-
237-
# IDEA: Should we replace jing and validate with etree.RelaxNG?
238-
#
239-
# 1. RNG Validation
240-
validation_method = context.validation_method
311+
shortname = build_shortname(filepath)
241312

242-
if validation_method == "lxml":
243-
# Use lxml-based validator (requires .rng schema)
244-
rng_success, rng_output = await asyncio.to_thread(
245-
validate_rng_lxml,
246-
path_obj,
247-
XMLDATADIR / "product-config-schema.rng",
248-
)
249-
elif validation_method == "jing":
250-
# Use existing jing-based validator (.rnc or .rng)
251-
jing_result = await validate_rng(path_obj, idcheck=True)
252-
else:
313+
# 1. RNG Validation (normalized)
314+
validation = await run_validation(filepath, context.validation_method)
315+
if not validation.success:
253316
console_err.print(
254317
f"{shortname:<{max_len}}: RNG validation => [red]failed[/red]"
255318
)
256-
console_err.print(
257-
f" [bold red]Error:[/] Unknown validation method: {validation_method}"
258-
)
259-
return 11 # Custom error code for unknown validation method
319+
if validation.message:
320+
console_err.print(f" [bold red]Error:[/] {validation.message}")
321+
return validation.exit_code
260322

261-
# Handle validation result for jing
262-
if validation_method == "jing":
263-
if jing_result.returncode != 0:
264-
console_err.print(
265-
f"{shortname:<{max_len}}: RNG validation => [red]failed[/red]"
266-
)
267-
output = (jing_result.stdout or "") + (jing_result.stderr or "")
268-
if output:
269-
console_err.print(f" [bold red]Error:[/] {output.strip()}")
270-
return 10 # Specific error code for RNG failure
271-
272-
# Handle validation result for lxml
273-
if validation_method == "lxml":
274-
if not rng_success:
275-
console_err.print(
276-
f"{shortname:<{max_len}}: RNG validation => [red]failed[/red]"
277-
)
278-
if rng_output:
279-
console_err.print(f" [bold red]Error:[/] {rng_output}")
280-
return 10
281-
282-
# 2. Python-based checks
323+
# 2. Parse XML and run Python checks
283324
try:
284-
tree = await asyncio.to_thread(etree.parse, str(filepath), parser=None)
325+
tree = await parse_tree(filepath)
285326

286327
except etree.XMLSyntaxError as err:
287-
# This can happen if xmllint passes but lxml's parser is stricter.
288328
console_err.print(
289329
f"{shortname:<{max_len}}: XML Syntax Error => [red]failed[/red]"
290330
)
@@ -295,14 +335,8 @@ async def process_file(
295335
console_err.print(f" [bold red]Error:[/] {err}")
296336
return 200
297337

298-
# Run all checks for this file
299-
check_results = await run_python_checks(tree)
300-
301-
if check_results:
302-
# Display results based on verbosity level
303-
display_results(shortname, check_results, context.verbose, max_len)
304-
305-
return 0 if all(result.success for _, result in check_results) else 1
338+
success = await run_checks_and_display(tree, shortname, context, max_len)
339+
return 0 if success else 1
306340

307341

308342
async def process(

tests/cli/cmd_repo/test_cmd_clone.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for the 'docbuild repo clone' command."""
22

33
import logging
4+
import re
45
from unittest.mock import AsyncMock
56

67
import pytest
@@ -112,15 +113,16 @@ async def test_process_stitchnode_none(monkeypatch, tmp_path):
112113
(tmp_path / "config").mkdir()
113114
(tmp_path / "repos").mkdir()
114115

115-
with pytest.raises(ValueError, match="Stitch node could not be created."):
116+
with pytest.raises(ValueError,
117+
match=re.escape("Stitch node could not be created.")):
116118
await mod_process.process(context, repos=())
117119

118120

119121
async def test_process_configdir_none():
120122
context = DocBuildContext(envconfig={"paths": {}})
121123
with pytest.raises(
122124
ValueError,
123-
match="Could not get a value from envconfig.paths.config_dir",
125+
match=re.escape("Could not get a value from envconfig.paths.config_dir"),
124126
):
125127
await mod_process.process(context, repos=())
126128

@@ -129,6 +131,6 @@ async def test_process_repodir_none():
129131
context = DocBuildContext(envconfig={"paths": {"config_dir": "/dummy/config"}})
130132
with pytest.raises(
131133
ValueError,
132-
match="Could not get a value from envconfig.paths.repo_dir",
134+
match=re.escape("Could not get a value from envconfig.paths.repo_dir"),
133135
):
134136
await mod_process.process(context, repos=())

tests/cli/cmd_validate/test_process.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for the XML validation process module."""
22

33
from pathlib import Path
4+
import re
45
from subprocess import CompletedProcess
56
from unittest.mock import AsyncMock, MagicMock, patch
67

@@ -58,7 +59,8 @@ async def test_process_file_with_generic_parsing_error(
5859

5960
async def test_process_no_envconfig(mock_context):
6061
mock_context.envconfig = None
61-
with pytest.raises(ValueError, match="No envconfig found in context."):
62+
with pytest.raises(ValueError,
63+
match=re.escape("No envconfig found in context.")):
6264
await process(mock_context, xmlfiles=(Path("dummy.xml"),))
6365

6466

tests/cli/cmd_validate/validate/test_cmd_validate.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from collections.abc import Iterator
44
from os import PathLike
55
from pathlib import Path
6+
import re
67
from subprocess import CompletedProcess
78
import tempfile
89
from unittest.mock import AsyncMock, Mock, patch
@@ -123,7 +124,8 @@ async def test_process_invalid_paths_config(self):
123124
context = Mock(spec=DocBuildContext)
124125
context.envconfig = {"paths": "not_a_dict"}
125126

126-
with pytest.raises(ValueError, match="'paths.config' must be a dictionary"):
127+
with pytest.raises(ValueError,
128+
match=re.escape("'paths.config' must be a dictionary")):
127129
await process_mod.process(context, [])
128130

129131
async def test_process_with_no_xml_files(self, mock_context, caplog):

tests/cli/cmd_validate/validate/test_process_validation.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,3 +223,79 @@ async def test_process_file_with_xmlsyntax_error(capsys, tmp_path):
223223
result = await process_mod.process_file(xmlfile, mock_context, 40)
224224

225225
assert result == 20
226+
227+
228+
def test_validate_rng_lxml_success(monkeypatch):
229+
"""When RelaxNG validates the XML, function returns (True, '')."""
230+
fake_xml_doc = object()
231+
232+
# parse called twice: first for schema, second for xml file
233+
calls = {"n": 0}
234+
235+
def fake_parse(path):
236+
calls["n"] += 1
237+
return object() if calls["n"] == 1 else fake_xml_doc
238+
239+
monkeypatch.setattr(process_mod.etree, "parse", fake_parse)
240+
241+
fake_relaxng = Mock()
242+
fake_relaxng.validate.return_value = True
243+
monkeypatch.setattr(process_mod.etree, "RelaxNG", lambda doc: fake_relaxng)
244+
245+
ok, msg = process_mod.validate_rng_lxml(Path("/tmp/f.xml"), Path("/tmp/schema.rng"))
246+
assert ok is True
247+
assert msg == ""
248+
249+
250+
def test_validate_rng_lxml_validation_failure(monkeypatch):
251+
"""When RelaxNG.validate() returns False, function returns (False, error_log)."""
252+
monkeypatch.setattr(process_mod.etree, "parse", lambda p: object())
253+
fake_relaxng = Mock()
254+
fake_relaxng.validate.return_value = False
255+
fake_relaxng.error_log = "some relaxng errors"
256+
monkeypatch.setattr(process_mod.etree, "RelaxNG", lambda doc: fake_relaxng)
257+
258+
ok, msg = process_mod.validate_rng_lxml(Path("/tmp/f.xml"), Path("/tmp/schema.rng"))
259+
assert ok is False
260+
assert "some relaxng errors" in msg
261+
262+
263+
def test_validate_rng_lxml_xml_syntax_error(monkeypatch):
264+
"""If lxml raises XMLSyntaxError, function returns a descriptive message."""
265+
266+
def raise_syntax(path):
267+
raise process_mod.etree.XMLSyntaxError("bad xml", None, 0, 0, "file")
268+
269+
monkeypatch.setattr(process_mod.etree, "parse", raise_syntax)
270+
271+
ok, msg = process_mod.validate_rng_lxml(Path("/tmp/f.xml"), Path("/tmp/schema.rng"))
272+
assert ok is False
273+
assert "XML or RNG syntax error" in msg
274+
275+
276+
def test_validate_rng_lxml_relaxng_parse_error(monkeypatch):
277+
"""If RelaxNG constructor raises RelaxNGParseError, return an error message."""
278+
monkeypatch.setattr(process_mod.etree, "parse", lambda p: object())
279+
280+
def raise_relaxng(doc):
281+
raise process_mod.etree.RelaxNGParseError("schema parse failure")
282+
283+
monkeypatch.setattr(process_mod.etree, "RelaxNG", raise_relaxng)
284+
285+
ok, msg = process_mod.validate_rng_lxml(Path("/tmp/f.xml"), Path("/tmp/schema.rng"))
286+
assert ok is False
287+
assert "RELAX NG schema parsing error" in msg
288+
289+
290+
def test_validate_rng_lxml_generic_exception(monkeypatch):
291+
"""Any other exception is caught and reported as an unexpected error."""
292+
monkeypatch.setattr(process_mod.etree, "parse", lambda p: object())
293+
294+
def raise_generic(doc):
295+
raise Exception("boom")
296+
297+
monkeypatch.setattr(process_mod.etree, "RelaxNG", raise_generic)
298+
299+
ok, msg = process_mod.validate_rng_lxml(Path("/tmp/f.xml"), Path("/tmp/schema.rng"))
300+
assert ok is False
301+
assert "An unexpected error occurred during validation" in msg

tests/models/test_language.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ def test_compare_with_one_language_and_with_different_object():
7373
def test_language_code_is_frozen():
7474
lang = LanguageCode(language="en-us")
7575

76-
with pytest.raises(ValidationError, match=".*frozen_instance.*"):
76+
with pytest.raises(ValidationError,
77+
match=r".*frozen_instance.*"):
7778
lang.language = "de-de"
7879

7980

tests/utils/test_pidlock.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from multiprocessing import Event
77
from pathlib import Path
88
import platform
9+
import re
910
import time
1011
from unittest.mock import Mock, patch
1112

@@ -143,7 +144,7 @@ def test_lock_is_reentrant_per_process(lock_setup):
143144
# (internal API misuse)
144145
with pytest.raises(
145146
RuntimeError,
146-
match="Lock already acquired by this PidFileLock instance."
147+
match=re.escape("Lock already acquired by this PidFileLock instance.")
147148
):
148149
with lock2:
149150
pass

0 commit comments

Comments
 (0)