From 865f71496c3f5a834508a3e792d461b5a3b12130 Mon Sep 17 00:00:00 2001 From: Heitor Neiva Date: Wed, 20 May 2026 11:22:03 -0700 Subject: [PATCH 1/2] fix(schema): suppress noisy tracebacks on schema validation errors Schema violations are user-input/project errors, not taskgraph bugs, so the deep generator-chain traceback add no value in printing the traceback Introduce SchemaValidationError and have the kind loader and top-level CLI handler log just the message instead of printing a huge useless stacktrace. --- src/taskgraph/generator.py | 14 ++++++++++++++ src/taskgraph/main.py | 5 +++++ src/taskgraph/util/schema.py | 22 +++++++++++++++------- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/taskgraph/generator.py b/src/taskgraph/generator.py index 044069ae5..bc8f9c400 100644 --- a/src/taskgraph/generator.py +++ b/src/taskgraph/generator.py @@ -27,6 +27,7 @@ from .taskgraph import TaskGraph from .transforms.base import TransformConfig, TransformSequence from .util.python_path import find_object +from .util.schema import SchemaValidationError from .util.verify import verifications from .util.yaml import load_yaml @@ -303,6 +304,9 @@ def _load_tasks_serial(self, kinds, kind_graph, parameters): }, self._write_artifacts, ) + except SchemaValidationError as exc: + logger.error(f"Error loading tasks for kind {kind_name}:\n{exc}") + raise except Exception: logger.exception(f"Error loading tasks for kind {kind_name}:") raise @@ -358,6 +362,16 @@ def submit_ready_kinds(): for future in done: if exc := future.exception(): executor.shutdown(wait=False, cancel_futures=True) + kind_name = futures_to_kind.get(future, "?") + if isinstance(exc, SchemaValidationError): + logger.error( + f"Error loading tasks for kind {kind_name}:\n{exc}" + ) + else: + logger.error( + f"Error loading tasks for kind {kind_name}:", + exc_info=(type(exc), exc, exc.__traceback__), + ) raise exc kind = futures_to_kind.pop(future) futures.remove(future) diff --git a/src/taskgraph/main.py b/src/taskgraph/main.py index 5b72d9201..d1ab84e12 100644 --- a/src/taskgraph/main.py +++ b/src/taskgraph/main.py @@ -22,6 +22,8 @@ import appdirs import yaml +from taskgraph.util.schema import SchemaValidationError + Command = namedtuple("Command", ["func", "args", "kwargs", "defaults"]) commands = {} @@ -1207,6 +1209,9 @@ def main(args=sys.argv[1:]): args = parser.parse_args(args) try: return args.command(vars(args)) + except SchemaValidationError: + # Message was already logged; skip the traceback for user input errors. + sys.exit(1) except Exception: traceback.print_exc() sys.exit(1) diff --git a/src/taskgraph/util/schema.py b/src/taskgraph/util/schema.py index 3f9350668..a78e74b5a 100644 --- a/src/taskgraph/util/schema.py +++ b/src/taskgraph/util/schema.py @@ -23,6 +23,14 @@ ] +class SchemaValidationError(Exception): + """Raised when user-supplied data fails schema validation. + + Callers should display the message without a Python traceback, since + these reflect input errors rather than programmer bugs. + """ + + def validate_schema(schema, obj, msg_prefix): """ Validate that object satisfies schema. If not, generate a useful exception @@ -65,9 +73,13 @@ def validate_schema(schema, obj, msg_prefix): msg = [msg_prefix] for error in exc.errors: msg.append(str(error)) - raise Exception("\n".join(msg) + "\n" + pprint.pformat(obj)) + raise SchemaValidationError( + "\n".join(msg) + "\n" + pprint.pformat(obj) + ) from None else: - raise Exception(f"{msg_prefix}\n{str(exc)}\n{pprint.pformat(obj)}") + raise SchemaValidationError( + f"{msg_prefix}\n{str(exc)}\n{pprint.pformat(obj)}" + ) from None class OptionallyKeyedBy: @@ -456,11 +468,7 @@ def validate(cls, data): """Validate data against this schema.""" if taskgraph.fast: return data - - try: - msgspec.convert(data, cls) - except (msgspec.ValidationError, msgspec.DecodeError) as e: - raise msgspec.ValidationError(str(e)) + msgspec.convert(data, cls) class IndexSchema(Schema): From 6130b5792696e18668dce18938399ea5ef42f543 Mon Sep 17 00:00:00 2001 From: Heitor Neiva Date: Thu, 21 May 2026 08:43:19 -0700 Subject: [PATCH 2/2] add(tests): Add tests for schema validation errors --- test/test_generator.py | 41 ++++++++++++++++++++++++++++++++++++ test/test_main.py | 40 +++++++++++++++++++++++++++++++++++ test/test_util_schema.py | 45 +++++++++++++++++++++++++++++++++++----- 3 files changed, 121 insertions(+), 5 deletions(-) diff --git a/test/test_generator.py b/test/test_generator.py index 0353295ab..2024f6178 100644 --- a/test/test_generator.py +++ b/test/test_generator.py @@ -13,6 +13,7 @@ from taskgraph import generator, graph from taskgraph.generator import Kind, load_tasks_for_kind, load_tasks_for_kinds from taskgraph.loader.default import loader as default_loader +from taskgraph.util.schema import SchemaValidationError linuxonly = pytest.mark.skipif( platform.system() != "Linux" or os.environ.get("TASKGRAPH_USE_THREADS"), @@ -70,6 +71,46 @@ def test_kind_ordering_multithread(mocker, maketgg): assert mocked_tpe.loaded_kinds == ["_fake1", "_fake2", "_fake3"] +def test_schema_validation_error_logged_without_traceback(mocker, caplog, maketgg): + """SchemaValidationError from a kind is logged at ERROR level with no + traceback attached (it's user input, not a programmer bug).""" + + def fail_load_tasks(self, *args, **kwargs): + raise SchemaValidationError("bad task data") + + mocker.patch.object(Kind, "load_tasks", fail_load_tasks) + tgg = maketgg() + + with caplog.at_level("ERROR"): + with pytest.raises(SchemaValidationError): + tgg._run_until("full_task_set") + + schema_records = [r for r in caplog.records if "Error loading tasks" in r.message] + assert len(schema_records) == 1 + # logger.error attaches no exc_info — that's the whole point. + assert schema_records[0].exc_info is None + assert "bad task data" in schema_records[0].message + + +def test_other_exceptions_still_log_traceback(mocker, caplog, maketgg): + """Non-SchemaValidationError exceptions still go through logger.exception + so real programmer bugs surface their traceback.""" + + def fail_load_tasks(self, *args, **kwargs): + raise RuntimeError("unexpected bug") + + mocker.patch.object(Kind, "load_tasks", fail_load_tasks) + tgg = maketgg() + + with caplog.at_level("ERROR"): + with pytest.raises(RuntimeError): + tgg._run_until("full_task_set") + + records = [r for r in caplog.records if "Error loading tasks" in r.message] + assert len(records) == 1 + assert records[0].exc_info is not None + + def test_full_task_set(maketgg): "The full_task_set property has all tasks" tgg = maketgg() diff --git a/test/test_main.py b/test/test_main.py index 674f4819e..13c750d95 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -11,11 +11,13 @@ import taskgraph from taskgraph.actions import registry +from taskgraph.generator import Kind from taskgraph.graph import Graph from taskgraph.main import format_kind_graph_mermaid, get_filtered_taskgraph from taskgraph.main import main as taskgraph_main from taskgraph.task import Task from taskgraph.taskgraph import TaskGraph +from taskgraph.util.schema import SchemaValidationError from taskgraph.util.vcs import GitRepository, HgRepository from taskgraph.util.yaml import load_yaml @@ -70,6 +72,44 @@ def test_show_taskgraph_parallel(run_taskgraph): assert res == 0 +def test_main_schema_validation_error_no_traceback( + mocker, run_taskgraph, capsys, caplog +): + """main() exits 1 without printing a Python traceback when a kind raises + SchemaValidationError — the message was already logged.""" + + def fail_load_tasks(self, *args, **kwargs): + raise SchemaValidationError("bad task data") + + mocker.patch.object(Kind, "load_tasks", fail_load_tasks) + + with caplog.at_level("ERROR"): + res = run_taskgraph(["full"]) + + assert res == 1 + captured = capsys.readouterr() + assert "Traceback" not in captured.err + assert "Traceback" not in captured.out + assert any("bad task data" in r.message for r in caplog.records) + + +def test_main_other_exception_prints_traceback(mocker, run_taskgraph, capsys): + """main() still prints a traceback for unexpected exceptions — + schema-error suppression must not swallow real bugs.""" + + def fail_load_tasks(self, *args, **kwargs): + raise RuntimeError("real bug") + + mocker.patch.object(Kind, "load_tasks", fail_load_tasks) + + res = run_taskgraph(["full"]) + assert res == 1 + + captured = capsys.readouterr() + assert "Traceback" in captured.err + assert "RuntimeError: real bug" in captured.err + + def test_show_taskgraph_parallel_bad_params(tmp_path): # Create parameter files that will cause processing errors bad_params_dir = tmp_path / "bad_params" diff --git a/test/test_util_schema.py b/test/test_util_schema.py index cb67412ca..e10700b2b 100644 --- a/test/test_util_schema.py +++ b/test/test_util_schema.py @@ -12,6 +12,7 @@ from taskgraph.util.schema import ( IndexSchema, Schema, + SchemaValidationError, optionally_keyed_by, resolve_keyed_by, validate_schema, @@ -23,16 +24,50 @@ class SampleSchema(Schema): y: str -class TestValidateSchema(unittest.TestCase): +class TestSchemaValidationError(unittest.TestCase): + """Schema validation errors use a dedicated class so callers can display + the message without a full Python traceback.""" + def test_valid(self): validate_schema(SampleSchema, {"x": 10, "y": "foo"}, "pfx") - def test_invalid(self): - try: + def test_msgspec_schema_raises_schema_validation_error(self): + with self.assertRaises(SchemaValidationError): validate_schema(SampleSchema, {"x": "not-int"}, "pfx") + + def test_dict_schema_raises_schema_validation_error(self): + with self.assertRaises(SchemaValidationError): + validate_schema({"name": str}, {"name": 123}, "pfx") + + def test_exception_chain_suppressed(self): + """validate_schema uses `raise ... from None` so the underlying + msgspec/voluptuous error doesn't show up as 'During handling of ...'""" + try: + validate_schema(SampleSchema, {"x": 1, "y": "a", "extra": True}, "pfx") + except SchemaValidationError as exc: + self.assertIsNone(exc.__cause__) + self.assertTrue(exc.__suppress_context__) + else: self.fail("no exception raised") - except Exception as e: - self.assertTrue(str(e).startswith("pfx\n")) + + def test_message_contains_prefix_and_object(self): + try: + validate_schema(SampleSchema, {"x": "not-int", "y": "a"}, "my-prefix") + except SchemaValidationError as exc: + msg = str(exc) + self.assertTrue(msg.startswith("my-prefix\n")) + self.assertIn("'x': 'not-int'", msg) + else: + self.fail("no exception raised") + + def test_schema_validate_propagates_msgspec_error(self): + """Schema.validate raises msgspec.ValidationError directly without + wrapping in an extra try/except.""" + with self.assertRaises(msgspec.ValidationError) as cm: + SampleSchema.validate({"x": 1, "y": "a", "extra": True}) + # The redundant try/except that re-raised was removed, so there should + # be no chained context exception. + self.assertIsNone(cm.exception.__context__) class TestSchemaFeatures(unittest.TestCase):