From 5cdbf8f35d3ca0a51c91f45501ca8d9c39c16fb7 Mon Sep 17 00:00:00 2001 From: Zo Bot Date: Mon, 15 Jun 2026 19:36:35 +0000 Subject: [PATCH 1/2] =?UTF-8?q?raise=20a=20clear=20ValueError=20for=20empt?= =?UTF-8?q?y=20CSV=20input=20in=20load=5Fcsv=20=E2=80=94=20when=20a=20user?= =?UTF-8?q?=20runs=20csv-diff=20against=20an=20empty=20file,=20csv.reader?= =?UTF-8?q?=20returns=20no=20rows=20and=20the=20previous=20code=20let=20St?= =?UTF-8?q?opIteration=20bubble=20out=20of=20next(fp),=20producing=20a=20c?= =?UTF-8?q?onfusing=20traceback=20at=20the=20top=20of=20the=20call=20stack?= =?UTF-8?q?=20with=20no=20indication=20that=20the=20input=20was=20empty;?= =?UTF-8?q?=20the=20new=20try/except=20translates=20StopIteration=20into?= =?UTF-8?q?=20a=20typed=20ValueError=20with=20a=20descriptive=20message=20?= =?UTF-8?q?so=20the=20CLI=20shows=20'CSV=20input=20is=20empty=20(no=20head?= =?UTF-8?q?er=20row=20found)'=20and=20downstream=20loaders=20/=20Click=20e?= =?UTF-8?q?rror=20handling=20can=20react=20to=20it=20explicitly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- csv_diff/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/csv_diff/__init__.py b/csv_diff/__init__.py index 59a2eaf..4dfd7f4 100644 --- a/csv_diff/__init__.py +++ b/csv_diff/__init__.py @@ -15,7 +15,10 @@ def load_csv(fp, key=None, dialect=None): # Oh well, we tried. Fallback to the default. 
pass fp = csv.reader(fp, dialect=(dialect or "excel")) - headings = next(fp) + try: + headings = next(fp) + except StopIteration: + raise ValueError("CSV input is empty (no header row found)") rows = [dict(zip(headings, line)) for line in fp] if key: keyfn = lambda r: r[key] From e93ffac3acc6ee3b3a02461a6b0d9065e2bbbfc0 Mon Sep 17 00:00:00 2001 From: Zo Bot Date: Fri, 26 Jun 2026 19:47:33 +0000 Subject: [PATCH 2/2] render change and unchanged values through a shared helper that escapes internal double quotes and backslashes human_text() wrapped prev/current values in literal '"..."' on the change line and the unchanged row summary, but only used plain str(value) in human_row() for added/removed rows. A value containing a double quote, e.g. "hello \"world\"", rendered as 'name: "hello "world"" => "goodbye "cruel" world"' - the inner quotes were indistinguishable from the wrapping quotes, so a downstream reader could not tell where each value started or ended. The same ambiguity applied to backslash characters (\\ rendered as \\ inside the quoted value). The new _format_quoted() helper centralises the rendering: stringify, escape backslashes first, then double quotes, and wrap in a single pair of double quotes. The change lines and the unchanged summary now both go through it, so 'Cleo' renders as '"Cleo"' (matching the existing convention for change values) and 'hello "world"' renders as '"hello \\"world\\""' instead of the previous ambiguous form. Non-string values (ints, None) stringified via str() keep the existing behaviour for change lines. 
--- csv_diff/__init__.py | 21 ++++++++++++++++++-- tests/test_human_text.py | 41 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/csv_diff/__init__.py b/csv_diff/__init__.py index 4dfd7f4..4a6fb3c 100644 --- a/csv_diff/__init__.py +++ b/csv_diff/__init__.py @@ -152,7 +152,11 @@ def human_text(result, key=None, singular=None, plural=None, current=None, extra block.append(" {}: {}".format(key, details["key"])) for field, (prev_value, current_value) in details["changes"].items(): block.append( - ' {}: "{}" => "{}"'.format(field, prev_value, current_value) + " {}: {} => {}".format( + field, + _format_quoted(prev_value), + _format_quoted(current_value), + ) ) if extras: current_item = current[details["key"]] @@ -163,7 +167,7 @@ def human_text(result, key=None, singular=None, plural=None, current=None, extra block = [] block.append(" Unchanged:") for field, value in details["unchanged"].items(): - block.append(' {}: "{}"'.format(field, value)) + block.append(" {}: {}".format(field, _format_quoted(value))) block.append("") change_blocks.append("\n".join(block)) summary.append("\n".join(change_blocks)) @@ -200,6 +204,19 @@ def human_text(result, key=None, singular=None, plural=None, current=None, extra return (", ".join(title) + "\n\n" + ("\n".join(summary))).strip() +def _format_quoted(value): + """Render value as a double-quoted string with internal quotes/backslashes escaped. + + Values are rendered the same way for every path that prints them as + ``"..."``: a backslash escapes any backslash, then any double quote, so the + output is unambiguous even when the value itself contains those characters. + Non-string values go through ``str()`` first (``None`` renders as ``"None"``, + matching the old ``'"{}"'.format(value)`` output), so ints and floats are safe. 
+ """ + s = str(value) + return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"' + + def human_row(row, prefix=""): bits = [] for key, value in row.items(): diff --git a/tests/test_human_text.py b/tests/test_human_text.py index f1bd381..ff85a12 100644 --- a/tests/test_human_text.py +++ b/tests/test_human_text.py @@ -222,3 +222,44 @@ def test_no_key(): ).strip() == human_text(diff) ) + + +def test_row_changed_escapes_inner_quotes_in_change_values(): + # Regression: values containing double quotes used to render as + # name: "hello "world"" => "goodbye "cruel" world", where the + # internal quotes were indistinguishable from the wrapping quotes. + diff = compare( + load_csv(io.StringIO('id,name\na,"hello ""world"""\n'), key="id"), + load_csv(io.StringIO('id,name\na,"goodbye ""cruel"" world"\n'), key="id"), + ) + expected = ( + "1 row changed\n" + "\n" + " id: a\n" + ' name: "hello \\"world\\"" => "goodbye \\"cruel\\" world"' + ) + assert expected == human_text(diff, "id") + + + +def test_row_changed_escapes_inner_quotes_in_unchanged_values(): + # Regression: unchanged rows with quote-containing values used to + # render as name: "has "quote"" which was ambiguous. + diff = compare( + load_csv(io.StringIO('id,tag,name\na,outer,"hello ""world"""\n'), key="id"), + load_csv(io.StringIO('id,tag,name\na,outer,"hello ""world"" v2"\n'), key="id"), + show_unchanged=True, + ) + expected = ( + "1 row changed\n" + "\n" + " id: a\n" + ' name: "hello \\"world\\"" => "hello \\"world\\" v2"\n' + "\n" + " Unchanged:\n" + ' tag: "outer"' + ) + assert expected == human_text(diff, "id") + + +