diff --git a/csv_diff/__init__.py b/csv_diff/__init__.py index 59a2eaf..4a6fb3c 100644 --- a/csv_diff/__init__.py +++ b/csv_diff/__init__.py @@ -15,7 +15,10 @@ def load_csv(fp, key=None, dialect=None): # Oh well, we tried. Fallback to the default. pass fp = csv.reader(fp, dialect=(dialect or "excel")) - headings = next(fp) + try: + headings = next(fp) + except StopIteration: + raise ValueError("CSV input is empty (no header row found)") rows = [dict(zip(headings, line)) for line in fp] if key: keyfn = lambda r: r[key] @@ -149,7 +152,11 @@ def human_text(result, key=None, singular=None, plural=None, current=None, extra block.append(" {}: {}".format(key, details["key"])) for field, (prev_value, current_value) in details["changes"].items(): block.append( - ' {}: "{}" => "{}"'.format(field, prev_value, current_value) + " {}: {} => {}".format( + field, + _format_quoted(prev_value), + _format_quoted(current_value), + ) ) if extras: current_item = current[details["key"]] @@ -160,7 +167,7 @@ def human_text(result, key=None, singular=None, plural=None, current=None, extra block = [] block.append(" Unchanged:") for field, value in details["unchanged"].items(): - block.append(' {}: "{}"'.format(field, value)) + block.append(" {}: {}".format(field, _format_quoted(value))) block.append("") change_blocks.append("\n".join(block)) summary.append("\n".join(change_blocks)) @@ -197,6 +204,19 @@ def human_text(result, key=None, singular=None, plural=None, current=None, extra return (", ".join(title) + "\n\n" + ("\n".join(summary))).strip() +def _format_quoted(value): + """Render value as a double-quoted string with internal quotes/backslashes escaped. + + Values are rendered the same way for every path that prints them as + ``"..."``: a backslash escapes any backslash, then any double quote, so the + output is unambiguous even when the value itself contains those characters. + Non-string values are stringified first so the helper is safe to call with + integers, floats, ``None``, or anything else produced by ``compare``. + """ + s = "" if value is None else str(value) + return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"' + + def human_row(row, prefix=""): bits = [] for key, value in row.items(): diff --git a/tests/test_human_text.py b/tests/test_human_text.py index f1bd381..ff85a12 100644 --- a/tests/test_human_text.py +++ b/tests/test_human_text.py @@ -222,3 +222,44 @@ def test_no_key(): ).strip() == human_text(diff) ) + + +def test_row_changed_escapes_inner_quotes_in_change_values(): + # Regression: values containing double quotes used to render as + # name: "hello "world"" => "goodbye "cruel" world", where the + # internal quotes were indistinguishable from the wrapping quotes. + diff = compare( + load_csv(io.StringIO('id,name\na,"hello ""world"""\n'), key="id"), + load_csv(io.StringIO('id,name\na,"goodbye ""cruel"" world"\n'), key="id"), + ) + expected = ( + "1 row changed\n" + "\n" + " id: a\n" + ' name: "hello \\"world\\"" => "goodbye \\"cruel\\" world"' + ) + assert expected == human_text(diff, "id") + + + +def test_row_changed_escapes_inner_quotes_in_unchanged_values(): + # Regression: unchanged rows with quote-containing values used to + # render as name: "has "quote"" which was ambiguous. + diff = compare( + load_csv(io.StringIO('id,tag,name\na,outer,"hello ""world"""\n'), key="id"), + load_csv(io.StringIO('id,tag,name\na,outer,"hello ""world"" v2"\n'), key="id"), + show_unchanged=True, + ) + expected = ( + "1 row changed\n" + "\n" + " id: a\n" + ' name: "hello \\"world\\"" => "hello \\"world\\" v2"\n' + "\n" + " Unchanged:\n" + ' tag: "outer"' + ) + assert expected == human_text(diff, "id") + + +