From ee21c35c77a3ab0e9e632da8ceb1e971936db5fd Mon Sep 17 00:00:00 2001
From: Zo Bot <github-automation@zo.computer>
Date: Mon, 22 Jun 2026 21:27:41 +0000
Subject: [PATCH] skip blank rows in load_csv so a trailing blank line no
 longer triggers KeyError

Closes #29. A file ending with a blank line (an extra \n after the final
record, or any number of blank trailing lines) used to produce a dict entry
that was missing every column, which then crashed with KeyError when the
caller tried to access the key column.
csv.reader returns an empty list for a fully blank row, so filtering
those out before building dicts is enough - blank rows in the middle of
the file are skipped the same way. Whitespace-only or comma-only lines
still parse as data, which preserves the existing behaviour for inputs
where those carry meaning.
---
 csv_diff/__init__.py   |  7 +++++-
 tests/test_csv_diff.py | 54 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/csv_diff/__init__.py b/csv_diff/__init__.py
index 59a2eaf..223b306 100644
--- a/csv_diff/__init__.py
+++ b/csv_diff/__init__.py
@@ -16,7 +16,12 @@ def load_csv(fp, key=None, dialect=None):
             pass
     fp = csv.reader(fp, dialect=(dialect or "excel"))
     headings = next(fp)
-    rows = [dict(zip(headings, line)) for line in fp]
+    # Skip blank rows so trailing newlines (or stray blank lines inside the
+    # file) don't get treated as data rows that are missing every column -
+    # that surfaced as an unhelpful KeyError on the key column. A row is
+    # "blank" if csv.reader returned no fields for it, which is what happens
+    # when the final record is just "\n" or a file ends with extra blank lines.
+    rows = [dict(zip(headings, line)) for line in fp if line]
     if key:
         keyfn = lambda r: r[key]
     else:
diff --git a/tests/test_csv_diff.py b/tests/test_csv_diff.py
index 0e3670f..fc51655 100644
--- a/tests/test_csv_diff.py
+++ b/tests/test_csv_diff.py
@@ -51,6 +51,22 @@
 1,Cleo,5
 2,Pancakes,3"""
 
+# Ends with one blank line, which csv.reader hands back as an empty row.
+SEVEN_TRAILING = """id,name,age
+1,Cleo,4
+2,Pancakes,2
+
+"""
+
+# A blank row directly after the header, plus two trailing blank lines.
+EIGHT_BLANK = """id,name,age
+
+1,Cleo,4
+2,Pancakes,2
+
+
+"""
+
 
 def test_row_changed():
     diff = compare(
@@ -115,3 +131,41 @@ def test_tsv():
         "columns_added": [],
         "columns_removed": [],
     } == diff
+
+
+def test_trailing_blank_line_is_skipped():
+    # https://github.com/simonw/csv-diff/issues/29 - a blank line after the
+    # final record is handed back by csv.reader as an empty row, which used
+    # to become a dict missing every column and KeyError'd on the key column.
+    # The loader should silently skip blank rows so the rest of the diff runs.
+    loaded = load_csv(io.StringIO(SEVEN_TRAILING), key="id")
+    assert loaded == {
+        "1": {"id": "1", "name": "Cleo", "age": "4"},
+        "2": {"id": "2", "name": "Pancakes", "age": "2"},
+    }
+
+
+def test_multiple_blank_lines_and_interior_blank_skipped():
+    # EIGHT_BLANK has an empty line straight after the header and two more
+    # after the last record; csv.reader yields an empty list for each, and
+    # the loader is expected to drop them all, leaving only the real rows.
+    expected = {
+        "1": {"id": "1", "name": "Cleo", "age": "4"},
+        "2": {"id": "2", "name": "Pancakes", "age": "2"},
+    }
+    assert load_csv(io.StringIO(EIGHT_BLANK), key="id") == expected
+
+
+def test_compare_with_trailing_blank_lines():
+    # End-to-end: both inputs end with a blank line ("\n\n"), so csv.reader
+    # yields an empty final row for each; the diff should still be empty (#29).
+    a = "id,name,age\n1,Cleo,4\n\n"
+    b = "id,name,age\n1,Cleo,4\n\n"
+    diff = compare(load_csv(io.StringIO(a), key="id"), load_csv(io.StringIO(b), key="id"))
+    assert diff == {
+        "added": [],
+        "removed": [],
+        "changed": [],
+        "columns_added": [],
+        "columns_removed": [],
+    }