Skip to content

Commit a8bef0e

Browse files
Cover aligned-zip fallback in symilar's _find_common
Adds a functional test that mocks `_HASH_BUCKET_PRODUCT_LIMIT` to zero and runs symilar over repeated-block content so the aligned-zip fallback path is always exercised. Covers the previously-untested branch flagged in the codecov report on #10881. Addresses Jacob's review request for test coverage on the "other form of the algorithm" introduced to cap quadratic behavior.
1 parent bc95dfc commit a8bef0e

1 file changed

Lines changed: 30 additions & 0 deletions

File tree

tests/checkers/unittest_symilar.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,3 +470,33 @@ def test_bad_short_form_option(capsys: CaptureFixture) -> None:
470470
assert ex.value.code == 2
471471
assert not out
472472
assert "unrecognized arguments: -j=0" in err
473+
474+
475+
def test_hash_bucket_product_limit_fallback(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """Check that duplicate detection still works on the aligned-zip fallback.

    When a hash bucket's Cartesian product exceeds
    ``_HASH_BUCKET_PRODUCT_LIMIT``, ``_find_common`` falls back to aligned-zip
    pairing. Mock the limit to zero so the fallback path is always taken over
    a file with repeated blocks and verify duplicate detection still reports
    the expected similar lines.

    Regression test for https://github.com/pylint-dev/pylint/pull/10881.
    """
    monkeypatch.setattr(symilar, "_HASH_BUCKET_PRODUCT_LIMIT", 0)
    # Three copies of the same 5-line block produce hash buckets with more
    # than one index, exercising the aligned-zip fallback meaningfully.
    block = "a = 1\nb = 2\nc = 3\nd = 4\ne = 5\n"
    content = block * 3
    file_a = tmp_path / "a.py"
    file_b = tmp_path / "b.py"
    # Pass the encoding explicitly so the fixture files do not depend on the
    # platform's locale default.
    file_a.write_text(content, encoding="utf-8")
    file_b.write_text(content, encoding="utf-8")

    output = StringIO()
    with redirect_stdout(output), pytest.raises(SystemExit) as ex:
        symilar.Run([str(file_a), str(file_b)])
    assert ex.value.code == 0
    out = output.getvalue()
    # Both files are 15 lines long and identical, so the whole range matches.
    assert "15 similar lines in 2 files" in out
    assert f"=={file_a}:[0:15]" in out
    assert f"=={file_b}:[0:15]" in out

0 commit comments

Comments
 (0)