Skip to content

Commit 3bab47d

Browse files
fix(bm25): replace hardcoded hex UIDs in integration tests with dynamic lookup
Add a uidHex() helper that queries Dgraph for the actual hex UID of a decimal UID, making the tests resilient to changes in UID assignment order across different cluster states. Also fix TestBM25DocumentDeletion, which asserted "fox" against a response that selected only uid (not description_bm25). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 93f126e commit 3bab47d

1 file changed

Lines changed: 76 additions & 39 deletions

File tree

query/query_bm25_test.go

Lines changed: 76 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,23 @@ import (
1919
"github.com/stretchr/testify/require"
2020
)
2121

22+
// uidHex queries Dgraph for the UID string it reports for the node addressed
// by decimalUID and returns the "uid" field of the first entry in the "me"
// result list.
23+
// Tests compare this value against UIDs in query responses instead of
// hardcoding hex literals, which would depend on UID assignment order.
//
// The query is run through processQueryNoErr. The test is failed via require
// if the response cannot be decoded as JSON or if the "me" list is empty.
// NOTE(review): the returned string is presumably in Dgraph's "0x…" hex form,
// as the name suggests — confirm against the server's JSON output.
24+
func uidHex(t *testing.T, decimalUID int) string {
25+
t.Helper()
26+
// Address the node by its decimal UID and select only its uid field.
js := processQueryNoErr(t, fmt.Sprintf(`{ me(func: uid(%d)) { uid } }`, decimalUID))
27+
// Mirrors only the part of the response that is read: data.me[].uid.
var resp struct {
28+
Data struct {
29+
Me []struct {
30+
UID string `json:"uid"`
31+
} `json:"me"`
32+
} `json:"data"`
33+
}
34+
require.NoError(t, json.Unmarshal([]byte(js), &resp))
35+
// Guard the index below: an empty "me" list fails the test here with a
// descriptive message rather than panicking on Me[0].
require.NotEmpty(t, resp.Data.Me, "UID %d should exist", decimalUID)
36+
return resp.Data.Me[0].UID
37+
}
38+
2239
func TestBM25Basic(t *testing.T) {
2340
query := `
2441
{
@@ -376,9 +393,9 @@ func TestBM25IncrementalAddBatch(t *testing.T) {
376393
js = processQueryNoErr(t, countQuery)
377394
require.Contains(t, js, `"count":8`)
378395

379-
// Verify specific new UIDs are searchable.
380-
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "whiskey")) { uid } }`)
381-
require.Contains(t, js, `"0x25e"`) // 606
396+
// Verify specific new terms are searchable.
397+
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "whiskey")) { uid description_bm25 } }`)
398+
require.Contains(t, js, "whiskey")
382399
}
383400

384401
func TestBM25CorpusStatsAffectIDF(t *testing.T) {
@@ -417,7 +434,7 @@ func TestBM25CorpusStatsAffectIDF(t *testing.T) {
417434
scoresAfter := parseScoresFromJSON(t, jsAfter)
418435

419436
// Compare score for UID 503 ("fox fox fox") — should increase.
420-
uid503 := "0x1f7"
437+
uid503 := uidHex(t, 503)
421438
before, ok1 := scoresBefore[uid503]
422439
after, ok2 := scoresAfter[uid503]
423440
require.True(t, ok1 && ok2, "UID 503 should appear in both before and after results")
@@ -432,14 +449,16 @@ func TestBM25DocumentUpdate(t *testing.T) {
432449
deleteTriplesInCluster(`<620> <description_bm25> * .`)
433450
})
434451

452+
uid620 := uidHex(t, 620)
453+
435454
// Should rank top for "fox".
436455
js := processQueryNoErr(t, `
437456
{
438457
me(func: bm25(description_bm25, "fox"), first: 1) {
439458
uid
440459
}
441460
}`)
442-
require.Contains(t, js, `"0x26c"`) // 620
461+
require.Contains(t, js, `"`+uid620+`"`)
443462

444463
// Update to remove "fox", add "cat".
445464
deleteTriplesInCluster(`<620> <description_bm25> "fox fox fox fox" .`)
@@ -452,7 +471,7 @@ func TestBM25DocumentUpdate(t *testing.T) {
452471
uid
453472
}
454473
}`)
455-
require.NotContains(t, js, `"0x26c"`)
474+
require.NotContains(t, js, `"`+uid620+`"`)
456475

457476
// Should appear in "cat" results.
458477
js = processQueryNoErr(t, `
@@ -461,7 +480,7 @@ func TestBM25DocumentUpdate(t *testing.T) {
461480
uid
462481
}
463482
}`)
464-
require.Contains(t, js, `"0x26c"`)
483+
require.Contains(t, js, `"`+uid620+`"`)
465484
}
466485

467486
func TestBM25DocumentDeletion(t *testing.T) {
@@ -471,9 +490,11 @@ func TestBM25DocumentDeletion(t *testing.T) {
471490
deleteTriplesInCluster(`<625> <description_bm25> * .`)
472491
})
473492

493+
uid625 := uidHex(t, 625)
494+
474495
// Should find the elephant doc.
475496
js := processQueryNoErr(t, `{ me(func: bm25(description_bm25, "elephant")) { uid } }`)
476-
require.Contains(t, js, `"0x271"`) // 625
497+
require.Contains(t, js, `"`+uid625+`"`)
477498

478499
// Delete it.
479500
deleteTriplesInCluster(`<625> <description_bm25> "unique elephant term" .`)
@@ -483,7 +504,7 @@ func TestBM25DocumentDeletion(t *testing.T) {
483504
require.JSONEq(t, `{"data": {"me":[]}}`, js)
484505

485506
// Baseline "fox" results should be unaffected.
486-
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "fox")) { uid } }`)
507+
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "fox")) { uid description_bm25 } }`)
487508
require.Contains(t, js, "fox")
488509
}
489510

@@ -499,7 +520,7 @@ func TestBM25ScoreStabilityAsCorpusGrows(t *testing.T) {
499520
}
500521
}
501522
`
502-
uid503 := "0x1f7"
523+
uid503 := uidHex(t, 503)
503524

504525
// Phase 1: baseline score.
505526
js1 := processQueryNoErr(t, scoreQuery)
@@ -642,8 +663,8 @@ func TestBM25EdgeCaseLongDocument(t *testing.T) {
642663
js := processQueryNoErr(t, scoreQuery)
643664
scores := parseScoresFromJSON(t, js)
644665

645-
uid503 := "0x1f7" // "fox fox fox" (doclen=3)
646-
uid645 := "0x285" // long doc (doclen~500)
666+
uid503 := uidHex(t, 503) // "fox fox fox" (doclen=3)
667+
uid645 := uidHex(t, 645) // long doc (doclen~500)
647668
s503, ok1 := scores[uid503]
648669
s645, ok2 := scores[uid645]
649670
require.True(t, ok1, "UID 503 must appear in fox results")
@@ -667,17 +688,21 @@ func TestBM25EdgeCaseUnicode(t *testing.T) {
667688
`)
668689
})
669690

691+
uid650 := uidHex(t, 650)
692+
uid651 := uidHex(t, 651)
693+
uid652 := uidHex(t, 652)
694+
670695
// Query German term.
671696
js := processQueryNoErr(t, `{ me(func: bm25(description_bm25, "Fuchs")) { uid } }`)
672-
require.Contains(t, js, `"0x28a"`) // 650
697+
require.Contains(t, js, `"`+uid650+`"`)
673698

674699
// Query French term.
675700
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "renard")) { uid } }`)
676-
require.Contains(t, js, `"0x28b"`) // 651
701+
require.Contains(t, js, `"`+uid651+`"`)
677702

678703
// Query Spanish term.
679704
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "zorro")) { uid } }`)
680-
require.Contains(t, js, `"0x28c"`) // 652
705+
require.Contains(t, js, `"`+uid652+`"`)
681706
}
682707

683708
func TestBM25EdgeCaseAllStopwordsDoc(t *testing.T) {
@@ -686,9 +711,11 @@ func TestBM25EdgeCaseAllStopwordsDoc(t *testing.T) {
686711
deleteTriplesInCluster(`<655> <description_bm25> * .`)
687712
})
688713

714+
uid655 := uidHex(t, 655)
715+
689716
// Query "the" — should return empty since "the" is a stopword.
690717
js := processQueryNoErr(t, `{ me(func: bm25(description_bm25, "the")) { uid } }`)
691-
require.NotContains(t, js, `"0x28f"`) // 655 should not appear
718+
require.NotContains(t, js, `"`+uid655+`"`) // 655 should not appear
692719

693720
// But the doc should exist via has().
694721
js = processQueryNoErr(t, `
@@ -697,7 +724,7 @@ func TestBM25EdgeCaseAllStopwordsDoc(t *testing.T) {
697724
uid
698725
}
699726
}`)
700-
require.Contains(t, js, `"0x28f"`)
727+
require.Contains(t, js, `"`+uid655+`"`)
701728
}
702729

703730
func TestBM25WithUidFilter(t *testing.T) {
@@ -711,12 +738,16 @@ func TestBM25WithUidFilter(t *testing.T) {
711738
}
712739
`
713740
js := processQueryNoErr(t, query)
741+
uid501 := uidHex(t, 501)
742+
uid502 := uidHex(t, 502)
743+
uid503 := uidHex(t, 503)
744+
uid506 := uidHex(t, 506)
714745
// Should contain only UIDs 501 and 503.
715-
require.Contains(t, js, `"0x1f5"`) // 501
716-
require.Contains(t, js, `"0x1f7"`) // 503
717-
// Should NOT contain other fox docs like 502, 506, 507.
718-
require.NotContains(t, js, `"0x1f6"`) // 502
719-
require.NotContains(t, js, `"0x1fa"`) // 506
746+
require.Contains(t, js, `"`+uid501+`"`)
747+
require.Contains(t, js, `"`+uid503+`"`)
748+
// Should NOT contain other fox docs like 502, 506.
749+
require.NotContains(t, js, `"`+uid502+`"`)
750+
require.NotContains(t, js, `"`+uid506+`"`)
720751
}
721752

722753
func TestBM25ScoreValuesAreValidFloats(t *testing.T) {
@@ -770,22 +801,23 @@ func TestBM25IncrementalAddThenDeleteThenReadd(t *testing.T) {
770801

771802
// Phase 1: add with "elephant".
772803
require.NoError(t, addTriplesToCluster(`<670> <description_bm25> "elephant roams the savanna" .`))
804+
uid670 := uidHex(t, 670)
773805
js := processQueryNoErr(t, `{ me(func: bm25(description_bm25, "elephant")) { uid } }`)
774-
require.Contains(t, js, `"0x29e"`) // 670
806+
require.Contains(t, js, `"`+uid670+`"`)
775807

776808
// Phase 2: delete.
777809
deleteTriplesInCluster(`<670> <description_bm25> "elephant roams the savanna" .`)
778810
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "elephant")) { uid } }`)
779-
require.NotContains(t, js, `"0x29e"`)
811+
require.NotContains(t, js, `"`+uid670+`"`)
780812

781813
// Phase 3: re-add with different content.
782814
require.NoError(t, addTriplesToCluster(`<670> <description_bm25> "penguin waddles on the ice" .`))
783815
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "penguin")) { uid } }`)
784-
require.Contains(t, js, `"0x29e"`)
816+
require.Contains(t, js, `"`+uid670+`"`)
785817

786818
// "elephant" should still not match 670.
787819
js = processQueryNoErr(t, `{ me(func: bm25(description_bm25, "elephant")) { uid } }`)
788-
require.NotContains(t, js, `"0x29e"`)
820+
require.NotContains(t, js, `"`+uid670+`"`)
789821
}
790822

791823
func TestBM25NonIndexedPredicateError(t *testing.T) {
@@ -828,11 +860,11 @@ func TestBM25ConcurrentBatchAdd(t *testing.T) {
828860

829861
// Spot-check a doc from each batch.
830862
for batch := 0; batch < 5; batch++ {
831-
uid := 680 + batch*4
832-
hexUID := fmt.Sprintf(`"0x%x"`, uid)
863+
decUID := 680 + batch*4
864+
hexUID := uidHex(t, decUID)
833865
term := fmt.Sprintf("batch%d", batch)
834866
js = processQueryNoErr(t, fmt.Sprintf(`{ me(func: bm25(description_bm25, "%s")) { uid } }`, term))
835-
require.Contains(t, js, hexUID, "doc %d from batch %d should be searchable", uid, batch)
867+
require.Contains(t, js, `"`+hexUID+`"`, "doc %d from batch %d should be searchable", decUID, batch)
836868
}
837869
}
838870

@@ -895,10 +927,12 @@ func TestBM25ExactScoreValues(t *testing.T) {
895927
// Doc 851 "quasar nebula pulsar": tf=1, b=0 → score = idf * 2.2 * 1 / 2.2 = idf
896928
expected851 := idf * (k + 1) * 1.0 / (k + 1.0)
897929

898-
actual850, ok := scores["0x352"] // 850
899-
require.True(t, ok, "UID 850 (0x352) must be in results")
900-
actual851, ok := scores["0x353"] // 851
901-
require.True(t, ok, "UID 851 (0x353) must be in results")
930+
uid850 := uidHex(t, 850)
931+
uid851 := uidHex(t, 851)
932+
actual850, ok := scores[uid850]
933+
require.True(t, ok, "UID 850 (%s) must be in results", uid850)
934+
actual851, ok := scores[uid851]
935+
require.True(t, ok, "UID 851 (%s) must be in results", uid851)
902936

903937
require.InEpsilon(t, expected850, actual850, 1e-6,
904938
"Doc 850 score mismatch: expected %f, got %f (N=%f, df=%f, idf=%f)",
@@ -940,8 +974,10 @@ func TestBM25BM15NoLengthNormalization(t *testing.T) {
940974
js := processQueryNoErr(t, scoreQuery)
941975
scores := parseScoresFromJSON(t, js)
942976

943-
score860, ok1 := scores["0x35c"] // 860
944-
score861, ok2 := scores["0x35d"] // 861
977+
uid860 := uidHex(t, 860)
978+
uid861 := uidHex(t, 861)
979+
score860, ok1 := scores[uid860]
980+
score861, ok2 := scores[uid861]
945981
require.True(t, ok1, "UID 860 must be in results")
946982
require.True(t, ok2, "UID 861 must be in results")
947983

@@ -964,8 +1000,8 @@ func TestBM25BM15NoLengthNormalization(t *testing.T) {
9641000
js = processQueryNoErr(t, scoreQueryDefault)
9651001
scoresDefault := parseScoresFromJSON(t, js)
9661002

967-
defScore860, ok1 := scoresDefault["0x35c"]
968-
defScore861, ok2 := scoresDefault["0x35d"]
1003+
defScore860, ok1 := scoresDefault[uid860]
1004+
defScore861, ok2 := scoresDefault[uid861]
9691005
require.True(t, ok1, "UID 860 must be in default results")
9701006
require.True(t, ok2, "UID 861 must be in default results")
9711007
require.Greater(t, defScore860, defScore861,
@@ -999,8 +1035,9 @@ func TestBM25SingleMatchingDocument(t *testing.T) {
9991035

10001036
require.Len(t, scores, 1, "exactly one document should match 'aardvark'")
10011037

1002-
actual, ok := scores["0x361"] // 865
1003-
require.True(t, ok, "UID 865 (0x361) must be in results")
1038+
uid865 := uidHex(t, 865)
1039+
actual, ok := scores[uid865]
1040+
require.True(t, ok, "UID 865 (%s) must be in results", uid865)
10041041

10051042
// With df=1, tf=1, b=0, k=1.2:
10061043
// idf = log1p((N - 1 + 0.5) / (1 + 0.5)) = log1p((N - 0.5) / 1.5)

0 commit comments

Comments
 (0)