Skip to content

Commit 53c46c2

Browse files
feat: Rebuild full search document for deletes
1 parent ddd197e commit 53c46c2

2 files changed

Lines changed: 132 additions & 26 deletions

File tree

openedx/core/djangoapps/content/search/api.py

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,12 @@
2222
from meilisearch.models.task import TaskInfo
2323
from opaque_keys import OpaqueKey
2424
from opaque_keys.edx.keys import CourseKey, UsageKey
25-
from opaque_keys.edx.locator import LibraryCollectionLocator, LibraryContainerLocator, LibraryLocatorV2
25+
from opaque_keys.edx.locator import (
26+
LibraryCollectionLocator,
27+
LibraryContainerLocator,
28+
LibraryLocatorV2,
29+
LibraryUsageLocatorV2,
30+
)
2631
from openedx_content import api as content_api
2732
from openedx_content import models_api as content_models
2833
from rest_framework.request import Request
@@ -333,6 +338,28 @@ def _update_index_docs(docs) -> None:
333338
_wait_for_meili_tasks(tasks)
334339

335340

341+
def _replace_index_docs(docs) -> None:
342+
"""
343+
Helper function that fully replaces the documents in the search index.
344+
345+
We use this when nested fields need to be removed from indexed documents, because
346+
Meilisearch partial updates do not reliably clear nested facet values.
347+
"""
348+
if not docs:
349+
return
350+
351+
client = _get_meilisearch_client()
352+
current_rebuild_index_name = _get_running_rebuild_index_name()
353+
354+
tasks = []
355+
if current_rebuild_index_name:
356+
# Keep the temporary rebuild index in sync with the live index.
357+
tasks.append(client.index(current_rebuild_index_name).add_documents(docs))
358+
tasks.append(client.index(STUDIO_INDEX_NAME).add_documents(docs))
359+
360+
_wait_for_meili_tasks(tasks)
361+
362+
336363
def only_if_meilisearch_enabled(f):
337364
"""
338365
Only call `f` if meilisearch is enabled
@@ -1030,13 +1057,65 @@ def upsert_content_library_index_docs(library_key: LibraryLocatorV2, full_index:
10301057
_update_index_docs(docs)
10311058

10321059

1060+
def _build_full_content_object_index_doc(key: OpaqueKey) -> tuple[dict | None, bool]:
1061+
"""
1062+
Build the current full search document for an object whose associations changed.
1063+
1064+
Returns a tuple of (doc, should_delete_index_doc). The second value is True when the
1065+
object no longer exists in a form that should remain indexed.
1066+
"""
1067+
if isinstance(key, LibraryCollectionLocator):
1068+
doc = searchable_doc_for_collection(key)
1069+
if doc.get("_disabled") or not doc.get(Fields.type):
1070+
return None, True
1071+
elif isinstance(key, LibraryContainerLocator):
1072+
doc = searchable_doc_for_container(key)
1073+
if not doc.get(Fields.display_name):
1074+
return None, True
1075+
doc.update(searchable_doc_collections(key))
1076+
match key.container_type:
1077+
case content_models.Unit.type_code:
1078+
doc.update(searchable_doc_containers(key, "subsections"))
1079+
case content_models.Subsection.type_code:
1080+
doc.update(searchable_doc_containers(key, "sections"))
1081+
elif isinstance(key, LibraryUsageLocatorV2):
1082+
try:
1083+
library_block = lib_api.get_component_from_usage_key(key)
1084+
except lib_api.ContentLibraryBlockNotFound:
1085+
return None, True
1086+
library_block_metadata = lib_api.LibraryXBlockMetadata.from_component(key.context_key, library_block)
1087+
doc = searchable_doc_for_library_block(library_block_metadata)
1088+
doc.update(searchable_doc_collections(key))
1089+
doc.update(searchable_doc_containers(key, "units"))
1090+
elif isinstance(key, UsageKey):
1091+
xblock = modulestore().get_item(key)
1092+
if xblock.scope_ids.block_type in EXCLUDED_XBLOCK_TYPES:
1093+
return None, False
1094+
doc = searchable_doc_for_course_block(xblock)
1095+
else: # pragma: no cover
1096+
raise TypeError(f"Unsupported content object key type: {type(key)!r}")
1097+
1098+
doc.update(searchable_doc_tags(key))
1099+
return doc, False
1100+
1101+
10331102
def upsert_content_object_tags_index_doc(key: OpaqueKey):
10341103
"""
10351104
Updates the tags data in document for the given Course/Library item
10361105
"""
1037-
doc = {Fields.id: meili_id_from_opaque_key(key)}
1038-
doc.update(searchable_doc_tags(key))
1039-
_update_index_docs([doc])
1106+
try:
1107+
doc, should_delete_doc = _build_full_content_object_index_doc(key)
1108+
except ItemNotFoundError:
1109+
should_delete_doc = True
1110+
doc = None
1111+
1112+
if should_delete_doc:
1113+
delete_index_doc(key)
1114+
return
1115+
if doc is None:
1116+
return
1117+
1118+
_replace_index_docs([doc])
10401119

10411120

10421121
def upsert_item_collections_index_docs(opaque_key: OpaqueKey):

openedx/core/djangoapps/content/search/tests/test_api.py

Lines changed: 49 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -608,22 +608,22 @@ def test_index_xblock_tags(self, mock_meilisearch) -> None:
608608

609609
# Build expected docs with tags at each stage
610610
doc_sequential_with_tags1 = {
611-
"id": self.doc_sequential["id"],
611+
**copy.deepcopy(self.doc_sequential),
612612
"tags": {
613613
'taxonomy': ['A'],
614614
'level0': ['A > one', 'A > two']
615-
}
615+
},
616616
}
617617
doc_sequential_with_tags2 = {
618-
"id": self.doc_sequential["id"],
618+
**copy.deepcopy(self.doc_sequential),
619619
"tags": {
620620
'taxonomy': ['A', 'B'],
621621
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
622-
}
622+
},
623623
}
624624

625-
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
626-
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
625+
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 2
626+
mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
627627
[
628628
call([doc_sequential_with_tags1]),
629629
call([doc_sequential_with_tags2]),
@@ -667,22 +667,22 @@ def test_index_library_block_tags(self, mock_meilisearch) -> None:
667667

668668
# Build expected docs with tags at each stage
669669
doc_problem_with_tags1 = {
670-
"id": self.doc_problem1["id"],
670+
**copy.deepcopy(self.doc_problem1),
671671
"tags": {
672672
'taxonomy': ['A'],
673673
'level0': ['A > one', 'A > two']
674-
}
674+
},
675675
}
676676
doc_problem_with_tags2 = {
677-
"id": self.doc_problem1["id"],
677+
**copy.deepcopy(self.doc_problem1),
678678
"tags": {
679679
'taxonomy': ['A', 'B'],
680680
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
681-
}
681+
},
682682
}
683683

684-
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
685-
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
684+
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 2
685+
mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
686686
[
687687
call([doc_problem_with_tags1]),
688688
call([doc_problem_with_tags2]),
@@ -873,22 +873,22 @@ def test_index_tags_in_collections(self, mock_meilisearch) -> None:
873873

874874
# Build expected docs with tags at each stage
875875
doc_collection_with_tags1 = {
876-
"id": "lib-collectionorg1libmycol-5b647617",
876+
**copy.deepcopy(self.collection_dict),
877877
"tags": {
878878
'taxonomy': ['A'],
879879
'level0': ['A > one', 'A > two']
880-
}
880+
},
881881
}
882882
doc_collection_with_tags2 = {
883-
"id": "lib-collectionorg1libmycol-5b647617",
883+
**copy.deepcopy(self.collection_dict),
884884
"tags": {
885885
'taxonomy': ['A', 'B'],
886886
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
887-
}
887+
},
888888
}
889889

890-
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
891-
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
890+
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 2
891+
mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
892892
[
893893
call([doc_collection_with_tags1]),
894894
call([doc_collection_with_tags2]),
@@ -1144,30 +1144,57 @@ def test_index_tags_in_containers(self, container_type, container_id, mock_meili
11441144
tagging_api.tag_object(container_key, self.taxonomyB, ["three", "four"])
11451145

11461146
# Build expected docs with tags at each stage
1147+
container_doc = copy.deepcopy(getattr(self, f"{container_type}_dict"))
1148+
if container_type == "unit":
1149+
container_doc["subsections"] = {
1150+
"display_name": [self.subsection_dict["display_name"]],
1151+
"key": [self.subsection_key],
1152+
}
1153+
elif container_type == "subsection":
1154+
container_doc["sections"] = {
1155+
"display_name": [self.section_dict["display_name"]],
1156+
"key": [self.section_key],
1157+
}
1158+
11471159
doc_unit_with_tags1 = {
1160+
**container_doc,
11481161
"id": container_id,
11491162
"tags": {
11501163
'taxonomy': ['A'],
11511164
'level0': ['A > one', 'A > two']
1152-
}
1165+
},
11531166
}
11541167
doc_unit_with_tags2 = {
1168+
**copy.deepcopy(container_doc),
11551169
"id": container_id,
11561170
"tags": {
11571171
'taxonomy': ['A', 'B'],
11581172
'level0': ['A > one', 'A > two', 'B > four', 'B > three']
1159-
}
1173+
},
11601174
}
11611175

1162-
assert mock_meilisearch.return_value.index.return_value.update_documents.call_count == 2
1163-
mock_meilisearch.return_value.index.return_value.update_documents.assert_has_calls(
1176+
assert mock_meilisearch.return_value.index.return_value.add_documents.call_count == 2
1177+
mock_meilisearch.return_value.index.return_value.add_documents.assert_has_calls(
11641178
[
11651179
call([doc_unit_with_tags1]),
11661180
call([doc_unit_with_tags2]),
11671181
],
11681182
any_order=True,
11691183
)
11701184

1185+
@override_settings(MEILISEARCH_ENABLED=True)
1186+
def test_replace_full_doc_when_container_tags_are_cleared(self, mock_meilisearch) -> None:
1187+
expected_doc = copy.deepcopy(self.unit_dict)
1188+
expected_doc["subsections"] = {
1189+
"display_name": [self.subsection_dict["display_name"]],
1190+
"key": [self.subsection_key],
1191+
}
1192+
expected_doc["tags"] = {}
1193+
1194+
api.upsert_content_object_tags_index_doc(self.unit.container_key)
1195+
1196+
mock_meilisearch.return_value.index.return_value.add_documents.assert_called_once_with([expected_doc])
1197+
11711198
@override_settings(MEILISEARCH_ENABLED=True)
11721199
def test_block_in_units(self, mock_meilisearch) -> None:
11731200
"""Test that search index is updated correctly when we add a problem block to a unit"""

0 commit comments

Comments
 (0)