From f3eade9159e8a45a3d0da88ab2552a2fc1aff91e Mon Sep 17 00:00:00 2001
From: Braden MacDonald
Date: Mon, 4 May 2026 17:13:47 -0700
Subject: [PATCH] fix: make studio reindex more robust, provide better logging (#38498)

---
 openedx/core/djangoapps/content/search/api.py | 33 ++++++++++++++-----
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index 974fd90622bc..371e830260f0 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -541,7 +541,11 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
     reconcile_index(status_cb=status_cb, warn_cb=warn_cb)
 
 
-def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
+def index_course(
+    course_key: CourseKey,
+    index_name: str | None = None,
+    status_cb: Callable[[str], None] | None = None,
+) -> list[dict]:
     """
     Rebuilds the index for a given course.
     """
@@ -550,9 +554,16 @@ def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
     docs = []
     if index_name is None:
         index_name = STUDIO_INDEX_NAME
+    if status_cb is None:
+        status_cb = log.info
+
     # Pre-fetch the course with all of its children:
     course = store.get_course(course_key, depth=None)
 
+    if course is None:
+        status_cb(f"Error: course {course_key} does not seem to exist! It may have been incompletely deleted.")
+        return []
+
     def add_with_children(block):
         """Recursively index the given XBlock/component"""
         doc = searchable_doc_for_course_block(block)
@@ -585,6 +596,8 @@ def rebuild_index( # pylint: disable=too-many-statements
     keys_indexed = []
     if incremental:
         keys_indexed = list(IncrementalIndexCompleted.objects.values_list("context_key", flat=True))
+        if keys_indexed:
+            status_cb(f"Resuming incremental index - {len(keys_indexed)} courses/libraries already indexed.")
     lib_keys = [
         lib.library_key
         for lib in lib_api.ContentLibrary.objects.select_related("org").only("org", "slug").order_by("-id")
@@ -698,7 +711,8 @@ def index_container_batch(batch, num_done, library_key) -> int:
         collections = content_api.get_collections(library.learning_package_id, enabled=True)
         num_collections = collections.count()
         num_collections_done = 0
-        status_cb(f"{num_collections_done}/{num_collections}. Now indexing collections in library {lib_key}")
+        if num_collections:
+            status_cb(f"Now indexing {num_collections} collections in library {lib_key}")
         paginator = Paginator(collections, 100)
         for p in paginator.page_range:
             num_collections_done = index_collection_batch(
@@ -706,15 +720,14 @@ def index_container_batch(batch, num_done, library_key) -> int:
                 num_collections_done,
                 lib_key,
             )
-        if incremental:
-            IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
-        status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")
+        status_cb(f"Indexed {num_collections_done}/{num_collections} collections in library {lib_key}")
 
         # Similarly, batch process Containers (units, sections, etc) in pages of 100
         containers = content_api.get_containers(library.learning_package_id)
         num_containers = containers.count()
         num_containers_done = 0
-        status_cb(f"{num_containers_done}/{num_containers}. Now indexing containers in library {lib_key}")
+        if num_containers:
+            status_cb(f"Now indexing {num_containers} containers in library {lib_key}")
         paginator = Paginator(containers, 100)
         for p in paginator.page_range:
             num_containers_done = index_container_batch(
@@ -722,7 +735,9 @@ def index_container_batch(batch, num_done, library_key) -> int:
                 num_containers_done,
                 lib_key,
             )
-        status_cb(f"{num_containers_done}/{num_containers} containers indexed for library {lib_key}")
+        status_cb(f"Indexed {num_containers_done}/{num_containers} containers in library {lib_key}")
+
+        # Mark this library as indexed:
         if incremental:
             IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
 
@@ -732,7 +747,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
 
     status_cb("Indexing courses...")
     # To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
-    paginator = Paginator(CourseOverview.objects.only("id", "display_name"), 1000)
+    paginator = Paginator(CourseOverview.objects.only("id", "display_name").order_by("-created", "id"), 1000)
     for p in paginator.page_range:
         for course in paginator.page(p).object_list:
             status_cb(
@@ -741,7 +756,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
             if course.id in keys_indexed:
                 num_contexts_done += 1
                 continue
-            course_docs = index_course(course.id, index_name)
+            course_docs = index_course(course.id, index_name, status_cb)
             if incremental:
                 IncrementalIndexCompleted.objects.get_or_create(context_key=course.id)
             num_contexts_done += 1