From f3eade9159e8a45a3d0da88ab2552a2fc1aff91e Mon Sep 17 00:00:00 2001
From: Braden MacDonald
Date: Mon, 4 May 2026 17:13:47 -0700
Subject: [PATCH] fix: make studio reindex more robust, provide better logging (#38498)

---
 openedx/core/djangoapps/content/search/api.py | 33 ++++++++++++++-----
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/openedx/core/djangoapps/content/search/api.py b/openedx/core/djangoapps/content/search/api.py
index 974fd90622bc..371e830260f0 100644
--- a/openedx/core/djangoapps/content/search/api.py
+++ b/openedx/core/djangoapps/content/search/api.py
@@ -541,7 +541,11 @@ def init_index(status_cb: Callable[[str], None] | None = None, warn_cb: Callable
     reconcile_index(status_cb=status_cb, warn_cb=warn_cb)
 
 
-def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
+def index_course(
+    course_key: CourseKey,
+    index_name: str | None = None,
+    status_cb: Callable[[str], None] | None = None,
+) -> list[dict]:
     """
     Rebuilds the index for a given course.
     """
@@ -550,9 +554,16 @@ def index_course(course_key: CourseKey, index_name: str | None = None) -> list:
     docs = []
     if index_name is None:
         index_name = STUDIO_INDEX_NAME
+    if status_cb is None:
+        status_cb = log.info
+
     # Pre-fetch the course with all of its children:
     course = store.get_course(course_key, depth=None)
 
+    if course is None:
+        status_cb(f"Error: course {course_key} does not seem to exist! It may have been incompletely deleted.")
+        return []
+
     def add_with_children(block):
         """Recursively index the given XBlock/component"""
         doc = searchable_doc_for_course_block(block)
@@ -585,6 +596,8 @@ def rebuild_index( # pylint: disable=too-many-statements
     keys_indexed = []
     if incremental:
         keys_indexed = list(IncrementalIndexCompleted.objects.values_list("context_key", flat=True))
+        if keys_indexed:
+            status_cb(f"Resuming incremental index - {len(keys_indexed)} courses/libraries already indexed.")
     lib_keys = [
         lib.library_key
         for lib in lib_api.ContentLibrary.objects.select_related("org").only("org", "slug").order_by("-id")
@@ -698,7 +711,8 @@ def index_container_batch(batch, num_done, library_key) -> int:
         collections = content_api.get_collections(library.learning_package_id, enabled=True)
         num_collections = collections.count()
         num_collections_done = 0
-        status_cb(f"{num_collections_done}/{num_collections}. Now indexing collections in library {lib_key}")
+        if num_collections:
+            status_cb(f"Now indexing {num_collections} collections in library {lib_key}")
         paginator = Paginator(collections, 100)
         for p in paginator.page_range:
             num_collections_done = index_collection_batch(
@@ -706,15 +720,14 @@ def index_container_batch(batch, num_done, library_key) -> int:
                 num_collections_done,
                 lib_key,
             )
-        if incremental:
-            IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
-        status_cb(f"{num_collections_done}/{num_collections} collections indexed for library {lib_key}")
+        status_cb(f"Indexed {num_collections_done}/{num_collections} collections in library {lib_key}")
 
         # Similarly, batch process Containers (units, sections, etc) in pages of 100
         containers = content_api.get_containers(library.learning_package_id)
         num_containers = containers.count()
         num_containers_done = 0
-        status_cb(f"{num_containers_done}/{num_containers}. Now indexing containers in library {lib_key}")
+        if num_containers:
+            status_cb(f"Now indexing {num_containers} containers in library {lib_key}")
         paginator = Paginator(containers, 100)
         for p in paginator.page_range:
             num_containers_done = index_container_batch(
@@ -722,7 +735,9 @@ def index_container_batch(batch, num_done, library_key) -> int:
                 num_containers_done,
                 lib_key,
             )
-        status_cb(f"{num_containers_done}/{num_containers} containers indexed for library {lib_key}")
+        status_cb(f"Indexed {num_containers_done}/{num_containers} containers in library {lib_key}")
+
+        # Mark this library as indexed:
         if incremental:
             IncrementalIndexCompleted.objects.get_or_create(context_key=lib_key)
 
@@ -732,7 +747,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
 
     status_cb("Indexing courses...")
     # To reduce memory usage on large instances, split up the CourseOverviews into pages of 1,000 courses:
-    paginator = Paginator(CourseOverview.objects.only("id", "display_name"), 1000)
+    paginator = Paginator(CourseOverview.objects.only("id", "display_name").order_by("-created", "id"), 1000)
     for p in paginator.page_range:
         for course in paginator.page(p).object_list:
             status_cb(
@@ -741,7 +756,7 @@ def index_container_batch(batch, num_done, library_key) -> int:
             if course.id in keys_indexed:
                 num_contexts_done += 1
                 continue
-            course_docs = index_course(course.id, index_name)
+            course_docs = index_course(course.id, index_name, status_cb)
             if incremental:
                 IncrementalIndexCompleted.objects.get_or_create(context_key=course.id)
             num_contexts_done += 1