Skip to content

Commit 3fa7794

Browse files
feat: update openedx-core: new catalog models + backfill migration (#38023)
* feat: use new version of openedx-core * feat: Use openedx_catalog app, backfill it with all known courses * feat: properly set "created" timestamp on course runs during backfill * fix: better normalization of language codes * feat: keep courses in sync with CourseRun/CatalogCourse * feat: delete CourseRun/CatalogCourse when deleting a course * refactor: course_id -> course_key, run -> run_code, display_name -> title * fix: don't use SplitModulestoreCourseIndex for getting list of all courses
1 parent f4cb7b9 commit 3fa7794

12 files changed

Lines changed: 383 additions & 10 deletions

File tree

cms/envs/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,9 @@ def make_lms_template_path(settings):
907907

908908
'openedx_events',
909909

910+
# Core models to represent courses
911+
"openedx_catalog",
912+
910913
# Core apps that power libraries
911914
"openedx_content",
912915
*openedx_content_backcompat_apps_to_install(),

lms/envs/common.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2020,6 +2020,9 @@
20202020

20212021
'openedx_events',
20222022

2023+
# Core models to represent courses
2024+
"openedx_catalog",
2025+
20232026
# Core apps that power libraries
20242027
"openedx_content",
20252028
*openedx_content_backcompat_apps_to_install(),
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""
2+
Data migration to populate the new CourseRun and CatalogCourse models.
3+
"""
4+
5+
# Generated by Django 5.2.11 on 2026-02-13 21:47
6+
import logging
7+
8+
from django.conf import settings
9+
from django.db import migrations
10+
from organizations.api import ensure_organization, exceptions as org_exceptions
11+
12+
log = logging.getLogger(__name__)

# Lower-cased, hyphenated language codes that get rewritten to another code
# during the backfill. Any code that is not a key here is kept as-is.
# Background: https://github.com/openedx/openedx-platform/issues/38036
NORMALIZE_LANGUAGE_CODES = {
    # Chinese script-based codes -> the region-based codes used instead.
    "zh-hans": "zh-cn",
    "zh-hant": "zh-hk",
    # "@"-style locale modifier -> hyphenated form.
    "ca@valencia": "ca-es-valencia",
}
20+
21+
22+
def backfill_openedx_catalog(apps, schema_editor) -> None:
    """
    Populate the new CourseRun and CatalogCourse models from existing CourseOverview rows.

    For every CourseOverview (newest first) this:

    1. ensures the Organization named by the course key exists,
    2. derives a title and a normalized language code,
    3. gets or creates the CatalogCourse for (org, course_code),
    4. gets or creates the CourseRun for the course key, and
    5. back-dates the "created" timestamps of rows created by this migration.

    Args:
        apps: The historical app registry supplied by ``migrations.RunPython``.
        schema_editor: Supplied by ``migrations.RunPython``; unused here.

    Raises:
        ValueError: If the organization for a course cannot be found/created, or if an
            existing CatalogCourse's course code differs from this run's course code
            (e.g. only by capitalization).
    """
    CourseOverview = apps.get_model("course_overviews", "CourseOverview")
    CatalogCourse = apps.get_model("openedx_catalog", "CatalogCourse")
    CourseRun = apps.get_model("openedx_catalog", "CourseRun")

    # Primary keys of the CatalogCourse rows created during *this* run. Only those rows get
    # their "created" timestamp back-dated; pre-existing rows are left untouched.
    created_catalog_course_ids: set[int] = set()

    # Newest first, so that a freshly created CatalogCourse takes its defaults from the newest run.
    all_course_runs = CourseOverview.objects.order_by("-created")
    for course_overview in all_course_runs:
        course_key = course_overview.id
        org_code: str = course_key.org
        course_code: str = course_key.course
        run_code: str = course_key.run

        # Ensure that the Organization exists.
        try:
            org_data = ensure_organization(org_code)
        except org_exceptions.InvalidOrganizationException as exc:
            # Note: IFF the org exists among the courses but not in the Organizations database table,
            # and if auto-create is disabled (it's enabled by default), this will raise
            # InvalidOrganizationException. It would be up to the operator to decide how they want to
            # resolve that.
            raise ValueError(
                f'The organization short code "{org_code}" exists in modulestore ({course_key}) but '
                "not the Organizations table, and auto-creating organizations is disabled. You can resolve this by "
                "creating the Organization manually (e.g. from the Django admin) or turning on auto-creation. "
                "You can set active=False to prevent this Organization from being used other than for historical data. "
            ) from exc
        if org_data["short_name"] != org_code:
            # On most installations, the 'short_name' database column is case insensitive (unfortunately)
            log.warning(
                'The course with ID "%s" does not match its Organization.short_name "%s"',
                course_key,
                org_data["short_name"],
            )

        # We are already iterating over CourseOverview rows, so the row in hand is used directly
        # (no per-course re-query). Fall back to the course code if the display name is empty.
        title: str = course_overview.display_name or course_code

        # Determine the course language.
        # Note that in Studio, the options for course language generally came from the ALL_LANGUAGES setting, which is
        # mostly two-letter language codes with no locale, except it uses "zh_HANS" for Mandarin and "zh_HANT" for
        # Cantonese. We normalize those to "zh-cn" and "zh-hk" for consistency with our platform UI languages /
        # Transifex, but you can still access the "old" version using the CatalogCourse.language_short
        # getter/setter for backwards compatibility. See https://github.com/openedx/openedx-platform/issues/38036
        language = settings.LANGUAGE_CODE
        if course_overview.language:
            language = course_overview.language.lower()
            language = language.replace("_", "-")  # Ensure we use hyphens for consistency (`en-us` not `en_us`)
            # Normalize this language code. The previous/non-normalized code will still be available via the
            # "language_short" property for backwards compatibility.
            language = NORMALIZE_LANGUAGE_CODES.get(language, language)
            # Anything longer than two characters must look like "xx-...": the third character must be a hyphen.
            if len(language) > 2 and language[2] != "-":
                # This seems like an invalid value; revert to the default:
                log.warning(
                    'The course with ID "%s" has invalid language "%s" - using default language "%s" instead.',
                    course_key,
                    language,
                    settings.LANGUAGE_CODE,
                )
                language = settings.LANGUAGE_CODE

        # Ensure that the CatalogCourse exists.
        cc, cc_created = CatalogCourse.objects.get_or_create(
            org_id=org_data["id"],
            course_code=course_code,
            defaults={
                # The default title for the catalog course will be the same name as the newest run, since we iterate
                # over "all_course_runs" in "-created" order.
                "title": title,
                "language": language,
            },
        )
        if cc_created:
            created_catalog_course_ids.add(cc.pk)

        # The lookup above can match a row whose stored course code is not an exact (Python-level)
        # match for this run's course code; treat that as a data error rather than silently merging.
        if cc.course_code != course_code:
            raise ValueError(
                f"The course {course_key} exists in modulestore with a different capitalization of its "
                f'course code compared to other instances of the same run ("{course_code}" vs "{cc.course_code}"). '
                "This really should not happen. To fix it, delete the inconsistent course runs (!). "
            )

        # Create the CourseRun
        new_run, run_created = CourseRun.objects.get_or_create(
            catalog_course=cc,
            run_code=run_code,
            course_key=course_key,
            defaults={"title": title},
        )

        # Correct the "created" timestamp. Since it has auto_now_add=True, we can't set its value except using update()
        # The CourseOverview "created" date may not be the true creation date if the course was created before
        # the CourseOverview model existed, but it should be good enough.
        if course_overview.created < cc.created and cc.pk in created_catalog_course_ids:
            # Use the 'created' date from the oldest course run that we process.
            CatalogCourse.objects.filter(pk=cc.pk).update(created=course_overview.created)
        if run_created:
            CourseRun.objects.filter(pk=new_run.pk).update(created=course_overview.created)
127+
128+
129+
class Migration(migrations.Migration):
    """
    Data migration that backfills the openedx_catalog tables.

    Forward: runs ``backfill_openedx_catalog``. Reverse: a no-op, so rows created by the
    forward step are left in place when the migration is unapplied.
    """

    # Migrations that must be applied before this one runs.
    dependencies = [
        ("openedx_catalog", "0001_initial"),
        ("course_overviews", "0029_alter_historicalcourseoverview_options"),
        # NOTE(review): the backfill function does not look up any split_modulestore_django
        # models; presumably kept for ordering/history reasons - confirm before removing.
        ("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"),
    ]

    operations = [
        # Second positional argument of RunPython is ``reverse_code``.
        migrations.RunPython(backfill_openedx_catalog, migrations.RunPython.noop),
    ]

openedx/core/djangoapps/content/course_overviews/signals.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
Signal handler for invalidating cached course overviews
33
"""
44

5-
65
import logging
76

87
from django.db import transaction
98
from django.db.models.signals import post_save
109
from django.dispatch import Signal
1110
from django.dispatch.dispatcher import receiver
1211

12+
from openedx_catalog import api as catalog_api
13+
from openedx_catalog.models_api import CourseRun
1314
from openedx.core.djangoapps.signals.signals import COURSE_CERT_DATE_CHANGE
1415
from xmodule.data import CertificatesDisplayBehaviors
1516
from xmodule.modulestore.django import SignalHandler
@@ -33,6 +34,8 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable
3334
"""
3435
Catches the signal that a course has been published in Studio and updates the corresponding CourseOverview cache
3536
entry.
37+
38+
Also sync course data to the openedx_catalog CourseRun model.
3639
"""
3740
try:
3841
previous_course_overview = CourseOverview.objects.get(id=course_key)
@@ -41,6 +44,51 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable
4144
updated_course_overview = CourseOverview.load_from_module_store(course_key)
4245
_check_for_course_changes(previous_course_overview, updated_course_overview)
4346

47+
# Currently, SplitModulestoreCourseIndex is the ultimate source of truth for
48+
# which courses exist. When a course is published, we sync that data to
49+
# CourseOverview, and from CourseOverview to CourseRun.
50+
51+
# In the future, CourseRun will be the "source of truth" and each CourseRun
52+
# may optionally point to content and get synced to CourseOverview.
53+
54+
# Ensure a CourseRun exists for this course
55+
try:
56+
course_run = catalog_api.get_course_run(course_key)
57+
except CourseRun.DoesNotExist:
58+
# Presumably this is a newly-created course. Create the CourseRun.
59+
course_run = catalog_api.create_course_run_for_modulestore_course_with(
60+
course_key=course_key,
61+
title=updated_course_overview.display_name,
62+
language_short=updated_course_overview.language,
63+
)
64+
65+
# Keep the CourseRun up to date as the course is edited:
66+
if updated_course_overview.display_name != course_run.title:
67+
catalog_api.sync_course_run_details(course_key, title=updated_course_overview.display_name)
68+
# If this course is the only run in the CatalogCourse, should we update the title of
69+
# the CatalogCourse to match the run's new title? Currently the only way to edit the name of
70+
# a CatalogCourse is via the Django admin. But it's also not used anywhere yet.
71+
72+
if (
73+
updated_course_overview.language
74+
and updated_course_overview.language != course_run.catalog_course.language_short
75+
):
76+
if course_run.catalog_course.runs.count() == 1:
77+
# This is the only run in this CatalogCourse. Update the language of the CatalogCourse
78+
catalog_api.update_catalog_course(
79+
course_run.catalog_course,
80+
language_short=updated_course_overview.language,
81+
)
82+
else:
83+
LOG.warning(
84+
'Course run "%s" language "%s" does not match its catalog course language, "%s"',
85+
str(course_key),
86+
updated_course_overview.language,
87+
course_run.catalog_course.language_short,
88+
)
89+
90+
# In the future, this will also sync schedule and other metadata to the CourseRun's related models
91+
4492

4593
@receiver(SignalHandler.course_deleted)
4694
def _listen_for_course_delete(sender, course_key, **kwargs): # pylint: disable=unused-argument
@@ -56,6 +104,16 @@ def _listen_for_course_delete(sender, course_key, **kwargs): # pylint: disable=
56104
sender=None,
57105
courserun_key=courserun_key,
58106
)
107+
# Delete the openedx_catalog CourseRun to keep it in sync:
108+
try:
109+
course_run_obj = catalog_api.get_course_run(course_key)
110+
except CourseRun.DoesNotExist:
111+
pass
112+
else:
113+
catalog_course = course_run_obj.catalog_course
114+
catalog_api.delete_course_run(course_key)
115+
if catalog_course.runs.count() == 0:
116+
catalog_api.delete_catalog_course(catalog_course)
59117

60118

61119
@receiver(post_save, sender=CourseOverview)

0 commit comments

Comments
 (0)