-
Notifications
You must be signed in to change notification settings - Fork 4.3k
feat: Use openedx_catalog app, backfill its CourseRuns [FC-0117] #38023
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 14 commits
97e1631
73f2ad1
133d4b0
24c5120
1970173
953637d
c569920
84e3acf
2902fe8
be726d3
a54bbaa
8c0e9e0
92d6b20
d0e557c
5d2ec4b
972d09b
f6a9d6c
7d0ba14
9dcd146
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| """ | ||
| Data migration to populate the new CourseRun and CatalogCourse models. | ||
| """ | ||
|
|
||
| # Generated by Django 5.2.11 on 2026-02-13 21:47 | ||
| import logging | ||
|
|
||
| from django.conf import settings | ||
| from django.db import migrations | ||
| from organizations.api import ensure_organization, exceptions as org_exceptions | ||
|
|
||
| log = logging.getLogger(__name__) | ||
|
|
||
| # https://github.com/openedx/openedx-platform/issues/38036 | ||
| NORMALIZE_LANGUAGE_CODES = { | ||
| "zh-hans": "zh-cn", | ||
| "zh-hant": "zh-hk", | ||
| "ca@valencia": "ca-es-valencia", | ||
| } | ||
|
|
||
|
|
||
| def backfill_openedx_catalog(apps, schema_editor): | ||
| """ | ||
| Populate the new CourseRun and CatalogCourse models. | ||
| """ | ||
| # CourseOverview is a cache model derived from modulestore; modulestore is the source of truth for courses, so we'll | ||
| # use it to get the list of "all courses on the system" to populate the new CourseRun and CatalogCourse models. | ||
| CourseIndex = apps.get_model("split_modulestore_django", "SplitModulestoreCourseIndex") | ||
| CourseOverview = apps.get_model("course_overviews", "CourseOverview") | ||
| CatalogCourse = apps.get_model("openedx_catalog", "CatalogCourse") | ||
| CourseRun = apps.get_model("openedx_catalog", "CourseRun") | ||
|
|
||
| created_catalog_course_ids: set[int] = set() | ||
| all_course_runs = CourseIndex.objects.filter(base_store="mongodb", library_version="").order_by("-pk") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lobotomized Old Mongo courses still exist for the purposes of displaying in the Learner Dashboard, reporting enrollments, etc. That's unlikely to change because there are grades and credentials associated with that user data. I think we should derive the list of course runs only from CourseOverview.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (I mean, I guess we could do it from the mixed modulestore interface, but I assume you wanted to use the model because it's fast.)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I'll change it to just use CourseOverview.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated.
We really need to fix our devstack to include useful default courses including something like this, if we're expected to support it indefinitely. |
||
| for course_idx in all_course_runs: | ||
| org_code: str = course_idx.course_id.org | ||
| course_code: str = course_idx.course_id.course | ||
| run_code: str = course_idx.course_id.run | ||
|
|
||
| # Ensure that the Organization exists. | ||
| try: | ||
| org_data = ensure_organization(org_code) | ||
| except org_exceptions.InvalidOrganizationException as exc: | ||
| # Note: IFF the org exists among the modulestore courses but not in the Organizations database table, | ||
| # and if auto-create is disabled (it's enabled by default), this will raise InvalidOrganizationException. It | ||
| # would be up to the operator to decide how they want to resolve that. | ||
| raise ValueError( | ||
| f'The organization short code "{org_code}" exists in modulestore ({course_idx.course_id}) but ' | ||
| "not the Organizations table, and auto-creating organizations is disabled. You can resolve this by " | ||
| "creating the Organization manually (e.g. from the Django admin) or turning on auto-creation. " | ||
| "You can set active=False to prevent this Organization from being used other than for historical data. " | ||
| ) from exc | ||
|
Comment on lines
+45
to
+50
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a great error message, thank you. |
||
| if org_data["short_name"] != org_code: | ||
| # On most installations, the 'short_name' database column is case insensitive (unfortunately) | ||
| log.warning( | ||
| 'The course with ID "%s" does not match its Organization.short_name "%s"', | ||
| course_idx.course_id, | ||
| org_data["short_name"], | ||
| ) | ||
|
|
||
| # Fetch the CourseOverview if it exists | ||
| try: | ||
| course_overview = CourseOverview.objects.get(id=course_idx.course_id) | ||
| except CourseOverview.DoesNotExist: | ||
| course_overview = None # Course exists in modulestore but details aren't cached into CourseOverview yet | ||
| display_name: str = (course_overview.display_name if course_overview else None) or course_code | ||
|
|
||
| # Determine the course language. | ||
| # Note that in Studio, the options for course language generally came from the ALL_LANGUAGES setting, which is | ||
| # mostly two-letter language codes with no locale, except it uses "zh_HANS" for Simplified Chinese and "zh_HANT" for | ||
| # Traditional Chinese. We normalize those to "zh-cn" and "zh-hk" for consistency with our platform UI languages / | ||
| # Transifex, but you can still access the "old" version using the CatalogCourse.language_short | ||
| # getter/setter for backwards compatibility. See https://github.com/openedx/openedx-platform/issues/38036 | ||
| language = settings.LANGUAGE_CODE | ||
| if course_overview and course_overview.language: | ||
| language = course_overview.language.lower() | ||
| language = language.replace("_", "-") # Ensure we use hyphens for consistency (`en-us` not `en_us`) | ||
| # Normalize this language code. The previous/non-normalized code will still be available via the | ||
| # "language_short" property for backwards compatibility. | ||
| language = NORMALIZE_LANGUAGE_CODES.get(language, language) | ||
| if len(language) > 2 and language[2] != "-": | ||
| # This seems like an invalid value; revert to the default: | ||
| log.warning( | ||
| 'The course with ID "%s" has invalid language "%s" - using default language "%s" instead.', | ||
| course_idx.course_id, | ||
| language, | ||
| settings.LANGUAGE_CODE, | ||
| ) | ||
| language = settings.LANGUAGE_CODE | ||
|
|
||
| # Ensure that the CatalogCourse exists. | ||
| cc, cc_created = CatalogCourse.objects.get_or_create( | ||
| org_id=org_data["id"], | ||
| course_code=course_code, | ||
| defaults={ | ||
| # The default display_name for the catalog course will be the same name as the newest run, since we | ||
| # iterate over "all_course_runs" in "-pk" order (should be same as reverse chronological) | ||
| "display_name": display_name, | ||
| "language": language, | ||
| }, | ||
| ) | ||
| if cc_created: | ||
| created_catalog_course_ids.add(cc.pk) | ||
|
|
||
| if cc.course_code != course_code: | ||
| raise ValueError( | ||
| f"The course {course_idx.course_id} exists in modulestore with a different capitalization of its " | ||
| f'course code compared to other instances of the same run ("{course_code}" vs "{cc.course_code}"). ' | ||
| "This really should not happen. To fix it, delete the inconsistent course runs (!). " | ||
| ) | ||
|
|
||
| # Create the CourseRun | ||
| new_run, run_created = CourseRun.objects.get_or_create( | ||
| catalog_course=cc, | ||
| run_code=run_code, | ||
| course_key=course_idx.course_id, | ||
| defaults={"display_name": display_name}, | ||
| ) | ||
|
|
||
| # Correct the "created" timestamp. Since it has auto_now_add=True, we can't set its value except using update() | ||
| # The CourseOverview should have the "created" date unless it's missing or the course was created before | ||
| # the CourseOverview model existed. In any case, it should be good enough. Otherwise use the default (now). | ||
| if course_overview: | ||
| if course_overview.created < cc.created and cc.pk in created_catalog_course_ids: | ||
| # Use the 'created' date from the oldest course run that we process. | ||
| CatalogCourse.objects.filter(pk=cc.pk).update(created=course_overview.created) | ||
| if run_created: | ||
| CourseRun.objects.filter(pk=new_run.pk).update(created=course_overview.created) | ||
|
|
||
|
|
||
| class Migration(migrations.Migration): | ||
| dependencies = [ | ||
| ("openedx_catalog", "0001_initial"), | ||
| ("course_overviews", "0029_alter_historicalcourseoverview_options"), | ||
| ("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"), | ||
| ] | ||
|
|
||
| operations = [ | ||
| migrations.RunPython(backfill_openedx_catalog, reverse_code=migrations.RunPython.noop), | ||
| ] | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🤯