Skip to content

Commit 12e9af4

Browse files
fix!: split modulestore's has_course(ignore_case=True) was not working (#38044)
BREAKING CHANGE: this forces course IDs in modulestore to be unique (case insensitive). This was always supposed to be the case, but it wasn't working properly on MySQL. Upgrading past this commit may cause a migration failure if you have conflicting course IDs - see the migration 0004 docstring for details.
1 parent 3e522d5 commit 12e9af4

5 files changed

Lines changed: 117 additions & 12 deletions

File tree

cms/djangoapps/contentstore/tests/test_contentstore.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1275,6 +1275,7 @@ def assert_course_creation_failed(self, error_message):
12751275
resp = self.client.ajax_post('/course/', self.course_data)
12761276
self.assertEqual(resp.status_code, 200)
12771277
data = parse_json(resp)
1278+
assert 'ErrMsg' in data, "Expected the course creation to fail"
12781279
self.assertRegex(data['ErrMsg'], error_message)
12791280
if test_enrollment:
12801281
# One test case involves trying to create the same course twice. Hence for that course,
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Generated by Django 5.2.11 on 2026-02-19 23:23
2+
3+
import django.db.models.functions.text
4+
from django.db import migrations, models
5+
import opaque_keys.edx.django.models
6+
7+
8+
class Migration(migrations.Migration):
9+
"""
10+
Force course_id to be case-insensitively unique.
11+
12+
Note: due to a bug we had for several years, it's possible that this
13+
migration will fail because you have some duplicate entries in the
14+
split_modulestore_django_splitmodulestorecourseindex database table that
15+
differ only in case. Note that other parts of the system (like
16+
CourseOverview) are case-insensitive so only allow one version of the course
17+
ID to be stored, but this table would allow multiple if they differ in case.
18+
Such courses would likely be broken/buggy because so many other parts of the
19+
system use a case-insensitive course_id.
20+
21+
So if you encounter this issue and the migration fails due to a duplicate:
22+
Try to figure out which is the "correct" ID (e.g. the one used in the
23+
CourseOverview, CourseEnrollment, and CoursewareStudentModule tables) and
24+
then delete the "incorrect" ID from this table. You can easily do
25+
this from the Django admin at:
26+
(studio)/admin/split_modulestore_django/splitmodulestorecourseindex/
27+
28+
Be sure to take a screenshot or record the `objectid`, `draft_version`,
29+
`published_version`, and `wiki_slug` fields before you do so, so that you
30+
can reverse the deletion if needed.
31+
32+
Deleting rows from this table will NOT delete any actual course content from
33+
MongoDB nor cascade the deletion to any other rows in other MySQL tables; it
34+
just removes a duplicate record that points to [unused?] course data.
35+
"""
36+
37+
dependencies = [
38+
("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"),
39+
]
40+
41+
operations = [
42+
# Make 'course_id' case-insensitively unique (in addition to its existing case-sensitively unique index)
43+
migrations.AddConstraint(
44+
model_name="splitmodulestorecourseindex",
45+
constraint=models.UniqueConstraint(
46+
django.db.models.functions.text.Lower("course_id"),
47+
name="splitmodulestorecourseindex_courseid_unique_ci",
48+
),
49+
),
50+
# Mark the 'course_id' field as 'case_sensitive=True' now that LearningContextKeyField supports it upstream.
51+
# This part of the migration should have no effect on the database, as migration 0001 already explicitly set a
52+
# case-sensitive collation. We just now have a way to indicate that in the model field definition.
53+
migrations.AlterField(
54+
model_name="splitmodulestorecourseindex",
55+
name="course_id",
56+
field=opaque_keys.edx.django.models.LearningContextKeyField(
57+
case_sensitive=True, max_length=255, unique=True
58+
),
59+
),
60+
migrations.AlterField(
61+
model_name="historicalsplitmodulestorecourseindex",
62+
name="course_id",
63+
field=opaque_keys.edx.django.models.LearningContextKeyField(
64+
case_sensitive=True, db_index=True, max_length=255
65+
),
66+
),
67+
]

common/djangoapps/split_modulestore_django/models.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,13 @@ class SplitModulestoreCourseIndex(models.Model):
3434
# For compatibility with MongoDB, each course index must have an ObjectId. We still have an integer primary key too.
3535
objectid = models.CharField(max_length=24, null=False, blank=False, unique=True)
3636

37-
# The ID of this course (or library). Must start with "course-v1:" or "library-v1:"
38-
course_id = LearningContextKeyField(max_length=255, db_index=True, unique=True, null=False)
37+
# course_id: The ID of this course (or library). Must start with "course-v1:" or "library-v1:"
38+
# This is case-sensitive; however, many other parts of the system aren't case sensitive, so we add an explicit index
39+
# on Lower(course_id) to make this case-insensitively unique as well.
40+
# So: (1) queries of course_id by default are case-sensitive. (2) queries that want to be case-insensitive need to
41+
# explicitly compare Lower(course_id) with the lowercase key in question. (3) Course IDs that differ only in case
42+
# are prohibited.
43+
course_id = LearningContextKeyField(case_sensitive=True, unique=True, null=False)
3944
# Extract the "org" value from the course_id key so that we can search by org.
4045
# This gets set automatically by clean()
4146
org = models.CharField(max_length=255, db_index=True)
@@ -81,6 +86,13 @@ def __str__(self):
8186
class Meta:
8287
ordering = ["course_id"]
8388
verbose_name_plural = "Split modulestore course indexes"
89+
constraints = [
90+
# Explicitly force "course_id" to be case-insensitively unique
91+
models.UniqueConstraint(
92+
models.functions.Lower("course_id"),
93+
name="splitmodulestorecourseindex_courseid_unique_ci",
94+
),
95+
]
8496

8597
def as_v1_schema(self):
8698
""" Return in the same format as was stored in MongoDB """
@@ -160,6 +172,6 @@ def clean(self):
160172
def save(self, *args, **kwargs):
161173
""" Save this model """
162174
# Override to ensure that full_clean()/clean() is always called, so that the checks in clean() above are run.
163-
# But don't validate_unique(), it just runs extra queries and the database enforces it anyways.
164-
self.full_clean(validate_unique=False)
175+
# But don't run validations; they just run extra queries and the database enforces them anyways.
176+
self.full_clean(validate_unique=False, validate_constraints=False)
165177
return super().save(*args, **kwargs)

common/djangoapps/split_modulestore_django/tests/test_models.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22
from datetime import datetime
33

44
from bson.objectid import ObjectId
5+
from django.db import IntegrityError, transaction
56
from django.test import TestCase
67
from opaque_keys.edx.keys import CourseKey
8+
import pytest
79

810
from common.djangoapps.split_modulestore_django.models import SplitModulestoreCourseIndex
911
from xmodule.modulestore import ModuleStoreEnum # lint-amnesty, pylint: disable=wrong-import-order
@@ -16,12 +18,22 @@ def test_course_id_case_sensitive(self):
1618
"""
1719
Make sure the course_id column is case sensitive.
1820
21+
2021 note:
22+
1923
Although the platform code generally tries to prevent having two courses whose IDs differ only by case
2024
(e.g. https://git.io/J6voR , note `ignore_case=True`), we found at least one pair of courses on stage that
2125
differs only by case in its `org` ID (`edx` vs `edX`). So for backwards compatibility with MongoDB and to avoid
2226
issues for anyone else with similar course IDs that differ only by case, we've made the new version case
2327
sensitive too. The system still tries to prevent creation of courses that differ only by course (that hasn't
2428
changed), but now the MySQL version won't break if that has somehow happened.
29+
30+
2026 note:
31+
32+
Due to a serious bug where the system was NOT preventing duplicate courses from being created on MySQL, we
33+
decided to tighten this up and introduce a new UNIQUE(LOWER(course_id)) index to prevent duplicates at the
34+
database level. This does mean that the migration will fail if anyone has such duplicates in their system, but
35+
those duplicates are going to have other bugs anyways; in such cases we recommend deleting one of the duplicate
36+
SplitModulestoreCourseIndex entries, or changing its course_id to a dummy value.
2537
"""
2638
course_index_common = {
2739
"course": "TL101",
@@ -38,8 +50,16 @@ def test_course_id_case_sensitive(self):
3850
data1 = SplitModulestoreCourseIndex.fields_from_v1_schema(course_index_1)
3951
data2 = SplitModulestoreCourseIndex.fields_from_v1_schema(course_index_2)
4052
SplitModulestoreCourseIndex(**data1).save()
41-
# This next line will fail if the course_id column is not case-sensitive:
42-
SplitModulestoreCourseIndex(**data2).save()
43-
# Also check deletion, to ensure the course_id historical record is not unique or case sensitive:
44-
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string("course-v1:edx+TL101+2015")).delete()
45-
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string("course-v1:edX+TL101+2015")).delete()
53+
# This next line should fail, because the database itself prevents duplicates:
54+
with pytest.raises(IntegrityError):
55+
with transaction.atomic():
56+
SplitModulestoreCourseIndex(**data2).save()
57+
58+
id_1 = "course-v1:edx+TL101+2015"
59+
id_2 = "course-v1:edX+TL101+2015"
60+
61+
# Retrieving a course by its exact course_id should work:
62+
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string(id_1))
63+
# but retrieving a course with the wrong case should throw an error:
64+
with pytest.raises(SplitModulestoreCourseIndex.DoesNotExist):
65+
SplitModulestoreCourseIndex.objects.get(course_id=CourseKey.from_string(id_2))

xmodule/modulestore/split_mongo/mongo_connection.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515

1616
from ccx_keys.locator import CCXLocator
1717
from django.core.cache import caches, InvalidCacheBackendError
18+
from django.db.models import F
19+
from django.db.models.functions import Lower
20+
from django.db.models.lookups import Exact
1821
from django.db.transaction import TransactionManagementError
1922
import pymongo
2023
# Import this just to export it
@@ -659,13 +662,15 @@ def get_course_index(self, key, ignore_case=False):
659662
# We never include the branch or the version in the course key in the SplitModulestoreCourseIndex table:
660663
key = key.for_branch(None).version_agnostic()
661664
if not ignore_case:
662-
query = {"course_id": key}
665+
query_expr = Exact(F("course_id"), str(key))
663666
else:
664667
# Case insensitive search is important when creating courses to reject course IDs that differ only by
665668
# capitalization.
666-
query = {"course_id__iexact": key}
669+
# WARNING: 'course_id__iexact=key' does not work on this table as it uses a case-sensitive collation.
670+
# We need to use the following explicit lowercase comparison in order to correctly query:
671+
query_expr = Exact(Lower("course_id"), str(key).lower())
667672
try:
668-
return SplitModulestoreCourseIndex.objects.get(**query).as_v1_schema()
673+
return SplitModulestoreCourseIndex.objects.get(query_expr).as_v1_schema()
669674
except SplitModulestoreCourseIndex.DoesNotExist:
670675
# The mongo implementation does not retrieve by string key; it retrieves by (org, course, run) tuple.
671676
# As a result, it will handle read requests for a CCX key like

0 commit comments

Comments
 (0)