Skip to content

Commit d5a273c

Browse files
authored
feat!: Expand codejail darklaunch normalizers; append by default (#36682)
For darklaunch comparisons where the two sides have different Python versions, we'll want a more comprehensive list of normalizers. - Expand the default list to include patterns discovered during a Python 3.8 vs. 3.12 comparison. - Append the setting value by default, rather than replacing (but still allow replacing). - Use default normalizers if custom ones can't be loaded. - Add log message when loading normalizers fails. - Validate the replacement pattern, not just the search pattern.
1 parent ad156c0 commit d5a273c

2 files changed

Lines changed: 130 additions & 34 deletions

File tree

xmodule/capa/safe_exec/safe_exec.py

Lines changed: 80 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,24 @@ def safe_exec(
291291
raise exception
292292

293293

294+
def _compile_normalizers(normalizer_setting):
295+
"""
296+
Compile emsg normalizer search/replace pairs into regex.
297+
298+
Raises exception on bad settings.
299+
"""
300+
compiled = []
301+
for pair in normalizer_setting:
302+
search = re.compile(assert_type(pair['search'], str))
303+
replace = assert_type(pair['replace'], str)
304+
305+
# Test the replacement string (might contain errors)
306+
re.sub(search, replace, "example")
307+
308+
compiled.append({'search': search, 'replace': replace})
309+
return compiled
310+
311+
294312
@lru_cache(maxsize=1)
295313
def emsg_normalizers():
296314
"""
@@ -299,38 +317,77 @@ def emsg_normalizers():
299317
The output is like the setting value, except the 'search' patterns have
300318
been compiled.
301319
"""
302-
default = [
320+
default_setting = [
303321
{
304322
# Character range should be at least as broad as what Python's `tempfile` uses.
305323
'search': r'/tmp/codejail-[0-9a-zA-Z_]+',
306324
'replace': r'/tmp/codejail-<SANDBOX_DIR_NAME>',
307325
},
326+
327+
# These are useful for eliding differences in environments due to Python version:
328+
329+
{
330+
# Python 3.8 doesn't include the dir here, but Python 3.12
331+
# does. Normalize to the 3.8 version.
332+
'search': r'File "/tmp/codejail-<SANDBOX_DIR_NAME>/jailed_code"',
333+
'replace': r'File "jailed_code"'
334+
},
335+
{
336+
# Python version shows up in stack traces in the virtualenv paths
337+
'search': r'python3\.[0-9]+',
338+
'replace': r'python3.XX'
339+
},
340+
{
341+
# Line numbers in stack traces differ between Python versions
342+
'search': r', line [0-9]+, in ',
343+
'replace': r', line XXX, in '
344+
},
345+
{
346+
# Some time after 3.8, Python started adding '^^^' indicators to stack traces
347+
'search': r'\\n\s*\^+\s*\\n',
348+
'replace': r'\\n'
349+
},
350+
{
351+
# Python3.8 had these <listcomp> stack trace elements but 3.12 does not
352+
'search': r'\\n File "[^"]+", line [0-9]+, in <listcomp>\\n',
353+
'replace': r'\\n'
354+
},
308355
]
356+
default_normalizers = _compile_normalizers(default_setting)
357+
358+
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
359+
# .. setting_default: []
360+
# .. setting_description: A list of patterns to search and replace in codejail error
361+
# messages during comparison in codejail-service darklaunch. Each entry is a dict
362+
# of 'search' (a regular expression string) and 'replace' (the replacement string).
363+
# Deployers may also need to add a search/replace pair for the location of the sandbox
364+
# virtualenv, or any other paths that show up in stack traces.
365+
# .. setting_warning: Note that `replace' is a pattern, allowing for
366+
# backreferences. Any backslashes in the replacement pattern that are not
367+
# intended as backreferences should be escaped as `\\`.
368+
# The default list suppresses differences due to the randomly-named sandboxes
369+
# or to differences due to Python version. See setting
370+
# ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE`` for information on how
371+
# this setting interacts with the defaults.
372+
custom_setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', [])
309373
try:
310-
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
311-
# .. setting_default: (see description)
312-
# .. setting_description: A list of patterns to search and replace in codejail error
313-
# messages during comparison in codejail-service darklaunch. Each entry is a dict
314-
# of 'search' (a regular expression string) and 'replace' (the replacement string).
315-
# The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+',
316-
# the directory structure codejail uses for its random-named sandboxes. Deployers
317-
# may also need to add a search/replace pair for the location of the sandbox
318-
# virtualenv, or any other paths that show up in stack traces.
319-
# .. setting_warning: Note that `replace' is a pattern, allowing for
320-
# backreferences. Any backslashes in the replacement pattern that are not
321-
# intended as backreferences should be escaped as `\\`.
322-
setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default)
323-
324-
compiled = []
325-
for pair in setting:
326-
compiled.append({
327-
'search': re.compile(assert_type(pair['search'], str)),
328-
'replace': assert_type(pair['replace'], str),
329-
})
330-
return compiled
374+
custom_normalizers = _compile_normalizers(custom_setting)
331375
except BaseException as e:
376+
log.error("Could not load custom codejail darklaunch emsg normalizers")
332377
record_exception()
333-
return []
378+
return default_normalizers
379+
380+
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE
381+
# .. setting_default: 'append'
382+
# .. setting_description: How to combine ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS``
383+
# with the defaults. If the value is 'replace', the defaults will be replaced
384+
# with the specified patterns. If the value is 'append' (the default), the
385+
# specified replacements will be run after the defaults.
386+
combine = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE', 'append')
387+
if combine == 'replace':
388+
return custom_normalizers
389+
else: # 'append', or unknown
390+
return default_normalizers + custom_normalizers
334391

335392

336393
def normalize_error_message(emsg):

xmodule/capa/safe_exec/tests/test_safe_exec.py

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -370,32 +370,71 @@ def remote_exec(data):
370370
assert isinstance(results['raised'], SafeExecException)
371371
assert 'whatever.py' in repr(results['raised'])
372372

373+
def test_default_normalizers(self):
374+
"""
375+
Default normalizers handle false mismatches we've observed.
376+
377+
This just provides coverage for some of the more complicated patterns.
378+
"""
379+
side_1 = (
380+
'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback'
381+
' (most recent call last):\\n File "/tmp/codejail-9g9715g_/jailed_code"'
382+
', line 19, in <module>\\n exec(code, g_dict)\\n File "<string>"'
383+
', line 1, in <module>\\n File "<string>", line 89, in test_add\\n'
384+
' File "<string>", line 1\\n import random random.choice(range(10))'
385+
'\\n ^\\nSyntaxError: invalid syntax\\n\' with status code: 1'
386+
)
387+
side_2 = (
388+
'Couldn\'t execute jailed code: stdout: b\'\', stderr: b\'Traceback'
389+
' (most recent call last):\\n File "jailed_code"'
390+
', line 19, in <module>\\n exec(code, g_dict)\\n File "<string>"'
391+
', line 203, in <module>\\n File "<string>", line 89, in test_add\\n'
392+
' File "<string>", line 1\\n import random random.choice(range(10))'
393+
'\\n ^^^^^^\\nSyntaxError: invalid syntax\\n\' with status code: 1'
394+
)
395+
assert normalize_error_message(side_1) == normalize_error_message(side_2)
396+
373397
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
374-
{
375-
'search': r'/tmp/codejail-[0-9a-zA-Z]+',
376-
'replace': r'/tmp/codejail-<RAND>',
377-
},
378398
{
379399
'search': r'[0-9]+',
380400
'replace': r'<NUM>',
381401
},
382402
])
383403
def test_configurable_normalizers(self):
384-
"""We can override the normalizers, and they run in order."""
404+
"""We can augment the normalizers, and they run in order."""
405+
emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
406+
expect_out = "Error in /tmp/codejail-<SANDBOX_DIR_NAME>/whatever.py: something <NUM> <NUM> other"
407+
assert expect_out == normalize_error_message(emsg_in)
408+
409+
@override_settings(
410+
CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
411+
{
412+
'search': r'[0-9]+',
413+
'replace': r'<NUM>',
414+
},
415+
],
416+
CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE='replace',
417+
)
418+
def test_can_replace_normalizers(self):
419+
"""We can replace the normalizers."""
385420
emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
386-
expect_out = "Error in /tmp/codejail-<RAND>/whatever.py: something <NUM> <NUM> other"
421+
expect_out = "Error in /tmp/codejail-<NUM>abcd/whatever.py: something <NUM> <NUM> other"
387422
assert expect_out == normalize_error_message(emsg_in)
388423

389424
@override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
390425
{
391-
'search': r'broken [',
392-
'replace': r'replace',
426+
'search': r'broken',
427+
'replace': r'replace \g<>', # invalid replacement pattern
393428
},
394429
])
395430
@patch('xmodule.capa.safe_exec.safe_exec.record_exception')
396-
def test_normalizers_validate(self, mock_record_exception):
397-
"""Normalizers are validated, and fall back to empty list on error."""
398-
assert emsg_normalizers() == [] # pylint: disable=use-implicit-booleaness-not-comparison
431+
@patch('xmodule.capa.safe_exec.safe_exec.log.error')
432+
def test_normalizers_validate(self, mock_log_error, mock_record_exception):
433+
"""Normalizers are validated, and fall back to default list on error."""
434+
assert len(emsg_normalizers()) > 0 # pylint: disable=use-implicit-booleaness-not-comparison
435+
mock_log_error.assert_called_once_with(
436+
"Could not load custom codejail darklaunch emsg normalizers"
437+
)
399438
mock_record_exception.assert_called_once()
400439

401440

0 commit comments

Comments
 (0)