Skip to content

Commit a960cdf

Browse files
authored
fix: Add much more codejail darklaunch info; fix remote error bug (#36534)
- Fix bug where we were overwriting `remote_emsg` with None, and add test that would have caught it. - Suppress differences due solely to the codejail sandbox directory name differing (in stack traces), and add test for this. Configurable because we'll need to add an additional search/replace pair for the sandbox venv paths. - Add a variety of custom attributes, replacing existing ones. The attrs now have a prefixed naming scheme to simplify searching. - Add slug to log output so we can more readily correlate traces and logs, as well as logs across services. - Fix typo in error message. - Fix existing import sort order lint.
1 parent 505b4f4 commit a960cdf

2 files changed

Lines changed: 286 additions & 27 deletions

File tree

xmodule/capa/safe_exec/safe_exec.py

Lines changed: 161 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,20 @@
22
import copy
33
import hashlib
44
import logging
5+
import re
6+
from functools import lru_cache
7+
from typing import assert_type
58

69
from codejail.safe_exec import SafeExecException, json_safe
710
from codejail.safe_exec import not_safe_exec as codejail_not_safe_exec
811
from codejail.safe_exec import safe_exec as codejail_safe_exec
12+
from django.conf import settings
13+
from django.dispatch import receiver
14+
from django.test.signals import setting_changed
915
from edx_django_utils.monitoring import function_trace, record_exception, set_custom_attribute
1016

1117
from . import lazymod
12-
from .remote_exec import is_codejail_rest_service_enabled, is_codejail_in_darklaunch, get_remote_exec
18+
from .remote_exec import get_remote_exec, is_codejail_in_darklaunch, is_codejail_rest_service_enabled
1319

1420
log = logging.getLogger(__name__)
1521

@@ -219,6 +225,28 @@ def safe_exec(
219225
# Run the code in both the remote codejail service as well as the local codejail
220226
# when in darklaunch mode.
221227
if is_codejail_in_darklaunch():
228+
# Start adding attributes only once we're in a darklaunch
229+
# comparison, even though these particular ones aren't specific to
230+
# darklaunch. There can be multiple codejail calls per trace, and
231+
# these attrs will overwrite previous values in the same trace. When
232+
# that happens, we need to ensure we overwrite *all* of them,
233+
# otherwise we could end up with inconsistent combinations of values.
234+
235+
# .. custom_attribute_name: codejail.slug
236+
# .. custom_attribute_description: Value of the slug parameter. This
237+
# might be a problem ID, if present.
238+
set_custom_attribute('codejail.slug', slug)
239+
# .. custom_attribute_name: codejail.limit_overrides_context
240+
# .. custom_attribute_description: Value of the limit_overrides_context
241+
# parameter to this code execution. Generally this will be the
242+
# course name, if present at all.
243+
set_custom_attribute('codejail.limit_overrides_context', limit_overrides_context)
244+
# .. custom_attribute_name: codejail.extra_files_count
245+
# .. custom_attribute_description: Number of extra_files included
246+
# in request. This should be 0 or 1, the latter indicating a
247+
# python_lib.zip was present.
248+
set_custom_attribute('codejail.extra_files_count', len(extra_files) if extra_files else 0)
249+
222250
try:
223251
data = {
224252
"code": code_prolog + LAZY_IMPORTS + code,
@@ -230,30 +258,26 @@ def safe_exec(
230258
"extra_files": extra_files,
231259
}
232260
with function_trace('safe_exec.remote_exec_darklaunch'):
261+
# Ignore the returned exception, because it's just a
262+
# SafeExecException wrapped around emsg (if present).
233263
remote_emsg, _ = get_remote_exec(data)
264+
remote_exception = None
234265
except BaseException as e: # pragma: no cover # pylint: disable=broad-except
235266
# Swallow all exceptions and log it in monitoring so that dark launch doesn't cause issues during
236267
# deploy.
237268
remote_emsg = None
238269
remote_exception = e
239-
else:
240-
remote_emsg = None
241-
remote_exception = None
242270

243271
try:
244-
log.info(
245-
f"Remote execution in darklaunch mode produces globals={darklaunch_globals!r}, "
246-
f"emsg={remote_emsg!r}, exception={remote_exception!r}"
247-
)
248272
local_exc_unexpected = None if isinstance(exception, SafeExecException) else exception
249-
log.info(
250-
f"Local execution in darklaunch mode produces globals={globals_dict!r}, "
251-
f"emsg={emsg!r}, exception={local_exc_unexpected!r}")
252-
set_custom_attribute('dark_launch_emsg_match', remote_emsg == emsg)
253-
set_custom_attribute('remote_emsg_exists', remote_emsg is not None)
254-
set_custom_attribute('local_emsg_exists', emsg is not None)
273+
274+
report_darklaunch_results(
275+
slug=slug,
276+
globals_local=globals_dict, emsg_local=emsg, unexpected_exc_local=local_exc_unexpected,
277+
globals_remote=darklaunch_globals, emsg_remote=remote_emsg, unexpected_exc_remote=remote_exception,
278+
)
255279
except BaseException as e: # pragma: no cover # pylint: disable=broad-except
256-
log.exception("Error occurred while trying to report codejail darklauch data.")
280+
log.exception("Error occurred while trying to report codejail darklaunch data.")
257281
record_exception()
258282

259283
# Put the result back in the cache. This is complicated by the fact that
@@ -265,3 +289,125 @@ def safe_exec(
265289
# If an exception happened, raise it now.
266290
if exception:
267291
raise exception
292+
293+
294+
@lru_cache(maxsize=1)
295+
def emsg_normalizers():
296+
"""
297+
Load emsg normalization settings.
298+
299+
The output is like the setting value, except the 'search' patterns have
300+
been compiled.
301+
"""
302+
default = [
303+
{
304+
'search': r'/tmp/codejail-[0-9a-zA-Z]+',
305+
'replace': r'/tmp/codejail-<SANDBOX_DIR_NAME>',
306+
},
307+
]
308+
try:
309+
# .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
310+
# .. setting_default: (see description)
311+
# .. setting_description: A list of patterns to search and replace in codejail error
312+
# messages during comparison in codejail-service darklaunch. Each entry is a dict
313+
# of 'search' (a regular expression string) and 'replace' (the replacement string).
314+
# The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+',
315+
# the directory structure codejail uses for its random-named sandboxes. Deployers
316+
# may also need to add a search/replace pair for the location of the sandbox
317+
# virtualenv, or any other paths that show up in stack traces.
318+
# .. setting_warning: Note that `replace' is a pattern, allowing for
319+
# backreferences. Any backslashes in the replacement pattern that are not
320+
# intended as backreferences should be escaped as `\\`.
321+
setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default)
322+
323+
compiled = []
324+
for pair in setting:
325+
compiled.append({
326+
'search': re.compile(assert_type(pair['search'], str)),
327+
'replace': assert_type(pair['replace'], str),
328+
})
329+
return compiled
330+
except BaseException as e:
331+
record_exception()
332+
return []
333+
334+
335+
def normalize_error_message(emsg):
336+
"""
337+
Remove any uninteresting sources of discrepancy from an emsg.
338+
"""
339+
if emsg is None:
340+
return None
341+
342+
for replacer in emsg_normalizers():
343+
emsg = re.sub(replacer['search'], replacer['replace'], emsg, count=0)
344+
345+
return emsg
346+
347+
348+
def report_darklaunch_results(
349+
*, slug,
350+
globals_local, emsg_local, unexpected_exc_local,
351+
globals_remote, emsg_remote, unexpected_exc_remote,
352+
):
353+
"""Send telemetry for results of darklaunch."""
354+
can_compare_output = True
355+
356+
def report_arm(arm, globals_dict, emsg, unexpected_exception):
357+
nonlocal can_compare_output
358+
if unexpected_exception:
359+
# .. custom_attribute_name: codejail.darklaunch.status.{local,remote}
360+
# .. custom_attribute_description: Outcome of this arm of the
361+
# darklaunch comparison. Values can be 'ok' (normal execution),
362+
# 'safe_error' (submitted code raised an exception), or
363+
# 'unexpected_error' (uncaught error in submitting or evaluating code).
364+
set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'unexpected_error')
365+
# .. custom_attribute_name: codejail.darklaunch.exception.{local,remote}
366+
# .. custom_attribute_description: When the status attribute indicates
367+
# an unexpected error, this is a string representation of the error,
368+
# otherwise None.
369+
set_custom_attribute(f'codejail.darklaunch.exception.{arm}', repr(unexpected_exception))
370+
can_compare_output = False
371+
else:
372+
set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'ok' if emsg is None else 'safe_error')
373+
set_custom_attribute(f'codejail.darklaunch.exception.{arm}', None)
374+
375+
# Logs include full globals and emsg
376+
log.info(
377+
f"Codejail darklaunch {arm} results for slug={slug}: globals={globals_dict!r}, "
378+
f"emsg={emsg!r}, exception={unexpected_exception!r}"
379+
)
380+
381+
report_arm('local', globals_local, emsg_local, unexpected_exc_local)
382+
report_arm('remote', globals_remote, emsg_remote, unexpected_exc_remote)
383+
384+
# If the arms can't be compared (unexpected errors), stop early -- the rest
385+
# is about output comparison.
386+
if not can_compare_output:
387+
set_custom_attribute('codejail.darklaunch.globals_match', 'N/A')
388+
set_custom_attribute('codejail.darklaunch.emsg_match', 'N/A')
389+
return
390+
391+
globals_match = globals_local == globals_remote
392+
emsg_match = normalize_error_message(emsg_local) == normalize_error_message(emsg_remote)
393+
394+
# .. custom_attribute_name: codejail.darklaunch.globals_match
395+
# .. custom_attribute_description: True if local and remote globals_dict
396+
# values match, False otherwise. 'N/A' when either arm raised an
397+
# uncaught error.
398+
set_custom_attribute('codejail.darklaunch.globals_match', globals_match)
399+
# .. custom_attribute_name: codejail.darklaunch.emsg_match
400+
# .. custom_attribute_description: True if the local and remote emsg values
401+
# (errors returned from sandbox) match, False otherwise. Differences due
402+
# to known irrelevant factors are suppressed in this comparison, such as
403+
# the randomized directory names used for sandboxes. 'N/A' when either
404+
# arm raised an uncaught error.
405+
set_custom_attribute('codejail.darklaunch.emsg_match', emsg_match)
406+
407+
408+
@receiver(setting_changed)
409+
def reset_caches(sender, **kwargs):
410+
"""
411+
Reset cached settings during unit tests.
412+
"""
413+
emsg_normalizers.cache_clear()

0 commit comments

Comments
 (0)