@@ -291,6 +291,24 @@ def safe_exec(
291291 raise exception
292292
293293
294+ def _compile_normalizers (normalizer_setting ):
295+ """
296+ Compile emsg normalizer search/replace pairs into regex.
297+
298+ Raises exception on bad settings.
299+ """
300+ compiled = []
301+ for pair in normalizer_setting :
302+ search = re .compile (assert_type (pair ['search' ], str ))
303+ replace = assert_type (pair ['replace' ], str )
304+
305+ # Test the replacement string (might contain errors)
306+ re .sub (search , replace , "example" )
307+
308+ compiled .append ({'search' : search , 'replace' : replace })
309+ return compiled
310+
311+
294312@lru_cache (maxsize = 1 )
295313def emsg_normalizers ():
296314 """
@@ -299,38 +317,77 @@ def emsg_normalizers():
299317 The output is like the setting value, except the 'search' patterns have
300318 been compiled.
301319 """
302- default = [
320+ default_setting = [
303321 {
304322 # Character range should be at least as broad as what Python's `tempfile` uses.
305323 'search' : r'/tmp/codejail-[0-9a-zA-Z_]+' ,
306324 'replace' : r'/tmp/codejail-<SANDBOX_DIR_NAME>' ,
307325 },
326+
327+ # These are useful for eliding differences in environments due to Python version:
328+
329+ {
330+ # Python 3.8 doesn't include the dir here, but Python 3.12
331+ # does. Normalize to the 3.8 version.
332+ 'search' : r'File "/tmp/codejail-<SANDBOX_DIR_NAME>/jailed_code"' ,
333+ 'replace' : r'File "jailed_code"'
334+ },
335+ {
336+ # Python version shows up in stack traces in the virtualenv paths
337+ 'search' : r'python3\.[0-9]+' ,
338+ 'replace' : r'python3.XX'
339+ },
340+ {
341+ # Line numbers in stack traces differ between Python versions
342+ 'search' : r', line [0-9]+, in ' ,
343+ 'replace' : r', line XXX, in '
344+ },
345+ {
346+ # Some time after 3.8, Python started adding '^^^' indicators to stack traces
347+ 'search' : r'\\n\s*\^+\s*\\n' ,
348+ 'replace' : r'\\n'
349+ },
350+ {
351+ # Python3.8 had these <listcomp> stack trace elements but 3.12 does not
352+ 'search' : r'\\n File "[^"]+", line [0-9]+, in <listcomp>\\n' ,
353+ 'replace' : r'\\n'
354+ },
308355 ]
356+ default_normalizers = _compile_normalizers (default_setting )
357+
358+ # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
359+ # .. setting_default: []
360+ # .. setting_description: A list of patterns to search and replace in codejail error
361+ # messages during comparison in codejail-service darklaunch. Each entry is a dict
362+ # of 'search' (a regular expression string) and 'replace' (the replacement string).
363+ # Deployers may also need to add a search/replace pair for the location of the sandbox
364+ # virtualenv, or any other paths that show up in stack traces.
365+ # .. setting_warning: Note that `replace' is a pattern, allowing for
366+ # backreferences. Any backslashes in the replacement pattern that are not
367+ # intended as backreferences should be escaped as `\\`.
368+ # The default list suppresses differences due to the randomly-named sandboxes
369+ # or to differences due to Python version. See setting
370+ # ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE`` for information on how
371+ # this setting interacts with the defaults.
372+ custom_setting = getattr (settings , 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS' , [])
309373 try :
310- # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
311- # .. setting_default: (see description)
312- # .. setting_description: A list of patterns to search and replace in codejail error
313- # messages during comparison in codejail-service darklaunch. Each entry is a dict
314- # of 'search' (a regular expression string) and 'replace' (the replacement string).
315- # The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+',
316- # the directory structure codejail uses for its random-named sandboxes. Deployers
317- # may also need to add a search/replace pair for the location of the sandbox
318- # virtualenv, or any other paths that show up in stack traces.
319- # .. setting_warning: Note that `replace' is a pattern, allowing for
320- # backreferences. Any backslashes in the replacement pattern that are not
321- # intended as backreferences should be escaped as `\\`.
322- setting = getattr (settings , 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS' , default )
323-
324- compiled = []
325- for pair in setting :
326- compiled .append ({
327- 'search' : re .compile (assert_type (pair ['search' ], str )),
328- 'replace' : assert_type (pair ['replace' ], str ),
329- })
330- return compiled
374+ custom_normalizers = _compile_normalizers (custom_setting )
331375 except BaseException as e :
376+ log .error ("Could not load custom codejail darklaunch emsg normalizers" )
332377 record_exception ()
333- return []
378+ return default_normalizers
379+
380+ # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE
381+ # .. setting_default: 'append'
382+ # .. setting_description: How to combine ``CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS``
383+ # with the defaults. If the value is 'replace', the defaults will be replaced
384+ # with the specified patterns. If the value is 'append' (the default), the
385+ # specified replacements will be run after the defaults.
386+ combine = getattr (settings , 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS_COMBINE' , 'append' )
387+ if combine == 'replace' :
388+ return custom_normalizers
389+ else : # 'append', or unknown
390+ return default_normalizers + custom_normalizers
334391
335392
336393def normalize_error_message (emsg ):
0 commit comments