Skip to content

Commit d743a2c

Browse files
committed
SIMD/x86: Initialize simd_support before every use
As long as a libjpeg instance is only used by one thread at a time, a program is technically within its rights to call jpeg_start_*compress() in one thread and jpeg_(read|write)_*(), with the same libjpeg instance, in a second thread. However, because the various jsimd_can*() functions are called within the body of jpeg_start_*compress() and simd_support is now thread-local (due to f579cc1), that led to a situation in which simd_support was initialized in the first thread but not the second. The uninitialized value of simd_support is 0xFFFFFFFF, which the second thread interpreted to mean that it could use any instruction set, and when it attempted to use AVX2 instructions on a CPU that didn't support them, an illegal instruction error occurred.

This issue was known to affect libvips.

This commit modifies the i386 and x86-64 SIMD dispatchers so that the various jsimd_*() functions always call init_simd(), if simd_support is uninitialized, prior to dispatching based on the value of simd_support. Note that the other SIMD dispatchers don't need this, because only the x86 SIMD extensions currently support multiple instruction sets.

This patch has been verified to be performance-neutral to within +/- 0.4% with 32-bit and 64-bit code running on a 2.8 GHz Intel Xeon W3530 and a 3.6 GHz Intel Xeon W2123.

Fixes #649
1 parent a93f6a7 commit d743a2c

2 files changed

Lines changed: 116 additions & 2 deletions

File tree

simd/i386/jsimd.c

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* jsimd_i386.c
33
*
44
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5-
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
5+
* Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander.
66
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
77
*
88
* Based on the x86 SIMD extension for IJG JPEG library,
@@ -158,6 +158,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
158158
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
159159
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
160160

161+
if (simd_support == ~0U)
162+
init_simd();
163+
161164
switch (cinfo->in_color_space) {
162165
case JCS_EXT_RGB:
163166
avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -217,6 +220,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
217220
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
218221
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
219222

223+
if (simd_support == ~0U)
224+
init_simd();
225+
220226
switch (cinfo->in_color_space) {
221227
case JCS_EXT_RGB:
222228
avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -276,6 +282,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
276282
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
277283
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
278284

285+
if (simd_support == ~0U)
286+
init_simd();
287+
279288
switch (cinfo->out_color_space) {
280289
case JCS_EXT_RGB:
281290
avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -379,6 +388,9 @@ GLOBAL(void)
379388
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
380389
JSAMPARRAY input_data, JSAMPARRAY output_data)
381390
{
391+
if (simd_support == ~0U)
392+
init_simd();
393+
382394
if (simd_support & JSIMD_AVX2)
383395
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
384396
compptr->v_samp_factor,
@@ -399,6 +411,9 @@ GLOBAL(void)
399411
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
400412
JSAMPARRAY input_data, JSAMPARRAY output_data)
401413
{
414+
if (simd_support == ~0U)
415+
init_simd();
416+
402417
if (simd_support & JSIMD_AVX2)
403418
jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
404419
compptr->v_samp_factor,
@@ -461,6 +476,9 @@ GLOBAL(void)
461476
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
462477
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
463478
{
479+
if (simd_support == ~0U)
480+
init_simd();
481+
464482
if (simd_support & JSIMD_AVX2)
465483
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
466484
input_data, output_data_ptr);
@@ -476,6 +494,9 @@ GLOBAL(void)
476494
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
477495
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
478496
{
497+
if (simd_support == ~0U)
498+
init_simd();
499+
479500
if (simd_support & JSIMD_AVX2)
480501
jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
481502
input_data, output_data_ptr);
@@ -537,6 +558,9 @@ GLOBAL(void)
537558
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
538559
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
539560
{
561+
if (simd_support == ~0U)
562+
init_simd();
563+
540564
if (simd_support & JSIMD_AVX2)
541565
jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
542566
compptr->downsampled_width, input_data,
@@ -555,6 +579,9 @@ GLOBAL(void)
555579
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
556580
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
557581
{
582+
if (simd_support == ~0U)
583+
init_simd();
584+
558585
if (simd_support & JSIMD_AVX2)
559586
jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
560587
compptr->downsampled_width, input_data,
@@ -623,6 +650,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
623650
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
624651
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
625652

653+
if (simd_support == ~0U)
654+
init_simd();
655+
626656
switch (cinfo->out_color_space) {
627657
case JCS_EXT_RGB:
628658
avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -681,6 +711,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
681711
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
682712
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
683713

714+
if (simd_support == ~0U)
715+
init_simd();
716+
684717
switch (cinfo->out_color_space) {
685718
case JCS_EXT_RGB:
686719
avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -785,6 +818,9 @@ GLOBAL(void)
785818
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
786819
DCTELEM *workspace)
787820
{
821+
if (simd_support == ~0U)
822+
init_simd();
823+
788824
if (simd_support & JSIMD_AVX2)
789825
jsimd_convsamp_avx2(sample_data, start_col, workspace);
790826
else if (simd_support & JSIMD_SSE2)
@@ -797,6 +833,9 @@ GLOBAL(void)
797833
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
798834
FAST_FLOAT *workspace)
799835
{
836+
if (simd_support == ~0U)
837+
init_simd();
838+
800839
if (simd_support & JSIMD_SSE2)
801840
jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
802841
else if (simd_support & JSIMD_SSE)
@@ -867,6 +906,9 @@ jsimd_can_fdct_float(void)
867906
GLOBAL(void)
868907
jsimd_fdct_islow(DCTELEM *data)
869908
{
909+
if (simd_support == ~0U)
910+
init_simd();
911+
870912
if (simd_support & JSIMD_AVX2)
871913
jsimd_fdct_islow_avx2(data);
872914
else if (simd_support & JSIMD_SSE2)
@@ -878,6 +920,9 @@ jsimd_fdct_islow(DCTELEM *data)
878920
GLOBAL(void)
879921
jsimd_fdct_ifast(DCTELEM *data)
880922
{
923+
if (simd_support == ~0U)
924+
init_simd();
925+
881926
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
882927
jsimd_fdct_ifast_sse2(data);
883928
else
@@ -887,6 +932,9 @@ jsimd_fdct_ifast(DCTELEM *data)
887932
GLOBAL(void)
888933
jsimd_fdct_float(FAST_FLOAT *data)
889934
{
935+
if (simd_support == ~0U)
936+
init_simd();
937+
890938
if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
891939
jsimd_fdct_float_sse(data);
892940
else if (simd_support & JSIMD_3DNOW)
@@ -942,6 +990,9 @@ jsimd_can_quantize_float(void)
942990
GLOBAL(void)
943991
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
944992
{
993+
if (simd_support == ~0U)
994+
init_simd();
995+
945996
if (simd_support & JSIMD_AVX2)
946997
jsimd_quantize_avx2(coef_block, divisors, workspace);
947998
else if (simd_support & JSIMD_SSE2)
@@ -954,6 +1005,9 @@ GLOBAL(void)
9541005
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
9551006
FAST_FLOAT *workspace)
9561007
{
1008+
if (simd_support == ~0U)
1009+
init_simd();
1010+
9571011
if (simd_support & JSIMD_SSE2)
9581012
jsimd_quantize_float_sse2(coef_block, divisors, workspace);
9591013
else if (simd_support & JSIMD_SSE)
@@ -1017,6 +1071,9 @@ jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
10171071
JCOEFPTR coef_block, JSAMPARRAY output_buf,
10181072
JDIMENSION output_col)
10191073
{
1074+
if (simd_support == ~0U)
1075+
init_simd();
1076+
10201077
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
10211078
jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf,
10221079
output_col);
@@ -1029,6 +1086,9 @@ jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
10291086
JCOEFPTR coef_block, JSAMPARRAY output_buf,
10301087
JDIMENSION output_col)
10311088
{
1089+
if (simd_support == ~0U)
1090+
init_simd();
1091+
10321092
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
10331093
jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf,
10341094
output_col);
@@ -1123,6 +1183,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
11231183
JCOEFPTR coef_block, JSAMPARRAY output_buf,
11241184
JDIMENSION output_col)
11251185
{
1186+
if (simd_support == ~0U)
1187+
init_simd();
1188+
11261189
if (simd_support & JSIMD_AVX2)
11271190
jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
11281191
output_col);
@@ -1139,6 +1202,9 @@ jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
11391202
JCOEFPTR coef_block, JSAMPARRAY output_buf,
11401203
JDIMENSION output_col)
11411204
{
1205+
if (simd_support == ~0U)
1206+
init_simd();
1207+
11421208
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
11431209
jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
11441210
output_col);
@@ -1152,6 +1218,9 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
11521218
JCOEFPTR coef_block, JSAMPARRAY output_buf,
11531219
JDIMENSION output_col)
11541220
{
1221+
if (simd_support == ~0U)
1222+
init_simd();
1223+
11551224
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
11561225
jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
11571226
output_col);

simd/x86_64/jsimd.c

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
* jsimd_x86_64.c
33
*
44
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5-
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander.
5+
* Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
66
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
77
*
88
* Based on the x86 SIMD extension for IJG JPEG library,
@@ -145,6 +145,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
145145
void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146146
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
147147

148+
if (simd_support == ~0U)
149+
init_simd();
150+
148151
switch (cinfo->in_color_space) {
149152
case JCS_EXT_RGB:
150153
avx2fct = jsimd_extrgb_ycc_convert_avx2;
@@ -194,6 +197,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194197
void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
195198
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
196199

200+
if (simd_support == ~0U)
201+
init_simd();
202+
197203
switch (cinfo->in_color_space) {
198204
case JCS_EXT_RGB:
199205
avx2fct = jsimd_extrgb_gray_convert_avx2;
@@ -243,6 +249,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
243249
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
244250
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
245251

252+
if (simd_support == ~0U)
253+
init_simd();
254+
246255
switch (cinfo->out_color_space) {
247256
case JCS_EXT_RGB:
248257
avx2fct = jsimd_ycc_extrgb_convert_avx2;
@@ -333,6 +342,9 @@ GLOBAL(void)
333342
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
334343
JSAMPARRAY input_data, JSAMPARRAY output_data)
335344
{
345+
if (simd_support == ~0U)
346+
init_simd();
347+
336348
if (simd_support & JSIMD_AVX2)
337349
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
338350
compptr->v_samp_factor,
@@ -349,6 +361,9 @@ GLOBAL(void)
349361
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
350362
JSAMPARRAY input_data, JSAMPARRAY output_data)
351363
{
364+
if (simd_support == ~0U)
365+
init_simd();
366+
352367
if (simd_support & JSIMD_AVX2)
353368
jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
354369
compptr->v_samp_factor,
@@ -403,6 +418,9 @@ GLOBAL(void)
403418
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
404419
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
405420
{
421+
if (simd_support == ~0U)
422+
init_simd();
423+
406424
if (simd_support & JSIMD_AVX2)
407425
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
408426
input_data, output_data_ptr);
@@ -415,6 +433,9 @@ GLOBAL(void)
415433
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
416434
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
417435
{
436+
if (simd_support == ~0U)
437+
init_simd();
438+
418439
if (simd_support & JSIMD_AVX2)
419440
jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
420441
input_data, output_data_ptr);
@@ -469,6 +490,9 @@ GLOBAL(void)
469490
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
470491
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
471492
{
493+
if (simd_support == ~0U)
494+
init_simd();
495+
472496
if (simd_support & JSIMD_AVX2)
473497
jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
474498
compptr->downsampled_width, input_data,
@@ -483,6 +507,9 @@ GLOBAL(void)
483507
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
484508
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
485509
{
510+
if (simd_support == ~0U)
511+
init_simd();
512+
486513
if (simd_support & JSIMD_AVX2)
487514
jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
488515
compptr->downsampled_width, input_data,
@@ -542,6 +569,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
542569
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
543570
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
544571

572+
if (simd_support == ~0U)
573+
init_simd();
574+
545575
switch (cinfo->out_color_space) {
546576
case JCS_EXT_RGB:
547577
avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
@@ -590,6 +620,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
590620
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
591621
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
592622

623+
if (simd_support == ~0U)
624+
init_simd();
625+
593626
switch (cinfo->out_color_space) {
594627
case JCS_EXT_RGB:
595628
avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
@@ -679,6 +712,9 @@ GLOBAL(void)
679712
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
680713
DCTELEM *workspace)
681714
{
715+
if (simd_support == ~0U)
716+
init_simd();
717+
682718
if (simd_support & JSIMD_AVX2)
683719
jsimd_convsamp_avx2(sample_data, start_col, workspace);
684720
else
@@ -748,6 +784,9 @@ jsimd_can_fdct_float(void)
748784
GLOBAL(void)
749785
jsimd_fdct_islow(DCTELEM *data)
750786
{
787+
if (simd_support == ~0U)
788+
init_simd();
789+
751790
if (simd_support & JSIMD_AVX2)
752791
jsimd_fdct_islow_avx2(data);
753792
else
@@ -809,6 +848,9 @@ jsimd_can_quantize_float(void)
809848
GLOBAL(void)
810849
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
811850
{
851+
if (simd_support == ~0U)
852+
init_simd();
853+
812854
if (simd_support & JSIMD_AVX2)
813855
jsimd_quantize_avx2(coef_block, divisors, workspace);
814856
else
@@ -963,6 +1005,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
9631005
JCOEFPTR coef_block, JSAMPARRAY output_buf,
9641006
JDIMENSION output_col)
9651007
{
1008+
if (simd_support == ~0U)
1009+
init_simd();
1010+
9661011
if (simd_support & JSIMD_AVX2)
9671012
jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
9681013
output_col);

0 commit comments

Comments
 (0)