22 * jsimd_i386.c
33 *
44 * Copyright 2009 Pierre Ossman <[email protected] > for Cendio AB
5- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander.
5+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023 , D. R. Commander.
66 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
77 *
88 * Based on the x86 SIMD extension for IJG JPEG library,
@@ -158,6 +158,9 @@ jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
158158 void (* sse2fct ) (JDIMENSION , JSAMPARRAY , JSAMPIMAGE , JDIMENSION , int );
159159 void (* mmxfct ) (JDIMENSION , JSAMPARRAY , JSAMPIMAGE , JDIMENSION , int );
160160
161+ if (simd_support == ~0U )
162+ init_simd ();
163+
161164 switch (cinfo -> in_color_space ) {
162165 case JCS_EXT_RGB :
163166 avx2fct = jsimd_extrgb_ycc_convert_avx2 ;
@@ -217,6 +220,9 @@ jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
217220 void (* sse2fct ) (JDIMENSION , JSAMPARRAY , JSAMPIMAGE , JDIMENSION , int );
218221 void (* mmxfct ) (JDIMENSION , JSAMPARRAY , JSAMPIMAGE , JDIMENSION , int );
219222
223+ if (simd_support == ~0U )
224+ init_simd ();
225+
220226 switch (cinfo -> in_color_space ) {
221227 case JCS_EXT_RGB :
222228 avx2fct = jsimd_extrgb_gray_convert_avx2 ;
@@ -276,6 +282,9 @@ jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
276282 void (* sse2fct ) (JDIMENSION , JSAMPIMAGE , JDIMENSION , JSAMPARRAY , int );
277283 void (* mmxfct ) (JDIMENSION , JSAMPIMAGE , JDIMENSION , JSAMPARRAY , int );
278284
285+ if (simd_support == ~0U )
286+ init_simd ();
287+
279288 switch (cinfo -> out_color_space ) {
280289 case JCS_EXT_RGB :
281290 avx2fct = jsimd_ycc_extrgb_convert_avx2 ;
@@ -379,6 +388,9 @@ GLOBAL(void)
379388jsimd_h2v2_downsample (j_compress_ptr cinfo , jpeg_component_info * compptr ,
380389 JSAMPARRAY input_data , JSAMPARRAY output_data )
381390{
391+ if (simd_support == ~0U )
392+ init_simd ();
393+
382394 if (simd_support & JSIMD_AVX2 )
383395 jsimd_h2v2_downsample_avx2 (cinfo -> image_width , cinfo -> max_v_samp_factor ,
384396 compptr -> v_samp_factor ,
@@ -399,6 +411,9 @@ GLOBAL(void)
399411jsimd_h2v1_downsample (j_compress_ptr cinfo , jpeg_component_info * compptr ,
400412 JSAMPARRAY input_data , JSAMPARRAY output_data )
401413{
414+ if (simd_support == ~0U )
415+ init_simd ();
416+
402417 if (simd_support & JSIMD_AVX2 )
403418 jsimd_h2v1_downsample_avx2 (cinfo -> image_width , cinfo -> max_v_samp_factor ,
404419 compptr -> v_samp_factor ,
@@ -461,6 +476,9 @@ GLOBAL(void)
461476jsimd_h2v2_upsample (j_decompress_ptr cinfo , jpeg_component_info * compptr ,
462477 JSAMPARRAY input_data , JSAMPARRAY * output_data_ptr )
463478{
479+ if (simd_support == ~0U )
480+ init_simd ();
481+
464482 if (simd_support & JSIMD_AVX2 )
465483 jsimd_h2v2_upsample_avx2 (cinfo -> max_v_samp_factor , cinfo -> output_width ,
466484 input_data , output_data_ptr );
@@ -476,6 +494,9 @@ GLOBAL(void)
476494jsimd_h2v1_upsample (j_decompress_ptr cinfo , jpeg_component_info * compptr ,
477495 JSAMPARRAY input_data , JSAMPARRAY * output_data_ptr )
478496{
497+ if (simd_support == ~0U )
498+ init_simd ();
499+
479500 if (simd_support & JSIMD_AVX2 )
480501 jsimd_h2v1_upsample_avx2 (cinfo -> max_v_samp_factor , cinfo -> output_width ,
481502 input_data , output_data_ptr );
@@ -537,6 +558,9 @@ GLOBAL(void)
537558jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo , jpeg_component_info * compptr ,
538559 JSAMPARRAY input_data , JSAMPARRAY * output_data_ptr )
539560{
561+ if (simd_support == ~0U )
562+ init_simd ();
563+
540564 if (simd_support & JSIMD_AVX2 )
541565 jsimd_h2v2_fancy_upsample_avx2 (cinfo -> max_v_samp_factor ,
542566 compptr -> downsampled_width , input_data ,
@@ -555,6 +579,9 @@ GLOBAL(void)
555579jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo , jpeg_component_info * compptr ,
556580 JSAMPARRAY input_data , JSAMPARRAY * output_data_ptr )
557581{
582+ if (simd_support == ~0U )
583+ init_simd ();
584+
558585 if (simd_support & JSIMD_AVX2 )
559586 jsimd_h2v1_fancy_upsample_avx2 (cinfo -> max_v_samp_factor ,
560587 compptr -> downsampled_width , input_data ,
@@ -623,6 +650,9 @@ jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
623650 void (* sse2fct ) (JDIMENSION , JSAMPIMAGE , JDIMENSION , JSAMPARRAY );
624651 void (* mmxfct ) (JDIMENSION , JSAMPIMAGE , JDIMENSION , JSAMPARRAY );
625652
653+ if (simd_support == ~0U )
654+ init_simd ();
655+
626656 switch (cinfo -> out_color_space ) {
627657 case JCS_EXT_RGB :
628658 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2 ;
@@ -681,6 +711,9 @@ jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
681711 void (* sse2fct ) (JDIMENSION , JSAMPIMAGE , JDIMENSION , JSAMPARRAY );
682712 void (* mmxfct ) (JDIMENSION , JSAMPIMAGE , JDIMENSION , JSAMPARRAY );
683713
714+ if (simd_support == ~0U )
715+ init_simd ();
716+
684717 switch (cinfo -> out_color_space ) {
685718 case JCS_EXT_RGB :
686719 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2 ;
@@ -785,6 +818,9 @@ GLOBAL(void)
785818jsimd_convsamp (JSAMPARRAY sample_data , JDIMENSION start_col ,
786819 DCTELEM * workspace )
787820{
821+ if (simd_support == ~0U )
822+ init_simd ();
823+
788824 if (simd_support & JSIMD_AVX2 )
789825 jsimd_convsamp_avx2 (sample_data , start_col , workspace );
790826 else if (simd_support & JSIMD_SSE2 )
@@ -797,6 +833,9 @@ GLOBAL(void)
797833jsimd_convsamp_float (JSAMPARRAY sample_data , JDIMENSION start_col ,
798834 FAST_FLOAT * workspace )
799835{
836+ if (simd_support == ~0U )
837+ init_simd ();
838+
800839 if (simd_support & JSIMD_SSE2 )
801840 jsimd_convsamp_float_sse2 (sample_data , start_col , workspace );
802841 else if (simd_support & JSIMD_SSE )
@@ -867,6 +906,9 @@ jsimd_can_fdct_float(void)
867906GLOBAL (void )
868907jsimd_fdct_islow (DCTELEM * data )
869908{
909+ if (simd_support == ~0U )
910+ init_simd ();
911+
870912 if (simd_support & JSIMD_AVX2 )
871913 jsimd_fdct_islow_avx2 (data );
872914 else if (simd_support & JSIMD_SSE2 )
@@ -878,6 +920,9 @@ jsimd_fdct_islow(DCTELEM *data)
878920GLOBAL (void )
879921jsimd_fdct_ifast (DCTELEM * data )
880922{
923+ if (simd_support == ~0U )
924+ init_simd ();
925+
881926 if ((simd_support & JSIMD_SSE2 ) && IS_ALIGNED_SSE (jconst_fdct_islow_sse2 ))
882927 jsimd_fdct_ifast_sse2 (data );
883928 else
@@ -887,6 +932,9 @@ jsimd_fdct_ifast(DCTELEM *data)
887932GLOBAL (void )
888933jsimd_fdct_float (FAST_FLOAT * data )
889934{
935+ if (simd_support == ~0U )
936+ init_simd ();
937+
890938 if ((simd_support & JSIMD_SSE ) && IS_ALIGNED_SSE (jconst_fdct_float_sse ))
891939 jsimd_fdct_float_sse (data );
892940 else if (simd_support & JSIMD_3DNOW )
@@ -942,6 +990,9 @@ jsimd_can_quantize_float(void)
942990GLOBAL (void )
943991jsimd_quantize (JCOEFPTR coef_block , DCTELEM * divisors , DCTELEM * workspace )
944992{
993+ if (simd_support == ~0U )
994+ init_simd ();
995+
945996 if (simd_support & JSIMD_AVX2 )
946997 jsimd_quantize_avx2 (coef_block , divisors , workspace );
947998 else if (simd_support & JSIMD_SSE2 )
@@ -954,6 +1005,9 @@ GLOBAL(void)
9541005jsimd_quantize_float (JCOEFPTR coef_block , FAST_FLOAT * divisors ,
9551006 FAST_FLOAT * workspace )
9561007{
1008+ if (simd_support == ~0U )
1009+ init_simd ();
1010+
9571011 if (simd_support & JSIMD_SSE2 )
9581012 jsimd_quantize_float_sse2 (coef_block , divisors , workspace );
9591013 else if (simd_support & JSIMD_SSE )
@@ -1017,6 +1071,9 @@ jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
10171071 JCOEFPTR coef_block , JSAMPARRAY output_buf ,
10181072 JDIMENSION output_col )
10191073{
1074+ if (simd_support == ~0U )
1075+ init_simd ();
1076+
10201077 if ((simd_support & JSIMD_SSE2 ) && IS_ALIGNED_SSE (jconst_idct_red_sse2 ))
10211078 jsimd_idct_2x2_sse2 (compptr -> dct_table , coef_block , output_buf ,
10221079 output_col );
@@ -1029,6 +1086,9 @@ jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
10291086 JCOEFPTR coef_block , JSAMPARRAY output_buf ,
10301087 JDIMENSION output_col )
10311088{
1089+ if (simd_support == ~0U )
1090+ init_simd ();
1091+
10321092 if ((simd_support & JSIMD_SSE2 ) && IS_ALIGNED_SSE (jconst_idct_red_sse2 ))
10331093 jsimd_idct_4x4_sse2 (compptr -> dct_table , coef_block , output_buf ,
10341094 output_col );
@@ -1123,6 +1183,9 @@ jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
11231183 JCOEFPTR coef_block , JSAMPARRAY output_buf ,
11241184 JDIMENSION output_col )
11251185{
1186+ if (simd_support == ~0U )
1187+ init_simd ();
1188+
11261189 if (simd_support & JSIMD_AVX2 )
11271190 jsimd_idct_islow_avx2 (compptr -> dct_table , coef_block , output_buf ,
11281191 output_col );
@@ -1139,6 +1202,9 @@ jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
11391202 JCOEFPTR coef_block , JSAMPARRAY output_buf ,
11401203 JDIMENSION output_col )
11411204{
1205+ if (simd_support == ~0U )
1206+ init_simd ();
1207+
11421208 if ((simd_support & JSIMD_SSE2 ) && IS_ALIGNED_SSE (jconst_idct_ifast_sse2 ))
11431209 jsimd_idct_ifast_sse2 (compptr -> dct_table , coef_block , output_buf ,
11441210 output_col );
@@ -1152,6 +1218,9 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
11521218 JCOEFPTR coef_block , JSAMPARRAY output_buf ,
11531219 JDIMENSION output_col )
11541220{
1221+ if (simd_support == ~0U )
1222+ init_simd ();
1223+
11551224 if ((simd_support & JSIMD_SSE2 ) && IS_ALIGNED_SSE (jconst_idct_float_sse2 ))
11561225 jsimd_idct_float_sse2 (compptr -> dct_table , coef_block , output_buf ,
11571226 output_col );
0 commit comments