Skip to content

Commit a9ec0c1

Browse files
committed
Resync
1 parent 8825288 commit a9ec0c1

11 files changed

Lines changed: 599 additions & 32 deletions

File tree

audio/conversion/float_to_s16.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,16 @@
2828
#include <altivec.h>
2929
#endif
3030

31-
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
32-
#ifndef HAVE_ARM_NEON_OPTIMIZATIONS
33-
#define HAVE_ARM_NEON_OPTIMIZATIONS
31+
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_ASM_OPTIMIZATIONS)) || defined(HAVE_NEON)
32+
#ifndef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
33+
#define HAVE_ARM_NEON_ASM_OPTIMIZATIONS
3434
#endif
3535
#endif
3636

3737
#include <features/features_cpu.h>
3838
#include <audio/conversion/float_to_s16.h>
3939

40-
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
40+
#if defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
4141
static bool float_to_s16_neon_enabled = false;
4242
void convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
4343
#endif
@@ -97,7 +97,7 @@ void convert_float_to_s16(int16_t *out,
9797

9898
samples = samples_in;
9999
i = 0;
100-
#elif defined(HAVE_ARM_NEON_OPTIMIZATIONS)
100+
#elif defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
101101
if (float_to_s16_neon_enabled)
102102
{
103103
size_t aligned_samples = samples & ~7;
@@ -157,7 +157,7 @@ void convert_float_to_s16(int16_t *out,
157157
**/
158158
void convert_float_to_s16_init_simd(void)
159159
{
160-
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
160+
#if defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
161161
unsigned cpu = cpu_features_get();
162162

163163
if (cpu & RETRO_SIMD_NEON)

audio/conversion/float_to_s16_neon.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2121
*/
22-
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
22+
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_ASM_OPTIMIZATIONS)
2323

2424
#ifndef __MACH__
2525
.arm

audio/conversion/float_to_s16_neon.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2121
*/
22-
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
22+
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_ASM_OPTIMIZATIONS)
2323

2424
#if defined(__thumb__)
2525
#define DECL_ARMMODE(x) " .align 2\n" " .global " x "\n" " .thumb\n" " .thumb_func\n" " .type " x ", %function\n" x ":\n"

audio/conversion/s16_to_float.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@
2929
#include <features/features_cpu.h>
3030
#include <audio/conversion/s16_to_float.h>
3131

32-
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
33-
#ifndef HAVE_ARM_NEON_OPTIMIZATIONS
34-
#define HAVE_ARM_NEON_OPTIMIZATIONS
32+
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_ASM_OPTIMIZATIONS)) || defined(HAVE_NEON)
33+
#ifndef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
34+
#define HAVE_ARM_NEON_ASM_OPTIMIZATIONS
3535
#endif
3636
#endif
3737

38-
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
38+
#if defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
3939
static bool s16_to_float_neon_enabled = false;
4040

4141
/* Avoid potential hard-float/soft-float ABI issues. */
@@ -104,7 +104,7 @@ void convert_s16_to_float(float *out,
104104
samples = samples_in;
105105
i = 0;
106106

107-
#elif defined(HAVE_ARM_NEON_OPTIMIZATIONS)
107+
#elif defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
108108
if (s16_to_float_neon_enabled)
109109
{
110110
size_t aligned_samples = samples & ~7;
@@ -187,7 +187,7 @@ void convert_s16_to_float(float *out,
187187
**/
188188
void convert_s16_to_float_init_simd(void)
189189
{
190-
#if defined(HAVE_ARM_NEON_OPTIMIZATIONS)
190+
#if defined(HAVE_ARM_NEON_ASM_OPTIMIZATIONS)
191191
unsigned cpu = cpu_features_get();
192192

193193
if (cpu & RETRO_SIMD_NEON)

audio/conversion/s16_to_float_neon.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2121
*/
22-
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
22+
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_ASM_OPTIMIZATIONS)
2323

2424
#ifndef __MACH__
2525
.arm

audio/conversion/s16_to_float_neon.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2020
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2121
*/
22-
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
22+
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_ASM_OPTIMIZATIONS)
2323

2424
#if defined(__thumb__)
2525
#define DECL_ARMMODE(x) " .align 2\n" " .global " x "\n" " .thumb\n" " .thumb_func\n" " .type " x ", %function\n" x ":\n"

audio/dsp_filters/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ else ifeq ($(platform), osx)
5454
MINVERFLAGS=
5555
ifeq ($(shell uname -p),arm)
5656
MINVERFLAGS = -mmacosx-version-min=10.15 -stdlib=libc++ # macOS (Metal, ARM 64bit)
57-
MINVERFLAGS += -DDONT_WANT_ARM_OPTIMIZATIONS
57+
MINVERFLAGS += -DDONT_WANT_ARM_ASM_OPTIMIZATIONS
5858
else ifeq ($(HAVE_METAL),1)
5959
MINVERFLAGS = -mmacosx-version-min=10.13 -stdlib=libc++ # macOS (Metal, x86 64bit)
6060
else ifeq ($(shell uname -p),powerpc)
@@ -68,7 +68,7 @@ else ifeq ($(platform), osx)
6868
# Build for a specific architecture when ARCH is defined as a switch
6969
ifeq ($(ARCH),arm64)
7070
MINVERFLAGS = -mmacosx-version-min=10.15 -stdlib=libc++ # macOS (Metal, ARM 64bit)
71-
MINVERFLAGS += -DDONT_WANT_ARM_OPTIMIZATIONS
71+
MINVERFLAGS += -DDONT_WANT_ARM_ASM_OPTIMIZATIONS
7272
ARCHFLAGS = -arch arm64
7373
else ifeq ($(ARCH),x86_64)
7474
ifeq ($(HAVE_METAL),1)

audio/resampler/drivers/sinc_resampler.c

Lines changed: 81 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,81 @@ typedef struct rarch_sinc_resampler
8585
float kaiser_beta;
8686
} rarch_sinc_resampler_t;
8787

88-
#if (defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)) || defined(HAVE_NEON)
89-
#if TARGET_OS_IPHONE
90-
#else
91-
#ifndef WANT_NEON
92-
#define WANT_NEON
93-
#endif
94-
#endif
95-
#endif
88+
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
89+
#ifdef DONT_WANT_ARM_ASM_OPTIMIZATIONS
90+
#include <arm_neon.h>
9691

97-
#ifdef WANT_NEON
92+
/* Assumes that taps >= 8, and that taps is a multiple of 8. */
93+
static void resampler_sinc_process_neon_intrin(void *re_, struct resampler_data *data)
94+
{
95+
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
96+
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
97+
98+
uint32_t ratio = phases / data->ratio;
99+
const float *input = data->data_in;
100+
float *output = data->data_out;
101+
size_t frames = data->input_frames;
102+
size_t out_frames = 0;
103+
104+
while (frames)
105+
{
106+
while (resamp->time >= phases)
107+
{
108+
/* Push in reverse to make filter more obvious. */
109+
if (!resamp->ptr)
110+
resamp->ptr = resamp->taps;
111+
resamp->ptr--;
112+
113+
resamp->buffer_l[resamp->ptr + resamp->taps] =
114+
resamp->buffer_l[resamp->ptr] = *input++;
115+
116+
resamp->buffer_r[resamp->ptr + resamp->taps] =
117+
resamp->buffer_r[resamp->ptr] = *input++;
118+
119+
resamp->time -= phases;
120+
frames--;
121+
}
122+
123+
{
124+
const float *buffer_l = resamp->buffer_l + resamp->ptr;
125+
const float *buffer_r = resamp->buffer_r + resamp->ptr;
126+
unsigned taps = resamp->taps;
127+
while (resamp->time < phases)
128+
{
129+
int i;
130+
unsigned phase = resamp->time >> resamp->subphase_bits;
131+
const float *phase_table = resamp->phase_table + phase * taps;
132+
133+
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
134+
float32x2_t p3, p4;
135+
136+
for (i = 0; i < taps; i += 8)
137+
{
138+
float32x4x2_t coeff8 = vld2q_f32(&phase_table[i]);
139+
float32x4x2_t left8 = vld2q_f32(&buffer_l[i]);
140+
float32x4x2_t right8 = vld2q_f32(&buffer_r[i]);
141+
142+
p1 = vmlaq_f32(p1, left8.val[0], coeff8.val[0]);
143+
p2 = vmlaq_f32(p2, right8.val[0], coeff8.val[0]);
144+
p1 = vmlaq_f32(p1, left8.val[1], coeff8.val[1]);
145+
p2 = vmlaq_f32(p2, right8.val[1], coeff8.val[1]);
146+
}
147+
148+
p3 = vadd_f32(vget_low_f32(p1), vget_high_f32(p1));
149+
p4 = vadd_f32(vget_low_f32(p2), vget_high_f32(p2));
150+
vst1_f32(output, vpadd_f32(p3, p4));
151+
152+
153+
output += 2;
154+
out_frames++;
155+
resamp->time += ratio;
156+
}
157+
}
158+
}
159+
160+
data->output_frames = out_frames;
161+
}
162+
#else
98163
/* Assumes that taps >= 8, and that taps is a multiple of 8. */
99164
void process_sinc_neon_asm(float *out, const float *left,
100165
const float *right, const float *coeff, unsigned taps);
@@ -150,6 +215,7 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
150215
data->output_frames = out_frames;
151216
}
152217
#endif
218+
#endif
153219

154220
#if defined(__AVX__)
155221
static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data)
@@ -844,7 +910,7 @@ static void *resampler_sinc_new(const struct resampler_config *config,
844910
else
845911
#endif
846912
{
847-
#if defined(WANT_NEON)
913+
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
848914
re->taps = (re->taps + 7) & ~7;
849915
#else
850916
re->taps = (re->taps + 3) & ~3;
@@ -902,8 +968,12 @@ static void *resampler_sinc_new(const struct resampler_config *config,
902968
}
903969
else if (mask & RESAMPLER_SIMD_NEON && window_type != SINC_WINDOW_KAISER)
904970
{
905-
#if defined(WANT_NEON)
971+
#if (defined(__ARM_NEON__) || defined(HAVE_NEON))
972+
#ifdef DONT_WANT_ARM_ASM_OPTIMIZATIONS
973+
sinc_resampler.process = resampler_sinc_process_neon_intrin;
974+
#else
906975
sinc_resampler.process = resampler_sinc_process_neon;
976+
#endif
907977
#endif
908978
}
909979

@@ -922,5 +992,3 @@ retro_resampler_t sinc_resampler = {
922992
"sinc",
923993
"sinc"
924994
};
925-
926-
#undef WANT_NEON

audio/resampler/drivers/sinc_resampler_neon.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2121
*/
2222

23-
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_OPTIMIZATIONS)
23+
#if defined(__ARM_NEON__) && !defined(DONT_WANT_ARM_ASM_OPTIMIZATIONS)
2424

2525
#ifndef __MACH__
2626
.arm
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import re
2+
3+
# 0: full struct; 1: up to & including first []; 2: content between first {}
4+
# Matches a complete `struct ... name[] = { ... };` array definition whose
# entries end with a `{ NULL, NULL, NULL ... }` terminator entry.
# C comments (/* */ and //) are tolerated on either side of the `=`.
p_struct = re.compile(r'(struct\s*[a-zA-Z0-9_\s]+\[])\s*'
5+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+)\s*)*'
6+
r'=\s*' # =
7+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+)\s*)*'
8+
r'{((?:.|[\r\n])*?)\{\s*NULL,\s*NULL,\s*NULL\s*(?:.|[\r\n])*?},?(?:.|[\r\n])*?};') # captures the full struct, its beginning and its content
9+
# 0: type name[]; 1: type; 2: name
10+
# Matches `<type> <name>[]`.
# Group 1: a type identifier starting with `retro_core_option_`.
# Group 2: the array name, `option_cats...` or `option_defs...`, with a
#          suffix of at most 8 lower-case letters/underscores.
# Group 3: the suffix alone; only set when the name is `option_defs...`.
p_type_name = re.compile(r'(retro_core_option_[a-zA-Z0-9_]+)\s*'
11+
r'(option_cats[a-z_]{0,8}|option_defs([a-z_]{0,8}))\s*\[]')
12+
# 0: full option; 1: key; 2: description; 3: additional info; 4: key/value pairs
13+
# Matches one brace-delimited entry that ends with `},`.
# Between any two tokens the pattern skips C comments (/* */ and //) and
# lines starting with `#`, using the same repeated sub-pattern throughout.
p_option = re.compile(r'{\s*' # opening brace
14+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*' # skip comments / `#` lines
15+
r'(\".*?\"|' # key start; group 1: string literal, or ...
16+
r'[a-zA-Z0-9_]+\s*\((?:.|[\r\n])*?\)|' # ... identifier followed by (...)
17+
r'[a-zA-Z0-9_]+\s*\[(?:.|[\r\n])*?]|' # ... identifier followed by [...]
18+
r'[a-zA-Z0-9_]+\s*\".*?\")\s*' # ... identifier followed by a string literal; key end
19+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
20+
r',\s*' # comma
21+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
22+
r'(\".*?\")\s*' # description; group 2
23+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
24+
r',\s*' # comma
25+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
26+
r'((?:' # group 3: one or more NULL-or-string fields
27+
r'(?:NULL|\"(?:.|[\r\n])*?\")\s*' # description in category, info, info in category, category
28+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
29+
r',?\s*' # optional comma
30+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
31+
r')+)'
32+
r'(?:' # defs only start (this whole part is optional)
33+
r'{\s*' # opening brace of the key/value list
34+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
35+
r'((?:' # key/value pairs start; group 4
36+
r'{\s*' # opening brace of one pair
37+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
38+
r'(?:NULL|\".*?\")\s*' # option key
39+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
40+
r',\s*' # comma
41+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
42+
r'(?:NULL|\".*?\")\s*' # option value
43+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
44+
r'}\s*' # closing brace of one pair
45+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
46+
r',?\s*' # optional comma
47+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
48+
r')*)' # key/value pairs end
49+
r'}\s*' # closing brace of the key/value list
50+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
51+
r',?\s*' # optional comma
52+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
53+
r'(?:' # defaults start (zero or more NULL-or-string fields)
54+
r'(?:NULL|\".*?\")\s*' # default value
55+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
56+
r',?\s*' # optional comma
57+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
58+
r')*' # defaults end
59+
r')?' # defs only end
60+
r'},') # closing brace of the entry, followed by a comma
61+
# analyse option group 3
62+
# Matches a single `NULL` or string-literal field followed by a mandatory
# comma; group 1 is the field itself. Comments and `#` lines around the
# comma are skipped.
p_info = re.compile(r'(NULL|\"(?:.|[\r\n])*?\")\s*' # description in category, info, info in category, category
63+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
64+
r',\s*' # comma
65+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*')
66+
# analyse option group 4
67+
# Matches one `{ key, value }` pair with an optional trailing comma.
# Group 1 is the key and group 2 the value; each is `NULL` or a string
# literal. Comments and `#` lines between tokens are skipped.
p_key_value = re.compile(r'{\s*' # opening brace
68+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
69+
r'(NULL|\".*?\")\s*' # option key; group 1
70+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
71+
r',\s*' # comma
72+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
73+
r'(NULL|\".*?\")\s*' # option value; group 2
74+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
75+
r'}\s*' # closing brace
76+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*'
77+
r',?\s*' # optional comma
78+
r'(?:(?:\/\*(?:.|[\r\n])*?\*\/|\/\/.*[\r\n]+|#.*[\r\n]+)\s*)*')
79+
80+
# Matches an upper-case identifier (two or more characters from A-Z, 0-9
# and `_`, not starting with a digit) followed by a double-quoted string.
# Group 1 is the identifier. Group 2 is the quoted text, matched greedily
# up to the last `"` on the line; it may contain adjacent-literal joins
# (`" "`) and backslashes followed by whitespace.
p_masked = re.compile(r'([A-Z_][A-Z0-9_]+)\s*(\"(?:"\s*"|\\\s*|.)*\")')

0 commit comments

Comments
 (0)