Skip to content

Commit 7d4616b

Browse files
committed
net_http: Optimize net_http_urlencode and net_http_urlencode_full.
net_http_urlencode should be roughly 7x to 12x faster and allocate 26-66% less memory.
1 parent c907a12 commit 7d4616b

1 file changed

Lines changed: 90 additions & 168 deletions

File tree

libretro-common/net/net_http.c

Lines changed: 90 additions & 168 deletions
Original file line numberDiff line numberDiff line change
@@ -217,156 +217,47 @@ static slock_t *dns_cache_lock = NULL;
217217
**/
218218
/*
 * Percent-encodes `source` into a newly allocated, NUL-terminated string
 * and stores the result in `*dest`.
 *
 * Bytes copied verbatim: A-Z a-z 0-9 '*' '-' '.' '/' '_'.
 * Every other byte, including all bytes >= 0x80, is written as "%XX"
 * using uppercase hexadecimal digits.
 *
 * The caller owns `*dest` and must release it with free().
 * `*dest` is set to NULL when `source` is NULL or the allocation fails.
 * Nothing happens when `dest` itself is NULL.
 */
void net_http_urlencode(char **dest, const char *source)
{
   /* One bit per ASCII code point; a set bit means "copy verbatim". */
   static const uint32_t safe[4] = {
      0x00000000, /*  0- 31: none */
      0x03FFE400, /* 32- 63: * - . / 0-9 */
      0x87FFFFFE, /* 64- 95: A-Z _ */
      0x07FFFFFE  /* 96-127: a-z */
   };
   static const char hex[] = "0123456789ABCDEF";
   const char *s;
   char *enc;
   size_t len = 0;

   if (!dest)
      return;

   /* Give the caller a well-defined value on every early exit. */
   *dest = NULL;

   if (!source)
      return;

   /* First pass: compute the exact output length so that a single
    * allocation of the right size is enough. */
   for (s = source; *s; s++)
   {
      unsigned char c = (unsigned char)*s;

      /* uint32_t operand: a shift by up to 31 must not depend on the
       * width of plain unsigned int. */
      if (c < 128 && (safe[c >> 5] & ((uint32_t)1 << (c & 31))))
         len += 1;
      else
         len += 3;
   }

   /* Cast kept to match the existing allocation style in this code. */
   enc = (char*)malloc(len + 1);
   if (!enc)
      return; /* *dest is already NULL */

   *dest = enc;

   /* Second pass: emit the encoded bytes. */
   for (s = source; *s; s++)
   {
      unsigned char c = (unsigned char)*s;

      if (c < 128 && (safe[c >> 5] & ((uint32_t)1 << (c & 31))))
         *enc++ = (char)c;
      else
      {
         *enc++ = '%';
         *enc++ = hex[c >> 4];
         *enc++ = hex[c & 0x0F];
      }
   }

   *enc = '\0';
}
371262

372263
/**
@@ -376,38 +267,69 @@ void net_http_urlencode(char **dest, const char *source)
376267
**/
377268
/*
 * Copies the URL in `source` into `s` (capacity `len` bytes, terminator
 * included), percent-encoding everything that follows the third '/'.
 *
 * Everything up to and including the third '/' is copied verbatim; for a
 * URL of the form "scheme://host/..." that is the scheme and host part.
 * In the remainder these bytes are kept unchanged:
 *    A-Z a-z 0-9 - _ . ~ / : ? # & =
 * and every other byte is written as "%XX" with uppercase hex digits.
 *
 * If `source` contains fewer than three '/', it is copied unchanged.
 * The output is always NUL-terminated and is silently truncated when it
 * does not fit; a "%XX" triplet is never split by truncation.
 * Does nothing when `s` or `source` is NULL or `len` is 0.
 *
 * `s` and `source` must not overlap: output is written while the input
 * is still being read, and encoding can make the output longer.
 */
void net_http_urlencode_full(char *s, const char *source, size_t len)
{
   static const char hex[] = "0123456789ABCDEF";
   const char *p;
   size_t prefix_len;
   size_t pos;
   int slashes = 0;

   if (!s || !source || len == 0)
      return;

   /* Advance to just past the third '/', or to the terminator if the
    * string has fewer than three of them. */
   for (p = source; *p && slashes < 3; p++)
   {
      if (*p == '/')
         slashes++;
   }

   prefix_len = (size_t)(p - source);

   /* Either there is no path to encode (then p sits on the terminator and
    * prefix_len is the whole string length), or the verbatim prefix alone
    * already fills the buffer. In both cases emit as much of `source` as
    * fits; the length is known, so no second scan is needed. */
   if (slashes < 3 || prefix_len >= len)
   {
      size_t n = (prefix_len < len) ? prefix_len : len - 1;

      memcpy(s, source, n);
      s[n] = '\0';
      return;
   }

   /* Verbatim part, including the third '/'. */
   memcpy(s, source, prefix_len);
   pos = prefix_len;

   /* Encode the path directly into the output buffer. The loop condition
    * keeps one byte in reserve for the terminator. */
   for (; *p && pos + 1 < len; p++)
   {
      unsigned char c = (unsigned char)*p;

      if (     (c >= 'A' && c <= 'Z')
            || (c >= 'a' && c <= 'z')
            || (c >= '0' && c <= '9')
            || c == '-' || c == '_'
            || c == '.' || c == '~'
            || c == '/' || c == ':'
            || c == '?' || c == '#'
            || c == '&' || c == '=')
      {
         s[pos++] = (char)c;
      }
      else if (pos + 3 < len)
      {
         /* Three bytes for the triplet plus one for the terminator. */
         s[pos++] = '%';
         s[pos++] = hex[(c >> 4) & 0x0F];
         s[pos++] = hex[ c       & 0x0F];
      }
      else
         break; /* not enough room for a whole "%XX" triplet */
   }

   s[pos] = '\0';
}
412334

413335
struct http_connection_t *net_http_connection_new(const char *url,

0 commit comments

Comments
 (0)