Skip to content

Commit b6ff811

Browse files
committed
patch 7.4.1434
Problem: JSON encoding doesn't handle surrogate pairs. Solution: Improve multi-byte handling of JSON. (Yasuhiro Matsumoto)
1 parent 85b1176 commit b6ff811

3 files changed

Lines changed: 79 additions & 5 deletions

File tree

src/json.c

Lines changed: 67 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,26 @@ write_string(garray_T *gap, char_u *str)
9797
ga_concat(gap, (char_u *)"null");
9898
else
9999
{
100+
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
101+
vimconv_T conv;
102+
char_u *converted = NULL;
103+
104+
convert_setup(&conv, p_enc, (char_u*)"utf-8");
105+
if (conv.vc_type != CONV_NONE)
106+
converted = res = string_convert(&conv, res, NULL);
107+
convert_setup(&conv, NULL, NULL);
108+
#endif
109+
100110
ga_append(gap, '"');
101111
while (*res != NUL)
102112
{
103-
int c = PTR2CHAR(res);
113+
int c;
114+
#ifdef FEAT_MBYTE
115+
/* always use utf-8 encoding, ignore 'encoding' */
116+
c = utf_ptr2char(res);
117+
#else
118+
c = (int)*(p);
119+
#endif
104120

105121
switch (c)
106122
{
@@ -123,7 +139,7 @@ write_string(garray_T *gap, char_u *str)
123139
if (c >= 0x20)
124140
{
125141
#ifdef FEAT_MBYTE
126-
numbuf[mb_char2bytes(c, numbuf)] = NUL;
142+
numbuf[utf_char2bytes(c, numbuf)] = NUL;
127143
#else
128144
numbuf[0] = c;
129145
numbuf[1] = NUL;
@@ -137,9 +153,16 @@ write_string(garray_T *gap, char_u *str)
137153
ga_concat(gap, numbuf);
138154
}
139155
}
140-
mb_cptr_adv(res);
156+
#ifdef FEAT_MBYTE
157+
res += utf_ptr2len(res);
158+
#else
159+
++p;
160+
#endif
141161
}
142162
ga_append(gap, '"');
163+
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
164+
vim_free(converted);
165+
#endif
143166
}
144167
}
145168

@@ -525,11 +548,21 @@ json_decode_string(js_read_T *reader, typval_T *res)
525548
int c;
526549
long nr;
527550
char_u buf[NUMBUFLEN];
551+
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
552+
vimconv_T conv;
553+
char_u *converted = NULL;
554+
#endif
528555

529556
if (res != NULL)
530557
ga_init2(&ga, 1, 200);
531558

532559
p = reader->js_buf + reader->js_used + 1; /* skip over " */
560+
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
561+
convert_setup(&conv, (char_u*)"utf-8", p_enc);
562+
if (conv.vc_type != CONV_NONE)
563+
converted = p = string_convert(&conv, p, NULL);
564+
convert_setup(&conv, NULL, NULL);
565+
#endif
533566
while (*p != '"')
534567
{
535568
if (*p == NUL || p[1] == NUL
@@ -573,13 +606,32 @@ json_decode_string(js_read_T *reader, typval_T *res)
573606
+ STRLEN(reader->js_buf);
574607
}
575608
}
609+
nr = 0;
610+
len = 0;
576611
vim_str2nr(p + 2, NULL, &len,
577612
STR2NR_HEX + STR2NR_FORCE, &nr, NULL, 4);
578613
p += len + 2;
614+
if (0xd800 <= nr && nr <= 0xdfff
615+
&& (int)(reader->js_end - p) >= 6
616+
&& *p == '\\' && *(p+1) == 'u')
617+
{
618+
long nr2 = 0;
619+
620+
/* decode surrogate pair, e.g. \ud83c\udf63 (high \ud800-\udbff, low \udc00-\udfff) */
621+
len = 0;
622+
vim_str2nr(p + 2, NULL, &len,
623+
STR2NR_HEX + STR2NR_FORCE, &nr2, NULL, 4);
624+
if (0xdc00 <= nr2 && nr2 <= 0xdfff)
625+
{
626+
p += len + 2;
627+
nr = (((nr - 0xd800) << 10) |
628+
((nr2 - 0xdc00) & 0x3ff)) + 0x10000;
629+
}
630+
}
579631
if (res != NULL)
580632
{
581633
#ifdef FEAT_MBYTE
582-
buf[(*mb_char2bytes)((int)nr, buf)] = NUL;
634+
buf[utf_char2bytes((int)nr, buf)] = NUL;
583635
ga_concat(&ga, buf);
584636
#else
585637
ga_append(&ga, nr);
@@ -600,12 +652,19 @@ json_decode_string(js_read_T *reader, typval_T *res)
600652
}
601653
else
602654
{
603-
len = MB_PTR2LEN(p);
655+
#ifdef FEAT_MBYTE
656+
len = utf_ptr2len(p);
657+
#else
658+
len = 1;
659+
#endif
604660
if (res != NULL)
605661
{
606662
if (ga_grow(&ga, len) == FAIL)
607663
{
608664
ga_clear(&ga);
665+
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
666+
vim_free(converted);
667+
#endif
609668
return FAIL;
610669
}
611670
mch_memmove((char *)ga.ga_data + ga.ga_len, p, (size_t)len);
@@ -614,6 +673,9 @@ json_decode_string(js_read_T *reader, typval_T *res)
614673
p += len;
615674
}
616675
}
676+
#if defined(FEAT_MBYTE) && defined(USE_ICONV)
677+
vim_free(converted);
678+
#endif
617679

618680
reader->js_used = (int)(p - reader->js_buf);
619681
if (*p == '"')

src/testdir/test_json.vim

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ let s:var4 = "\x10\x11\x12\x13\x14\x15\x16\x17"
1212
let s:json5 = '"\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f"'
1313
let s:var5 = "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
1414

15+
" surrogate pair
16+
let s:jsonsp1 = '"\ud83c\udf63"'
17+
let s:varsp1 = "\xf0\x9f\x8d\xa3"
18+
let s:jsonsp2 = '"\ud83c\u00a0"'
19+
let s:varsp2 = "\ud83c\u00a0"
20+
1521
let s:jsonmb = '"s¢cĴgё"'
1622
let s:varmb = "s¢cĴgё"
1723
let s:jsonnr = '1234'
@@ -69,6 +75,8 @@ func Test_json_encode()
6975

7076
if has('multi_byte')
7177
call assert_equal(s:jsonmb, json_encode(s:varmb))
78+
call assert_equal(s:varsp1, json_decode(s:jsonsp1))
79+
call assert_equal(s:varsp2, json_decode(s:jsonsp2))
7280
endif
7381

7482
call assert_equal(s:jsonnr, json_encode(s:varnr))
@@ -105,6 +113,8 @@ func Test_json_decode()
105113

106114
if has('multi_byte')
107115
call assert_equal(s:varmb, json_decode(s:jsonmb))
116+
call assert_equal(s:varsp1, js_decode(s:jsonsp1))
117+
call assert_equal(s:varsp2, js_decode(s:jsonsp2))
108118
endif
109119

110120
call assert_equal(s:varnr, json_decode(s:jsonnr))

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,8 @@ static char *(features[]) =
743743

744744
static int included_patches[] =
745745
{ /* Add new patch number below this line */
746+
/**/
747+
1434,
746748
/**/
747749
1433,
748750
/**/

0 commit comments

Comments
 (0)