Skip to content

Commit 4019cf9

Browse files
committed
patch 8.0.0252: not properly recognizing word characters between 128 and 255
Problem: Characters below 256 that are not one byte are not always recognized as word characters.
Solution: Make vim_iswordc() and vim_iswordp() work the same way. Add a test for this. (Ozaki Kiichi)
1 parent f42dd3c commit 4019cf9

7 files changed

Lines changed: 143 additions & 18 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,4 @@ src/testdir/viminfo
8181
src/memfile_test
8282
src/json_test
8383
src/message_test
84+
src/kword_test

src/Makefile

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1584,14 +1584,16 @@ EXTRA_SRC = hangulin.c if_lua.c if_mzsch.c auto/if_perl.c if_perlsfio.c \
15841584
# Unittest files
15851585
JSON_TEST_SRC = json_test.c
15861586
JSON_TEST_TARGET = json_test$(EXEEXT)
1587+
KWORD_TEST_SRC = kword_test.c
1588+
KWORD_TEST_TARGET = kword_test$(EXEEXT)
15871589
MEMFILE_TEST_SRC = memfile_test.c
15881590
MEMFILE_TEST_TARGET = memfile_test$(EXEEXT)
15891591
MESSAGE_TEST_SRC = message_test.c
15901592
MESSAGE_TEST_TARGET = message_test$(EXEEXT)
15911593

1592-
UNITTEST_SRC = $(JSON_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
1593-
UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
1594-
RUN_UNITTESTS = run_json_test run_memfile_test run_message_test
1594+
UNITTEST_SRC = $(JSON_TEST_SRC) $(KWORD_TEST_SRC) $(MEMFILE_TEST_SRC) $(MESSAGE_TEST_SRC)
1595+
UNITTEST_TARGETS = $(JSON_TEST_TARGET) $(KWORD_TEST_TARGET) $(MEMFILE_TEST_TARGET) $(MESSAGE_TEST_TARGET)
1596+
RUN_UNITTESTS = run_json_test run_kword_test run_memfile_test run_message_test
15951597

15961598
# All sources, also the ones that are not configured
15971599
ALL_SRC = $(BASIC_SRC) $(ALL_GUI_SRC) $(UNITTEST_SRC) $(EXTRA_SRC)
@@ -1611,7 +1613,6 @@ OBJ_COMMON = \
16111613
objects/arabic.o \
16121614
objects/buffer.o \
16131615
objects/blowfish.o \
1614-
objects/charset.o \
16151616
objects/crypt.o \
16161617
objects/crypt_zip.o \
16171618
objects/dict.o \
@@ -1679,6 +1680,7 @@ OBJ_COMMON = \
16791680

16801681
# The files included by tests are not in OBJ_COMMON.
16811682
OBJ_MAIN = \
1683+
objects/charset.o \
16821684
objects/json.o \
16831685
objects/main.o \
16841686
objects/memfile.o \
@@ -1687,20 +1689,31 @@ OBJ_MAIN = \
16871689
OBJ = $(OBJ_COMMON) $(OBJ_MAIN)
16881690

16891691
OBJ_JSON_TEST = \
1692+
objects/charset.o \
16901693
objects/memfile.o \
16911694
objects/message.o \
16921695
objects/json_test.o
16931696

16941697
JSON_TEST_OBJ = $(OBJ_COMMON) $(OBJ_JSON_TEST)
16951698

1699+
OBJ_KWORD_TEST = \
1700+
objects/json.o \
1701+
objects/memfile.o \
1702+
objects/message.o \
1703+
objects/kword_test.o
1704+
1705+
KWORD_TEST_OBJ = $(OBJ_COMMON) $(OBJ_KWORD_TEST)
1706+
16961707
OBJ_MEMFILE_TEST = \
1708+
objects/charset.o \
16971709
objects/json.o \
16981710
objects/message.o \
16991711
objects/memfile_test.o
17001712

17011713
MEMFILE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MEMFILE_TEST)
17021714

17031715
OBJ_MESSAGE_TEST = \
1716+
objects/charset.o \
17041717
objects/json.o \
17051718
objects/memfile.o \
17061719
objects/message_test.o
@@ -1710,6 +1723,7 @@ MESSAGE_TEST_OBJ = $(OBJ_COMMON) $(OBJ_MESSAGE_TEST)
17101723
ALL_OBJ = $(OBJ_COMMON) \
17111724
$(OBJ_MAIN) \
17121725
$(OBJ_JSON_TEST) \
1726+
$(OBJ_KWORD_TEST) \
17131727
$(OBJ_MEMFILE_TEST) \
17141728
$(OBJ_MESSAGE_TEST)
17151729

@@ -2036,6 +2050,9 @@ unittest unittests: $(RUN_UNITTESTS)
20362050
run_json_test: $(JSON_TEST_TARGET)
20372051
$(VALGRIND) ./$(JSON_TEST_TARGET) || exit 1; echo $* passed;
20382052

2053+
run_kword_test: $(KWORD_TEST_TARGET)
2054+
$(VALGRIND) ./$(KWORD_TEST_TARGET) || exit 1; echo $* passed;
2055+
20392056
run_memfile_test: $(MEMFILE_TEST_TARGET)
20402057
$(VALGRIND) ./$(MEMFILE_TEST_TARGET) || exit 1; echo $* passed;
20412058

@@ -2222,6 +2239,13 @@ $(JSON_TEST_TARGET): auto/config.mk objects $(JSON_TEST_OBJ)
22222239
MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
22232240
sh $(srcdir)/link.sh
22242241

2242+
$(KWORD_TEST_TARGET): auto/config.mk objects $(KWORD_TEST_OBJ)
2243+
$(CCC) version.c -o objects/version.o
2244+
@LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
2245+
-o $(KWORD_TEST_TARGET) $(KWORD_TEST_OBJ) $(ALL_LIBS)" \
2246+
MAKE="$(MAKE)" LINK_AS_NEEDED=$(LINK_AS_NEEDED) \
2247+
sh $(srcdir)/link.sh
2248+
22252249
$(MEMFILE_TEST_TARGET): auto/config.mk objects $(MEMFILE_TEST_OBJ)
22262250
$(CCC) version.c -o objects/version.o
22272251
@LINK="$(PURIFY) $(SHRPENV) $(CClink) $(ALL_LIB_DIRS) $(LDFLAGS) \
@@ -3058,6 +3082,9 @@ objects/json.o: json.c
30583082
objects/json_test.o: json_test.c
30593083
$(CCC) -o $@ json_test.c
30603084

3085+
objects/kword_test.o: kword_test.c
3086+
$(CCC) -o $@ kword_test.c
3087+
30613088
objects/list.o: list.c
30623089
$(CCC) -o $@ list.c
30633090

@@ -3597,6 +3624,10 @@ objects/json_test.o: json_test.c main.c vim.h auto/config.h feature.h os_unix.h
35973624
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
35983625
regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
35993626
proto.h globals.h farsi.h arabic.h json.c
3627+
objects/kword_test.o: kword_test.c main.c vim.h auto/config.h feature.h os_unix.h \
3628+
auto/osdef.h ascii.h keymap.h term.h macros.h option.h structs.h \
3629+
regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h ex_cmds.h spell.h \
3630+
proto.h globals.h farsi.h arabic.h charset.c mbyte.c
36003631
objects/memfile_test.o: memfile_test.c main.c vim.h auto/config.h feature.h \
36013632
os_unix.h auto/osdef.h ascii.h keymap.h term.h macros.h option.h \
36023633
structs.h regexp.h gui.h gui_beval.h proto/gui_beval.pro alloc.h \

src/charset.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -899,16 +899,17 @@ vim_iswordc(int c)
899899
int
900900
vim_iswordc_buf(int c, buf_T *buf)
901901
{
902-
#ifdef FEAT_MBYTE
903902
if (c >= 0x100)
904903
{
904+
#ifdef FEAT_MBYTE
905905
if (enc_dbcs != 0)
906906
return dbcs_class((unsigned)c >> 8, (unsigned)(c & 0xff)) >= 2;
907907
if (enc_utf8)
908-
return utf_class(c) >= 2;
909-
}
908+
return utf_class_buf(c, buf) >= 2;
910909
#endif
911-
return (c > 0 && c < 0x100 && GET_CHARTAB(buf, c) != 0);
910+
return FALSE;
911+
}
912+
return (c > 0 && GET_CHARTAB(buf, c) != 0);
912913
}
913914

914915
/*
@@ -917,21 +918,19 @@ vim_iswordc_buf(int c, buf_T *buf)
917918
int
918919
vim_iswordp(char_u *p)
919920
{
920-
#ifdef FEAT_MBYTE
921-
if (has_mbyte && MB_BYTE2LEN(*p) > 1)
922-
return mb_get_class(p) >= 2;
923-
#endif
924-
return GET_CHARTAB(curbuf, *p) != 0;
921+
return vim_iswordp_buf(p, curbuf);
925922
}
926923

927924
int
928925
vim_iswordp_buf(char_u *p, buf_T *buf)
929926
{
927+
int c = *p;
928+
930929
#ifdef FEAT_MBYTE
931-
if (has_mbyte && MB_BYTE2LEN(*p) > 1)
932-
return mb_get_class(p) >= 2;
930+
if (has_mbyte && MB_BYTE2LEN(c) > 1)
931+
c = (*mb_ptr2char)(p);
933932
#endif
934-
return (GET_CHARTAB(buf, *p) != 0);
933+
return vim_iswordc_buf(c, buf);
935934
}
936935

937936
/*

src/kword_test.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/* vi:set ts=8 sts=4 sw=4 noet:
2+
*
3+
* VIM - Vi IMproved by Bram Moolenaar
4+
*
5+
* Do ":help uganda" in Vim to read copying and usage conditions.
6+
* Do ":help credits" in Vim to see a list of people who contributed.
7+
* See README.txt for an overview of the Vim source code.
8+
*/
9+
10+
/*
11+
* kword_test.c: Unittests for vim_iswordc() and vim_iswordp().
12+
*/
13+
14+
#undef NDEBUG
15+
#include <assert.h>
16+
17+
/* Must include main.c because it contains much more than just main() */
18+
#define NO_VIM_MAIN
19+
#include "main.c"
20+
21+
/* charset.c is included directly (it is not linked in as an object) so the tested functions are compiled into this test */
22+
#include "charset.c"
23+
24+
#ifdef FEAT_MBYTE
25+
/*
26+
* Test the results of vim_iswordc() and vim_iswordp() are matched.
27+
*/
28+
static void
29+
test_isword_funcs_utf8(void)
30+
{
31+
buf_T buf;
32+
int c;
33+
34+
vim_memset(&buf, 0, sizeof(buf));
35+
p_enc = (char_u *)"utf-8";
36+
p_isi = (char_u *)"";
37+
p_isp = (char_u *)"";
38+
p_isf = (char_u *)"";
39+
buf.b_p_isk = (char_u *)"@,48-57,_,128-167,224-235";
40+
41+
curbuf = &buf;
42+
mb_init(); /* calls init_chartab() */
43+
44+
for (c = 0; c < 0x10000; ++c)
45+
{
46+
char_u p[4] = {0};
47+
int c1;
48+
int retc;
49+
int retp;
50+
51+
utf_char2bytes(c, p);
52+
c1 = utf_ptr2char(p);
53+
if (c != c1)
54+
{
55+
fprintf(stderr, "Failed: ");
56+
fprintf(stderr,
57+
"[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
58+
c, p[0], p[1], p[2]);
59+
fprintf(stderr, "c != utf_ptr2char(p) (=%#04x)\n", c1);
60+
abort();
61+
}
62+
retc = vim_iswordc_buf(c, &buf);
63+
retp = vim_iswordp_buf(p, &buf);
64+
if (retc != retp)
65+
{
66+
fprintf(stderr, "Failed: ");
67+
fprintf(stderr,
68+
"[c = %#04x, p = {%#02x, %#02x, %#02x}] ",
69+
c, p[0], p[1], p[2]);
70+
fprintf(stderr, "vim_iswordc(c) (=%d) != vim_iswordp(p) (=%d)\n",
71+
retc, retp);
72+
abort();
73+
}
74+
}
75+
}
76+
#endif
77+
78+
int
79+
main(void)
80+
{
81+
#ifdef FEAT_MBYTE
82+
test_isword_funcs_utf8();
83+
#endif
84+
return 0;
85+
}

src/mbyte.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -895,7 +895,7 @@ mb_get_class_buf(char_u *p, buf_T *buf)
895895
if (enc_dbcs != 0 && p[0] != NUL && p[1] != NUL)
896896
return dbcs_class(p[0], p[1]);
897897
if (enc_utf8)
898-
return utf_class(utf_ptr2char(p));
898+
return utf_class_buf(utf_ptr2char(p), buf);
899899
return 0;
900900
}
901901

@@ -2693,6 +2693,12 @@ static struct interval emoji_all[] =
26932693
*/
26942694
int
26952695
utf_class(int c)
2696+
{
2697+
return utf_class_buf(c, curbuf);
2698+
}
2699+
2700+
int
2701+
utf_class_buf(int c, buf_T *buf)
26962702
{
26972703
/* sorted list of non-overlapping intervals */
26982704
static struct clinterval
@@ -2780,7 +2786,7 @@ utf_class(int c)
27802786
{
27812787
if (c == ' ' || c == '\t' || c == NUL || c == 0xa0)
27822788
return 0; /* blank */
2783-
if (vim_iswordc(c))
2789+
if (vim_iswordc_buf(c, buf))
27842790
return 2; /* word character */
27852791
return 1; /* punctuation */
27862792
}

src/proto/mbyte.pro

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ int utf_char2bytes(int c, char_u *buf);
4040
int utf_iscomposing(int c);
4141
int utf_printable(int c);
4242
int utf_class(int c);
43+
int utf_class_buf(int c, buf_T *buf);
4344
int utf_ambiguous_width(int c);
4445
int utf_fold(int a);
4546
int utf_toupper(int a);

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -764,6 +764,8 @@ static char *(features[]) =
764764

765765
static int included_patches[] =
766766
{ /* Add new patch number below this line */
767+
/**/
768+
252,
767769
/**/
768770
251,
769771
/**/

0 commit comments

Comments
 (0)