Skip to content

Commit 17793ef

Browse files
committed
patch 8.2.2233: cannot convert a byte index into a character index
Problem: Cannot convert a byte index into a character index. Solution: Add charidx(). (Yegappan Lakshmanan, closes #7561)
1 parent 9618a25 commit 17793ef

5 files changed

Lines changed: 109 additions & 0 deletions

File tree

runtime/doc/eval.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2475,6 +2475,8 @@ ch_status({handle} [, {options}])
24752475
changenr() Number current change number
24762476
char2nr({expr} [, {utf8}]) Number ASCII/UTF8 value of first char in {expr}
24772477
charclass({string}) Number character class of {string}
2478+
charidx({string}, {idx} [, {countcc}])
2479+
Number char index of byte {idx} in {string}
24782480
chdir({dir}) String change current working directory
24792481
cindent({lnum}) Number C indent for line {lnum}
24802482
clearmatches([{win}]) none clear all matches
@@ -3588,6 +3590,31 @@ charclass({string}) *charclass()*
35883590
other specific Unicode class
35893591
The class is used in patterns and word motions.
35903592

3593+
*charidx()*
3594+
charidx({string}, {idx} [, {countcc}])
3595+
Return the character index of the byte at {idx} in {string}.
3596+
The index of the first character is zero.
3597+
If there are no multibyte characters the returned value is
3598+
equal to {idx}.
3599+
When {countcc} is omitted or zero, composing characters are
3600+
not counted separately; their byte length is added to the
3601+
preceding base character.
3602+
When {countcc} is set to 1, then composing characters are
3603+
counted as separate characters.
3604+
Returns -1 if the arguments are invalid, or if {idx} is negative
3605+
or greater than the index of the last byte in {string}. An error
3606+
is given if the first argument is not a string, the second
3607+
argument is not a number, or the third argument is present
3608+
and is not zero or one.
3609+
See |byteidx()| and |byteidxcomp()| for getting the byte index
3610+
from the character index.
3611+
Examples: >
3612+
echo charidx('áb́ć', 3) returns 1
3613+
echo charidx('áb́ć', 6, 1) returns 4
3614+
echo charidx('áb́ć', 16) returns -1
3615+
<
3616+
Can also be used as a |method|: >
3617+
GetName()->charidx(idx)
35913618

35923619
chdir({dir}) *chdir()*
35933620
Change the current working directory to {dir}. The scope of

runtime/doc/usr_41.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ String manipulation: *string-functions*
625625
iconv() convert text from one encoding to another
626626
byteidx() byte index of a character in a string
627627
byteidxcomp() like byteidx() but count composing characters
628+
charidx() character index of a byte in a string
628629
repeat() repeat a string multiple times
629630
eval() evaluate a string expression
630631
execute() execute an Ex command and get the output

src/evalfunc.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ static void f_ceil(typval_T *argvars, typval_T *rettv);
4747
#endif
4848
static void f_changenr(typval_T *argvars, typval_T *rettv);
4949
static void f_char2nr(typval_T *argvars, typval_T *rettv);
50+
static void f_charidx(typval_T *argvars, typval_T *rettv);
5051
static void f_col(typval_T *argvars, typval_T *rettv);
5152
static void f_confirm(typval_T *argvars, typval_T *rettv);
5253
static void f_copy(typval_T *argvars, typval_T *rettv);
@@ -789,6 +790,8 @@ static funcentry_T global_functions[] =
789790
ret_number, f_char2nr},
790791
{"charclass", 1, 1, FEARG_1, NULL,
791792
ret_number, f_charclass},
793+
{"charidx", 2, 3, FEARG_1, NULL,
794+
ret_number, f_charidx},
792795
{"chdir", 1, 1, FEARG_1, NULL,
793796
ret_string, f_chdir},
794797
{"cindent", 1, 1, FEARG_1, NULL,
@@ -2420,6 +2423,57 @@ f_char2nr(typval_T *argvars, typval_T *rettv)
24202423
rettv->vval.v_number = tv_get_string(&argvars[0])[0];
24212424
}
24222425

2426+
/*
2427+
* "charidx()" function
2428+
*/
2429+
static void
2430+
f_charidx(typval_T *argvars, typval_T *rettv)
2431+
{
2432+
char_u *str;
2433+
varnumber_T idx;
2434+
int countcc = FALSE;
2435+
char_u *p;
2436+
int len;
2437+
int (*ptr2len)(char_u *);
2438+
2439+
rettv->vval.v_number = -1;
2440+
2441+
if (argvars[0].v_type != VAR_STRING || argvars[1].v_type != VAR_NUMBER
2442+
|| (argvars[2].v_type != VAR_UNKNOWN
2443+
&& argvars[2].v_type != VAR_NUMBER))
2444+
{
2445+
emsg(_(e_invarg));
2446+
return;
2447+
}
2448+
2449+
str = tv_get_string_chk(&argvars[0]);
2450+
idx = tv_get_number_chk(&argvars[1], NULL);
2451+
if (str == NULL || idx < 0)
2452+
return;
2453+
2454+
if (argvars[2].v_type != VAR_UNKNOWN)
2455+
countcc = (int)tv_get_bool(&argvars[2]);
2456+
if (countcc < 0 || countcc > 1)
2457+
{
2458+
semsg(_(e_using_number_as_bool_nr), countcc);
2459+
return;
2460+
}
2461+
2462+
if (enc_utf8 && countcc)
2463+
ptr2len = utf_ptr2len;
2464+
else
2465+
ptr2len = mb_ptr2len;
2466+
2467+
for (p = str, len = 0; p <= str + idx; len++)
2468+
{
2469+
if (*p == NUL)
2470+
return;
2471+
p += ptr2len(p);
2472+
}
2473+
2474+
rettv->vval.v_number = len > 0 ? len - 1 : 0;
2475+
}
2476+
24232477
win_T *
24242478
get_optional_window(typval_T *argvars, int idx)
24252479
{

src/testdir/test_functions.vim

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1132,6 +1132,31 @@ func Test_byteidx()
11321132
call assert_fails("call byteidxcomp([], 0)", 'E730:')
11331133
endfunc
11341134

1135+
" Test for charidx()
1136+
func Test_charidx()
  " The assertions below imply an 8-byte string: byte 7 is the last valid
  " index and byte 8 is out of range.
  " NOTE(review): this requires the accented letters to be stored as a base
  " letter followed by a combining accent, not as precomposed characters.
  let a = 'xáb́y'

  " composing characters belong to the preceding base character
  call assert_equal(0, charidx(a, 0))
  call assert_equal(1, charidx(a, 3))
  call assert_equal(2, charidx(a, 4))
  call assert_equal(3, charidx(a, 7))
  call assert_equal(-1, charidx(a, 8))
  call assert_equal(-1, charidx('', 0))

  " count composing characters
  call assert_equal(0, charidx(a, 0, 1))
  call assert_equal(2, charidx(a, 2, 1))
  call assert_equal(3, charidx(a, 4, 1))
  call assert_equal(5, charidx(a, 7, 1))
  call assert_equal(-1, charidx(a, 8, 1))
  call assert_equal(-1, charidx('', 0, 1))

  " a negative byte index is out of range and gives -1 without an error
  call assert_equal(-1, charidx(a, -1))
  call assert_equal(-1, charidx(a, -1, 1))
  call assert_equal(-1, charidx('', -1))

  " wrong argument types give E474, a number that is not 0 or 1 gives E1023
  call assert_fails('let x = charidx([], 1)', 'E474:')
  call assert_fails('let x = charidx("abc", [])', 'E474:')
  call assert_fails('let x = charidx("abc", 1, [])', 'E474:')
  call assert_fails('let x = charidx("abc", 1, -1)', 'E1023:')
  call assert_fails('let x = charidx("abc", 1, 2)', 'E1023:')
endfunc
1159+
11351160
func Test_count()
11361161
let l = ['a', 'a', 'A', 'b']
11371162
call assert_equal(2, count(l, 'a'))

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,8 @@ static char *(features[]) =
750750

751751
static int included_patches[] =
752752
{ /* Add new patch number below this line */
753+
/**/
754+
2233,
753755
/**/
754756
2232,
755757
/**/

0 commit comments

Comments
 (0)