Skip to content

Commit 0289a09

Browse files
committed
patch 8.2.2605: Vim9: string index and slice does not include composing chars
Problem: Vim9: string index and slice does not include composing chars. Solution: Include composing characters. (issue #6563)
1 parent 240309c commit 0289a09

4 files changed

Lines changed: 54 additions & 16 deletions

File tree

runtime/doc/vim9.txt

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,8 @@ script and `:def` functions; details are below:
9696
def CallMe(count: number, message: string): bool
9797
- Call functions without `:call`: >
9898
writefile(['done'], 'file.txt')
99-
- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert` or
100-
curly-braces names.
99+
- You cannot use `:xit`, `:t`, `:k`, `:append`, `:change`, `:insert`, `:open`
100+
or curly-braces names.
101101
- A range before a command must be prefixed with a colon: >
102102
:%s/this/that
103103
- Unless mentioned specifically, the highest |scriptversion| is used.
@@ -341,7 +341,8 @@ Functions can be called without `:call`: >
341341
Using `:call` is still possible, but this is discouraged.
342342

343343
A method call without `eval` is possible, so long as the start is an
344-
identifier or can't be an Ex command. Examples: >
344+
identifier or can't be an Ex command. For a function, either "(" or "->" must
345+
follow, without a line break. Examples: >
345346
myList->add(123)
346347
g:myList->add(123)
347348
[1, 2, 3]->Process()
@@ -696,8 +697,9 @@ for v:null. When converting a boolean to a string "false" and "true" are
696697
used, not "v:false" and "v:true" like in legacy script. "v:none" is not
697698
changed, it is only used in JSON and has no equivalent in other languages.
698699

699-
Indexing a string with [idx] or [idx : idx] uses character indexes instead of
700-
byte indexes. Example: >
700+
Indexing a string with [idx] or taking a slice with [idx : idx] uses character
701+
indexes instead of byte indexes. Composing characters are included.
702+
Example: >
701703
echo 'bár'[1]
702704
In legacy script this results in the character 0xc3 (an illegal byte), in Vim9
703705
script this results in the string 'á'.
@@ -845,6 +847,8 @@ THIS IS STILL UNDER DEVELOPMENT - ANYTHING CAN BREAK - ANYTHING CAN CHANGE
845847
:enddef End of a function defined with `:def`. It should be on
846848
a line of its own.
847849

850+
You may also find this wiki useful. It was written by an early adopter of
851+
Vim9 script: https://github.com/lacygoill/wiki/blob/master/vim/vim9.md
848852

849853
If the script the function is defined in is Vim9 script, then script-local
850854
variables can be accessed without the "s:" prefix. They must be defined

src/testdir/test_vim9_expr.vim

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,6 +2367,35 @@ def Test_expr7_any_index_slice()
23672367
assert_equal('abcd', g:teststring[: -3])
23682368
assert_equal('', g:teststring[: -9])
23692369

2370+
# composing characters are included
2371+
g:teststring = 'àéû'
2372+
assert_equal('à', g:teststring[0])
2373+
assert_equal('é', g:teststring[1])
2374+
assert_equal('û', g:teststring[2])
2375+
assert_equal('', g:teststring[3])
2376+
assert_equal('', g:teststring[4])
2377+
2378+
assert_equal('û', g:teststring[-1])
2379+
assert_equal('é', g:teststring[-2])
2380+
assert_equal('à', g:teststring[-3])
2381+
assert_equal('', g:teststring[-4])
2382+
assert_equal('', g:teststring[-5])
2383+
2384+
assert_equal('à', g:teststring[0 : 0])
2385+
assert_equal('é', g:teststring[1 : 1])
2386+
assert_equal('àé', g:teststring[0 : 1])
2387+
assert_equal('àéû', g:teststring[0 : -1])
2388+
assert_equal('àé', g:teststring[0 : -2])
2389+
assert_equal('à', g:teststring[0 : -3])
2390+
assert_equal('', g:teststring[0 : -4])
2391+
assert_equal('', g:teststring[0 : -5])
2392+
assert_equal('àéû', g:teststring[ : ])
2393+
assert_equal('àéû', g:teststring[0 : ])
2394+
assert_equal('éû', g:teststring[1 : ])
2395+
assert_equal('û', g:teststring[2 : ])
2396+
assert_equal('', g:teststring[3 : ])
2397+
assert_equal('', g:teststring[4 : ])
2398+
23702399
# blob index cannot be out of range
23712400
g:testblob = 0z01ab
23722401
assert_equal(0x01, g:testblob[0])

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,8 @@ static char *(features[]) =
750750

751751
static int included_patches[] =
752752
{ /* Add new patch number below this line */
753+
/**/
754+
2605,
753755
/**/
754756
2604,
755757
/**/

src/vim9execute.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -985,8 +985,9 @@ allocate_if_null(typval_T *tv)
985985
}
986986

987987
/*
988-
* Return the character "str[index]" where "index" is the character index. If
989-
* "index" is out of range NULL is returned.
988+
* Return the character "str[index]" where "index" is the character index,
989+
* including composing characters.
990+
* If "index" is out of range NULL is returned.
990991
*/
991992
char_u *
992993
char_from_string(char_u *str, varnumber_T index)
@@ -1005,23 +1006,23 @@ char_from_string(char_u *str, varnumber_T index)
10051006
int clen = 0;
10061007

10071008
for (nbyte = 0; nbyte < slen; ++clen)
1008-
nbyte += MB_CPTR2LEN(str + nbyte);
1009+
nbyte += mb_ptr2len(str + nbyte);
10091010
nchar = clen + index;
10101011
if (nchar < 0)
10111012
// unlike list: index out of range results in empty string
10121013
return NULL;
10131014
}
10141015

10151016
for (nbyte = 0; nchar > 0 && nbyte < slen; --nchar)
1016-
nbyte += MB_CPTR2LEN(str + nbyte);
1017+
nbyte += mb_ptr2len(str + nbyte);
10171018
if (nbyte >= slen)
10181019
return NULL;
1019-
return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte));
1020+
return vim_strnsave(str + nbyte, mb_ptr2len(str + nbyte));
10201021
}
10211022

10221023
/*
10231024
* Get the byte index for character index "idx" in string "str" with length
1024-
* "str_len".
1025+
* "str_len". Composing characters are included.
10251026
* If going over the end return "str_len".
10261027
* If "idx" is negative count from the end, -1 is the last character.
10271028
* When going over the start return -1.
@@ -1036,7 +1037,7 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx)
10361037
{
10371038
while (nchar > 0 && nbyte < str_len)
10381039
{
1039-
nbyte += MB_CPTR2LEN(str + nbyte);
1040+
nbyte += mb_ptr2len(str + nbyte);
10401041
--nchar;
10411042
}
10421043
}
@@ -1056,7 +1057,8 @@ char_idx2byte(char_u *str, size_t str_len, varnumber_T idx)
10561057
}
10571058

10581059
/*
1059-
* Return the slice "str[first:last]" using character indexes.
1060+
* Return the slice "str[first : last]" using character indexes. Composing
1061+
* characters are included.
10601062
* "exclusive" is TRUE for slice().
10611063
* Return NULL when the result is empty.
10621064
*/
@@ -1079,7 +1081,7 @@ string_slice(char_u *str, varnumber_T first, varnumber_T last, int exclusive)
10791081
end_byte = char_idx2byte(str, slen, last);
10801082
if (!exclusive && end_byte >= 0 && end_byte < (long)slen)
10811083
// end index is inclusive
1082-
end_byte += MB_CPTR2LEN(str + end_byte);
1084+
end_byte += mb_ptr2len(str + end_byte);
10831085
}
10841086

10851087
if (start_byte >= (long)slen || end_byte <= start_byte)
@@ -3249,8 +3251,9 @@ call_def_function(
32493251
res = string_slice(tv->vval.v_string, n1, n2, FALSE);
32503252
else
32513253
// Index: The resulting variable is a string of a
3252-
// single character. If the index is too big or
3253-
// negative the result is empty.
3254+
// single character (including composing characters).
3255+
// If the index is too big or negative the result is
3256+
// empty.
32543257
res = char_from_string(tv->vval.v_string, n2);
32553258
vim_free(tv->vval.v_string);
32563259
tv->vval.v_string = res;

0 commit comments

Comments
 (0)