Skip to content

Commit be07caa

Browse files
committed
patch 9.0.1777: patch 9.0.1771 causes problems
Problem:  patch 9.0.1771 causes problems
Solution: revert it

Revert "patch 9.0.1771: regex: combining chars in collections not handled"

This reverts commit ca22fc3.

Signed-off-by: Christian Brabandt <[email protected]>
1 parent 6d626c4 commit be07caa

4 files changed

Lines changed: 10 additions & 146 deletions

File tree

src/regexp_bt.c

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3743,38 +3743,13 @@ regmatch(
37433743

37443744
case ANYOF:
37453745
case ANYBUT:
3746-
{
3747-
char_u *q = OPERAND(scan);
3748-
3749-
if (c == NUL)
3750-
status = RA_NOMATCH;
3751-
else if ((cstrchr(q, c) == NULL) == (op == ANYOF))
3752-
status = RA_NOMATCH;
3753-
else
3754-
{
3755-
// Check following combining characters
3756-
int len = 0;
3757-
int i;
3758-
3759-
if (enc_utf8)
3760-
len = utfc_ptr2len(q) - utf_ptr2len(q);
3761-
3762-
MB_CPTR_ADV(rex.input);
3763-
MB_CPTR_ADV(q);
3764-
3765-
if (!enc_utf8 || len == 0)
3766-
break;
3767-
3768-
for (i = 0; i < len; ++i)
3769-
if (q[i] != rex.input[i])
3770-
{
3771-
status = RA_NOMATCH;
3772-
break;
3773-
}
3774-
rex.input += len;
3775-
}
3776-
break;
3777-
}
3746+
if (c == NUL)
3747+
status = RA_NOMATCH;
3748+
else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
3749+
status = RA_NOMATCH;
3750+
else
3751+
ADVANCE_REGINPUT();
3752+
break;
37783753

37793754
case MULTIBYTECODE:
37803755
if (has_mbyte)

src/regexp_nfa.c

Lines changed: 1 addition & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -1764,7 +1764,6 @@ nfa_regatom(void)
17641764
endp = skip_anyof(p);
17651765
if (*endp == ']')
17661766
{
1767-
int plen;
17681767
/*
17691768
* Try to reverse engineer character classes. For example,
17701769
* recognize that [0-9] stands for \d and [A-Za-z_] for \h,
@@ -2036,34 +2035,11 @@ nfa_regatom(void)
20362035
if (got_coll_char == TRUE && startc == 0)
20372036
EMIT(0x0a);
20382037
else
2039-
{
20402038
EMIT(startc);
2041-
if (!(enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse)))))
2042-
{
2043-
EMIT(NFA_CONCAT);
2044-
}
2045-
}
2046-
}
2047-
}
2048-
2049-
if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
2050-
{
2051-
int i = utf_ptr2len(regparse);
2052-
2053-
c = utf_ptr2char(regparse + i);
2054-
2055-
// Add composing characters
2056-
for (;;)
2057-
{
2058-
EMIT(c);
20592039
EMIT(NFA_CONCAT);
2060-
if ((i += utf_char2len(c)) >= plen)
2061-
break;
2062-
c = utf_ptr2char(regparse + i);
20632040
}
2064-
EMIT(NFA_COMPOSING);
2065-
EMIT(NFA_CONCAT);
20662041
}
2042+
20672043
MB_PTR_ADV(regparse);
20682044
} // while (p < endp)
20692045

@@ -6442,84 +6418,6 @@ nfa_regmatch(
64426418
result_if_matched = (t->state->c == NFA_START_COLL);
64436419
for (;;)
64446420
{
6445-
if (state->c == NFA_COMPOSING)
6446-
{
6447-
int mc = curc;
6448-
int len = 0;
6449-
nfa_state_T *end;
6450-
nfa_state_T *sta;
6451-
int cchars[MAX_MCO];
6452-
int ccount = 0;
6453-
int j;
6454-
6455-
sta = t->state->out->out;
6456-
len = 0;
6457-
if (utf_iscomposing(sta->c))
6458-
{
6459-
// Only match composing character(s), ignore base
6460-
// character. Used for ".{composing}" and "{composing}"
6461-
// (no preceding character).
6462-
len += mb_char2len(mc);
6463-
}
6464-
if (rex.reg_icombine && len == 0)
6465-
{
6466-
// If \Z was present, then ignore composing characters.
6467-
// When ignoring the base character this always matches.
6468-
if (sta->c != curc)
6469-
result = FAIL;
6470-
else
6471-
result = OK;
6472-
while (sta->c != NFA_END_COMPOSING)
6473-
sta = sta->out;
6474-
}
6475-
// Check base character matches first, unless ignored.
6476-
else if (len > 0 || mc == sta->c)
6477-
// if (len > 0 || mc == sta->c)
6478-
{
6479-
if (len == 0)
6480-
{
6481-
len += mb_char2len(mc);
6482-
sta = sta->out;
6483-
}
6484-
6485-
// We don't care about the order of composing characters.
6486-
// Get them into cchars[] first.
6487-
while (len < clen)
6488-
{
6489-
mc = mb_ptr2char(rex.input + len);
6490-
cchars[ccount++] = mc;
6491-
len += mb_char2len(mc);
6492-
if (ccount == MAX_MCO)
6493-
break;
6494-
}
6495-
6496-
// Check that each composing char in the pattern matches a
6497-
// composing char in the text. We do not check if all
6498-
// composing chars are matched.
6499-
result = OK;
6500-
while (sta->c != NFA_END_COMPOSING)
6501-
{
6502-
for (j = 0; j < ccount; ++j)
6503-
if (cchars[j] == sta->c)
6504-
break;
6505-
if (j == ccount)
6506-
{
6507-
result = FAIL;
6508-
break;
6509-
}
6510-
sta = sta->out;
6511-
}
6512-
}
6513-
else
6514-
result = FAIL;
6515-
6516-
if (t->state->out->out1->c == NFA_END_COMPOSING)
6517-
{
6518-
end = t->state->out->out1;
6519-
ADD_STATE_IF_MATCH(end);
6520-
}
6521-
break;
6522-
}
65236421
if (state->c == NFA_END_COLL)
65246422
{
65256423
result = !result_if_matched;

src/testdir/test_regexp_utf8.vim

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -575,16 +575,5 @@ func Test_match_too_complicated()
575575
set regexpengine=0
576576
endfunc
577577

578-
func Test_combining_chars_in_collection()
579-
new
580-
for i in range(0,2)
581-
exe "set re=".i
582-
put =['��', '', '� a�', 'abcd']
583-
:%s/[��]//
584-
call assert_equal(['', '', '', '� a�', 'abcd'], getline(1,'$'))
585-
%d
586-
endfor
587-
bw!
588-
endfunc
589578

590579
" vim: shiftwidth=2 sts=2 expandtab

src/version.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,8 @@ static char *(features[]) =
699699

700700
static int included_patches[] =
701701
{ /* Add new patch number below this line */
702+
/**/
703+
1777,
702704
/**/
703705
1776,
704706
/**/

0 commit comments

Comments
 (0)