Skip to content

Commit b86f10e

Browse files
committed
patch 7.4.1629
Problem: Handling emoji characters as full width has problems with backwards compatibility. Solution: Remove ambiguous and double width characters from the emoji table. Use a separate table for the character class. (partly by Yasuhiro Matsumoto)
1 parent 3f3fbd3 commit b86f10e

3 files changed

Lines changed: 335 additions & 210 deletions

File tree

runtime/tools/unicode.vim

Lines changed: 130 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ func! ParseFoldProps()
3232
if line !~ '^#' && line !~ '^\s*$'
3333
let l = split(line, '\s*;\s*', 1)
3434
if len(l) != 4
35-
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4'
36-
return
35+
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 4'
36+
return
3737
endif
3838
call add(s:foldprops, l)
3939
endif
@@ -50,8 +50,8 @@ func! ParseWidthProps()
5050
if line !~ '^#' && line !~ '^\s*$'
5151
let l = split(line, '\s*;\s*', 1)
5252
if len(l) != 2
53-
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2'
54-
return
53+
echoerr 'Found ' . len(l) . ' items in line ' . lnum . ', expected 2'
54+
return
5555
endif
5656
call add(s:widthprops, l)
5757
endif
@@ -72,18 +72,18 @@ func! BuildCaseTable(name, index)
7272
let n = ('0x' . p[0]) + 0
7373
let nl = ('0x' . p[a:index]) + 0
7474
if start >= 0 && add == nl - n && (step == 0 || n - end == step)
75-
" continue with same range.
76-
let step = n - end
77-
let end = n
75+
" continue with same range.
76+
let step = n - end
77+
let end = n
7878
else
79-
if start >= 0
80-
" produce previous range
81-
call Range(ranges, start, end, step, add)
82-
endif
83-
let start = n
84-
let end = n
85-
let step = 0
86-
let add = nl - n
79+
if start >= 0
80+
" produce previous range
81+
call Range(ranges, start, end, step, add)
82+
endif
83+
let start = n
84+
let end = n
85+
let step = 0
86+
let add = nl - n
8787
endif
8888
endif
8989
endfor
@@ -115,18 +115,18 @@ func! BuildFoldTable()
115115
let n = ('0x' . p[0]) + 0
116116
let nl = ('0x' . p[2]) + 0
117117
if start >= 0 && add == nl - n && (step == 0 || n - end == step)
118-
" continue with same range.
119-
let step = n - end
120-
let end = n
118+
" continue with same range.
119+
let step = n - end
120+
let end = n
121121
else
122-
if start >= 0
123-
" produce previous range
124-
call Range(ranges, start, end, step, add)
125-
endif
126-
let start = n
127-
let end = n
128-
let step = 0
129-
let add = nl - n
122+
if start >= 0
123+
" produce previous range
124+
call Range(ranges, start, end, step, add)
125+
endif
126+
let start = n
127+
let end = n
128+
let step = 0
129+
let add = nl - n
130130
endif
131131
endif
132132
endfor
@@ -160,15 +160,15 @@ func! BuildCombiningTable()
160160
if p[2] == 'Mn' || p[2] == 'Mc' || p[2] == 'Me'
161161
let n = ('0x' . p[0]) + 0
162162
if start >= 0 && end + 1 == n
163-
" continue with same range.
164-
let end = n
163+
" continue with same range.
164+
let end = n
165165
else
166-
if start >= 0
167-
" produce previous range
168-
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
169-
endif
170-
let start = n
171-
let end = n
166+
if start >= 0
167+
" produce previous range
168+
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
169+
endif
170+
let start = n
171+
let end = n
172172
endif
173173
endif
174174
endfor
@@ -197,47 +197,57 @@ func! BuildWidthTable(pattern, tableName)
197197
for p in s:widthprops
198198
if p[1][0] =~ a:pattern
199199
if p[0] =~ '\.\.'
200-
" It is a range. we don't check for composing char then.
201-
let rng = split(p[0], '\.\.')
202-
if len(rng) != 2
203-
echoerr "Cannot parse range: '" . p[0] . "' in width table"
204-
endif
205-
let n = ('0x' . rng[0]) + 0
206-
let n_last = ('0x' . rng[1]) + 0
200+
" It is a range. we don't check for composing char then.
201+
let rng = split(p[0], '\.\.')
202+
if len(rng) != 2
203+
echoerr "Cannot parse range: '" . p[0] . "' in width table"
204+
endif
205+
let n = ('0x' . rng[0]) + 0
206+
let n_last = ('0x' . rng[1]) + 0
207207
else
208-
let n = ('0x' . p[0]) + 0
209-
let n_last = n
208+
let n = ('0x' . p[0]) + 0
209+
let n_last = n
210210
endif
211211
" Find this char in the data table.
212212
while 1
213-
let dn = ('0x' . s:dataprops[dataidx][0]) + 0
214-
if dn >= n
215-
break
216-
endif
217-
let dataidx += 1
213+
let dn = ('0x' . s:dataprops[dataidx][0]) + 0
214+
if dn >= n
215+
break
216+
endif
217+
let dataidx += 1
218218
endwhile
219219
if dn != n && n_last == n
220-
echoerr "Cannot find character " . n . " in data table"
220+
echoerr "Cannot find character " . n . " in data table"
221221
endif
222222
" Only use the char when it's not a composing char.
223223
" But use all chars from a range.
224224
let dp = s:dataprops[dataidx]
225225
if n_last > n || (dp[2] != 'Mn' && dp[2] != 'Mc' && dp[2] != 'Me')
226-
if start >= 0 && end + 1 == n
227-
" continue with same range.
228-
else
229-
if start >= 0
230-
" produce previous range
231-
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
232-
endif
233-
let start = n
234-
endif
235-
let end = n_last
226+
if start >= 0 && end + 1 == n
227+
" continue with same range.
228+
else
229+
if start >= 0
230+
" produce previous range
231+
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
232+
if a:pattern == 'A'
233+
call add(s:ambitable, [start, end])
234+
else
235+
call add(s:doubletable, [start, end])
236+
endif
237+
endif
238+
let start = n
239+
endif
240+
let end = n_last
236241
endif
237242
endif
238243
endfor
239244
if start >= 0
240245
call add(ranges, printf("\t{0x%04x, 0x%04x},", start, end))
246+
if a:pattern == 'A'
247+
call add(s:ambitable, [start, end])
248+
else
249+
call add(s:doubletable, [start, end])
250+
endif
241251
endif
242252

243253
" New buffer to put the result in.
@@ -253,21 +263,72 @@ endfunc
253263

254264
" Build the emoji width table in a new buffer.
255265
func! BuildEmojiTable(pattern, tableName)
256-
let ranges = []
257-
for line in map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
266+
let alltokens = []
267+
let widthtokens = []
268+
let lines = map(filter(filter(getline(1, '$'), 'v:val=~"^[1-9]"'), 'v:val=~a:pattern'), 'matchstr(v:val,"^\\S\\+")')
269+
for n in range(len(lines))
270+
let line = lines[n]
258271
let token = split(line, '\.\.')
272+
let first = ('0x' . token[0]) + 0
259273
if len(token) == 1
260-
call add(token, token[0])
274+
let last = first
275+
else
276+
let last = ('0x' . token[1]) + 0
277+
endif
278+
279+
let token = [first, last]
280+
if len(alltokens) > 0 && (token[0] - 1 == alltokens[-1][1])
281+
let alltokens[-1][1] = token[1]
282+
else
283+
call add(alltokens, token)
284+
endif
285+
286+
" exclude characters that are in the "ambiguous" or "doublewidth" table
287+
for ambi in s:ambitable
288+
if first >= ambi[0] && first <= ambi[1]
289+
let first = ambi[1] + 1
290+
endif
291+
if last >= ambi[0] && last <= ambi[1]
292+
let last = ambi[0] - 1
293+
endif
294+
endfor
295+
for double in s:doubletable
296+
if first >= double[0] && first <= double[1]
297+
let first = double[1] + 1
298+
endif
299+
if last >= double[0] && last <= double[1]
300+
let last = double[0] - 1
301+
endif
302+
endfor
303+
304+
if first <= last
305+
let token = [first, last]
306+
if len(widthtokens) > 0 && (token[0] - 1 == widthtokens[-1][1])
307+
let widthtokens[-1][1] = token[1]
308+
else
309+
call add(widthtokens, token)
310+
endif
261311
endif
262-
call add(ranges, printf("\t{0x%04x, 0x%04x},", "0x".token[0], "0x".token[1]))
263312
endfor
313+
let allranges = map(alltokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
314+
let widthranges = map(widthtokens, 'printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])')
264315

265316
" New buffer to put the result in.
266317
new
267-
exe "file " . a:tableName
268-
call setline(1, " static struct interval " . a:tableName . "[] =")
318+
exe "file " . a:tableName . '_all'
319+
call setline(1, " static struct interval " . a:tableName . "_all[] =")
269320
call setline(2, " {")
270-
call append('$', ranges)
321+
call append('$', allranges)
322+
call setline('$', getline('$')[:-2]) " remove last comma
323+
call setline(line('$') + 1, " };")
324+
wincmd p
325+
326+
" New buffer to put the result in.
327+
new
328+
exe "file " . a:tableName . '_width'
329+
call setline(1, " static struct interval " . a:tableName . "_width[] =")
330+
call setline(2, " {")
331+
call append('$', widthranges)
271332
call setline('$', getline('$')[:-2]) " remove last comma
272333
call setline(line('$') + 1, " };")
273334
wincmd p
@@ -307,13 +368,16 @@ edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
307368
call ParseWidthProps()
308369

309370
" Build the double width table.
371+
let s:doubletable = []
310372
call BuildWidthTable('[WF]', 'doublewidth')
311373

312374
" Build the ambiguous width table.
375+
let s:ambitable = []
313376
call BuildWidthTable('A', 'ambiguous')
314377

315378
" Edit the emoji text file. Requires the netrw plugin.
316379
edit http://www.unicode.org/Public/emoji/3.0/emoji-data.txt
317380

318381
" Build the emoji table. Ver. 1.0 - 6.0
382+
" Must come after the "doublewidth" and "ambiguous" tables
319383
call BuildEmojiTable('; Emoji\s\+# [1-6]\.[0-9]', 'emoji')

0 commit comments

Comments
 (0)