@@ -32,8 +32,8 @@ func! ParseFoldProps()
3232 if line !~ ' ^#' && line !~ ' ^\s*$'
3333 let l = split (line , ' \s*;\s*' , 1 )
3434 if len (l ) != 4
35- echoerr ' Found ' . len (l ) . ' items in line ' . lnum . ' , expected 4'
36- return
35+ echoerr ' Found ' . len (l ) . ' items in line ' . lnum . ' , expected 4'
36+ return
3737 endif
3838 call add (s: foldprops , l )
3939 endif
@@ -50,8 +50,8 @@ func! ParseWidthProps()
5050 if line !~ ' ^#' && line !~ ' ^\s*$'
5151 let l = split (line , ' \s*;\s*' , 1 )
5252 if len (l ) != 2
53- echoerr ' Found ' . len (l ) . ' items in line ' . lnum . ' , expected 2'
54- return
53+ echoerr ' Found ' . len (l ) . ' items in line ' . lnum . ' , expected 2'
54+ return
5555 endif
5656 call add (s: widthprops , l )
5757 endif
@@ -72,18 +72,18 @@ func! BuildCaseTable(name, index)
7272 let n = (' 0x' . p [0 ]) + 0
7373 let nl = (' 0x' . p [a: index ]) + 0
7474 if start >= 0 && add == nl - n && (step == 0 || n - end == step)
75- " continue with same range.
76- let step = n - end
77- let end = n
75+ " continue with same range.
76+ let step = n - end
77+ let end = n
7878 else
79- if start >= 0
80- " produce previous range
81- call Range (ranges , start , end , step, add )
82- endif
83- let start = n
84- let end = n
85- let step = 0
86- let add = nl - n
79+ if start >= 0
80+ " produce previous range
81+ call Range (ranges , start , end , step, add )
82+ endif
83+ let start = n
84+ let end = n
85+ let step = 0
86+ let add = nl - n
8787 endif
8888 endif
8989 endfor
@@ -115,18 +115,18 @@ func! BuildFoldTable()
115115 let n = (' 0x' . p [0 ]) + 0
116116 let nl = (' 0x' . p [2 ]) + 0
117117 if start >= 0 && add == nl - n && (step == 0 || n - end == step)
118- " continue with same range.
119- let step = n - end
120- let end = n
118+ " continue with same range.
119+ let step = n - end
120+ let end = n
121121 else
122- if start >= 0
123- " produce previous range
124- call Range (ranges , start , end , step, add )
125- endif
126- let start = n
127- let end = n
128- let step = 0
129- let add = nl - n
122+ if start >= 0
123+ " produce previous range
124+ call Range (ranges , start , end , step, add )
125+ endif
126+ let start = n
127+ let end = n
128+ let step = 0
129+ let add = nl - n
130130 endif
131131 endif
132132 endfor
@@ -160,15 +160,15 @@ func! BuildCombiningTable()
160160 if p [2 ] == ' Mn' || p [2 ] == ' Mc' || p [2 ] == ' Me'
161161 let n = (' 0x' . p [0 ]) + 0
162162 if start >= 0 && end + 1 == n
163- " continue with same range.
164- let end = n
163+ " continue with same range.
164+ let end = n
165165 else
166- if start >= 0
167- " produce previous range
168- call add (ranges , printf (" \t {0x%04x, 0x%04x}," , start , end ))
169- endif
170- let start = n
171- let end = n
166+ if start >= 0
167+ " produce previous range
168+ call add (ranges , printf (" \t {0x%04x, 0x%04x}," , start , end ))
169+ endif
170+ let start = n
171+ let end = n
172172 endif
173173 endif
174174 endfor
@@ -197,47 +197,57 @@ func! BuildWidthTable(pattern, tableName)
197197 for p in s: widthprops
198198 if p [1 ][0 ] = ~ a: pattern
199199 if p [0 ] = ~ ' \.\.'
200- " It is a range. we don't check for composing char then.
201- let rng = split (p [0 ], ' \.\.' )
202- if len (rng) != 2
203- echoerr " Cannot parse range: '" . p [0 ] . " ' in width table"
204- endif
205- let n = (' 0x' . rng[0 ]) + 0
206- let n_last = (' 0x' . rng[1 ]) + 0
200+ " It is a range. We don't check for composing char then.
201+ let rng = split (p [0 ], ' \.\.' )
202+ if len (rng) != 2
203+ echoerr " Cannot parse range: '" . p [0 ] . " ' in width table"
204+ endif
205+ let n = (' 0x' . rng[0 ]) + 0
206+ let n_last = (' 0x' . rng[1 ]) + 0
207207 else
208- let n = (' 0x' . p [0 ]) + 0
209- let n_last = n
208+ let n = (' 0x' . p [0 ]) + 0
209+ let n_last = n
210210 endif
211211 " Find this char in the data table.
212212 while 1
213- let dn = (' 0x' . s: dataprops [dataidx][0 ]) + 0
214- if dn >= n
215- break
216- endif
217- let dataidx += 1
213+ let dn = (' 0x' . s: dataprops [dataidx][0 ]) + 0
214+ if dn >= n
215+ break
216+ endif
217+ let dataidx += 1
218218 endwhile
219219 if dn != n && n_last == n
220- echoerr " Cannot find character " . n . " in data table"
220+ echoerr " Cannot find character " . n . " in data table"
221221 endif
222222 " Only use the char when it's not a composing char.
223223 " But use all chars from a range.
224224 let dp = s: dataprops [dataidx]
225225 if n_last > n || (dp [2 ] != ' Mn' && dp [2 ] != ' Mc' && dp [2 ] != ' Me' )
226- if start >= 0 && end + 1 == n
227- " continue with same range.
228- else
229- if start >= 0
230- " produce previous range
231- call add (ranges , printf (" \t {0x%04x, 0x%04x}," , start , end ))
232- endif
233- let start = n
234- endif
235- let end = n_last
226+ if start >= 0 && end + 1 == n
227+ " continue with same range.
228+ else
229+ if start >= 0
230+ " produce previous range
231+ call add (ranges , printf (" \t {0x%04x, 0x%04x}," , start , end ))
232+ if a: pattern == ' A'
233+ call add (s: ambitable , [start , end ])
234+ else
235+ call add (s: doubletable , [start , end ])
236+ endif
237+ endif
238+ let start = n
239+ endif
240+ let end = n_last
236241 endif
237242 endif
238243 endfor
239244 if start >= 0
240245 call add (ranges , printf (" \t {0x%04x, 0x%04x}," , start , end ))
246+ if a: pattern == ' A'
247+ call add (s: ambitable , [start , end ])
248+ else
249+ call add (s: doubletable , [start , end ])
250+ endif
241251 endif
242252
243253 " New buffer to put the result in.
@@ -253,21 +263,72 @@ endfunc
253263
254264" Build the emoji width table in a new buffer.
255265func ! BuildEmojiTable (pattern, tableName)
256- let ranges = []
257- for line in map (filter (filter (getline (1 , ' $' ), ' v:val=~"^[1-9]"' ), ' v:val=~a:pattern' ), ' matchstr(v:val,"^\\S\\+")' )
266+ let alltokens = []
267+ let widthtokens = []
268+ let lines = map (filter (filter (getline (1 , ' $' ), ' v:val=~"^[1-9]"' ), ' v:val=~a:pattern' ), ' matchstr(v:val,"^\\S\\+")' )
269+ for n in range (len (lines ))
270+ let line = lines [n ]
258271 let token = split (line , ' \.\.' )
272+ let first = (' 0x' . token[0 ]) + 0
259273 if len (token) == 1
260- call add (token, token[0 ])
274+ let last = first
275+ else
276+ let last = (' 0x' . token[1 ]) + 0
277+ endif
278+
279+ let token = [first , last ]
280+ if len (alltokens) > 0 && (token[0 ] - 1 == alltokens[-1 ][1 ])
281+ let alltokens[-1 ][1 ] = token[1 ]
282+ else
283+ call add (alltokens, token)
284+ endif
285+
286+ " exclude characters that are in the "ambiguous" or "doublewidth" table
287+ for ambi in s: ambitable
288+ if first >= ambi[0 ] && first <= ambi[1 ]
289+ let first = ambi[1 ] + 1
290+ endif
291+ if last >= ambi[0 ] && last <= ambi[1 ]
292+ let last = ambi[0 ] - 1
293+ endif
294+ endfor
295+ for double in s: doubletable
296+ if first >= double[0 ] && first <= double[1 ]
297+ let first = double[1 ] + 1
298+ endif
299+ if last >= double[0 ] && last <= double[1 ]
300+ let last = double[0 ] - 1
301+ endif
302+ endfor
303+
304+ if first <= last
305+ let token = [first , last ]
306+ if len (widthtokens) > 0 && (token[0 ] - 1 == widthtokens[-1 ][1 ])
307+ let widthtokens[-1 ][1 ] = token[1 ]
308+ else
309+ call add (widthtokens, token)
310+ endif
261311 endif
262- call add (ranges , printf (" \t {0x%04x, 0x%04x}," , " 0x" .token[0 ], " 0x" .token[1 ]))
263312 endfor
313+ let allranges = map (alltokens, ' printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])' )
314+ let widthranges = map (widthtokens, ' printf("\t{0x%04x, 0x%04x},", v:val[0], v:val[1])' )
264315
265316 " New buffer to put the result in.
266317 new
267- exe " file " . a: tableName
268- call setline (1 , " static struct interval " . a: tableName . " [] =" )
318+ exe " file " . a: tableName . ' _all '
319+ call setline (1 , " static struct interval " . a: tableName . " _all [] =" )
269320 call setline (2 , " {" )
270- call append (' $' , ranges )
321+ call append (' $' , allranges)
322+ call setline (' $' , getline (' $' )[:-2 ]) " remove last comma
323+ call setline (line (' $' ) + 1 , " };" )
324+ wincmd p
325+
326+ " New buffer to put the result in.
327+ new
328+ exe " file " . a: tableName . ' _width'
329+ call setline (1 , " static struct interval " . a: tableName . " _width[] =" )
330+ call setline (2 , " {" )
331+ call append (' $' , widthranges)
271332 call setline (' $' , getline (' $' )[:-2 ]) " remove last comma
272333 call setline (line (' $' ) + 1 , " };" )
273334 wincmd p
@@ -307,13 +368,16 @@ edit http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
307368call ParseWidthProps ()
308369
309370" Build the double width table.
371+ let s: doubletable = []
310372call BuildWidthTable (' [WF]' , ' doublewidth' )
311373
312374" Build the ambiguous width table.
375+ let s: ambitable = []
313376call BuildWidthTable (' A' , ' ambiguous' )
314377
315378" Edit the emoji text file. Requires the netrw plugin.
316379edit http:// www.unicode.org/Public/ emoji /3.0/ emoji - data.txt
317380
318381" Build the emoji table. Ver. 1.0 - 6.0
382+ " Must come after the "ambiguous" and "doublewidth" tables
319383call BuildEmojiTable (' ; Emoji\s\+# [1-6]\.[0-9]' , ' emoji' )
0 commit comments