Skip to content

Commit f42b49f

Browse files
committed
successfully add something that could be called sub lexers
1 parent e531665 commit f42b49f

4 files changed

Lines changed: 207 additions & 6 deletions

File tree

crates/vim9-lexer/src/lib.rs

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#![allow(unreachable_code)]
33

44
use std::{
5-
cell::RefCell,
5+
cell::{Cell, RefCell},
66
collections::VecDeque,
77
fmt::{Debug, Display},
88
};
@@ -127,6 +127,13 @@ pub struct Token<'a> {
127127
}
128128

129129
impl Token<'_> {
130+
pub fn owned(from: Token<'_>) -> Token<'static> {
131+
Token {
132+
text: TokenText::Owned(from.text.to_string()),
133+
..from
134+
}
135+
}
136+
130137
pub fn fake() -> Token<'static> {
131138
Token {
132139
kind: TokenKind::Virtual,
@@ -155,6 +162,7 @@ pub enum TokenKind {
155162
// TODO: Is this crazy for this lang??
156163
EndOfLine,
157164
Comment,
165+
Literal,
158166

159167
// Identifiers and literals
160168
Identifier,
@@ -307,16 +315,25 @@ impl TokenKind {
307315
}
308316
}
309317

318+
trait SubLexer {
319+
fn next_token(
320+
&self,
321+
lexer: &Lexer,
322+
) -> Result<(Token<'static>, Option<Box<dyn SubLexer>>)>;
323+
}
324+
325+
type TokenAndLexer = (Token<'static>, Option<Box<dyn SubLexer>>);
326+
310327
pub struct LexerState {
311328
position: usize,
312329
read_position: usize,
313-
314-
sublexer: Option<Box<dyn Fn(&Lexer) -> Result<Token>>>,
315330
}
316331

317332
pub struct Lexer {
318333
state: RefCell<LexerState>,
319334

335+
sublexer: Cell<Option<Box<dyn SubLexer>>>,
336+
320337
/// Vec containing all the chars,
321338
/// this allows easy accessing (and accounts for unicode chars and what not)
322339
chars: Vec<char>,
@@ -357,8 +374,8 @@ impl Lexer {
357374
state: RefCell::new(LexerState {
358375
position: 0,
359376
read_position: 1,
360-
sublexer: None,
361377
}),
378+
sublexer: Cell::new(None),
362379
chars,
363380
lines,
364381
}
@@ -499,6 +516,31 @@ impl Lexer {
499516
})
500517
}
501518

519+
fn read_line(&self) -> Result<Token<'static>> {
520+
let position = self.position();
521+
522+
let mut line = String::new();
523+
loop {
524+
match self.ch() {
525+
Some(&ch) => {
526+
if ch == '\n' {
527+
break;
528+
}
529+
530+
line += &ch.to_string();
531+
self.read_char();
532+
}
533+
None => panic!("OH NO"),
534+
}
535+
}
536+
537+
Ok(dbg!(Token {
538+
kind: TokenKind::Literal,
539+
text: TokenText::Owned(line),
540+
span: self.make_span(position, self.position())?,
541+
}))
542+
}
543+
502544
fn read_until<F>(
503545
&self,
504546
until: char,
@@ -586,6 +628,11 @@ impl Lexer {
586628
}
587629
_ => TokenKind::IsNot,
588630
},
631+
"normal" => {
632+
self.sublexer.set(Some(Box::new(NormalModeParser {})));
633+
634+
TokenKind::Identifier
635+
}
589636
_ => TokenKind::Identifier,
590637
};
591638

@@ -673,8 +720,13 @@ impl Lexer {
673720
}
674721

675722
pub fn next_token(&self) -> Result<Token> {
676-
if let Some(sublexer) = &self.state.borrow().sublexer {
677-
return sublexer(&self);
723+
// Handle sublexers...
724+
// there is some goofiness with all this stuff
725+
if let Some(sublexer) = self.sublexer.take() {
726+
let (tok, next_lexer) = sublexer.next_token(&self)?;
727+
self.sublexer.set(next_lexer);
728+
self.read_char();
729+
return Ok(tok);
678730
}
679731

680732
use TokenKind::*;
@@ -996,6 +1048,33 @@ impl Lexer {
9961048
}
9971049
}
9981050

1051+
/// NormalModeParser is used to parse normal mode commands.
1052+
///
1053+
/// It will just read the rest of the line after normal mode,
1054+
/// consume all the text as one literal, and then continue on afterwards
1055+
struct NormalModeParser {}
1056+
1057+
impl SubLexer for NormalModeParser {
1058+
fn next_token(&self, lexer: &Lexer) -> Result<TokenAndLexer> {
1059+
if let Some(&ch) = lexer.ch() {
1060+
if ch == ' ' {
1061+
lexer.read_char();
1062+
}
1063+
}
1064+
1065+
match lexer.ch() {
1066+
Some(&ch) => match ch {
1067+
'!' => Ok((
1068+
Token::owned(lexer.handle_bang()?.to_owned()),
1069+
Some(Box::new(NormalModeParser {})),
1070+
)),
1071+
_ => Ok((Token::owned(lexer.read_line()?), None)),
1072+
},
1073+
None => unreachable!("don't think this should happen..."),
1074+
}
1075+
}
1076+
}
1077+
9991078
/// Returns `true` when `ch` is a line feed (`'\n'`) or a NUL (`'\0'`).
fn is_newline(ch: &char) -> bool {
    matches!(*ch, '\n' | '\0')
}
@@ -1074,6 +1153,7 @@ mod test {
10741153
snapshot!(test_lambda, "../testdata/snapshots/lambda.vim");
10751154
snapshot!(test_types, "../testdata/snapshots/types.vim");
10761155
snapshot!(test_methods, "../testdata/snapshots/methods.vim");
1156+
snapshot!(test_normal, "../testdata/snapshots/normal.vim");
10771157

10781158
// snapshot!(test_cfilter, "../testdata/snapshots/cfilter.vim");
10791159

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
source: crates/vim9-lexer/src/lib.rs
3+
assertion_line: 1156
4+
expression: snapshot_lexing(contents)
5+
---
6+
vim9script
7+
^^^^^^^^^^ Token(Identifier, "vim9script", (0,0)->(0,10))
8+
Token(EndOfLine, "\n", (0,10)->(0,10))
9+
10+
Token(EndOfLine, "\n", (1,0)->(1,0))
11+
normal! dd
12+
^^^^^^ Token(Identifier, "normal", (2,0)->(2,6))
13+
^ Token(Bang, "!", (2,6)->(2,7))
14+
^^ Token(Literal, "dd", (2,8)->(2,10))
15+
normal dd
16+
^^^^^^ Token(Identifier, "normal", (3,0)->(3,6))
17+
^^ Token(Literal, "dd", (3,7)->(3,9))
18+
:normal! v"_y
19+
^ Token(Colon, ":", (4,0)->(4,1))
20+
^^^^^^ Token(Identifier, "normal", (4,1)->(4,7))
21+
^ Token(Bang, "!", (4,7)->(4,8))
22+
^^^^ Token(Literal, "v\"_y", (4,9)->(4,13))
23+
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
---
2+
source: crates/vim9-lexer/src/lib.rs
3+
assertion_line: 1122
4+
expression: snapshot_lexing(contents)
5+
---
6+
vim9script
7+
^^^^^^^^^^ Token(Identifier, "vim9script", (0,0)->(0,10))
8+
Token(EndOfLine, "\n", (0,10)->(0,10))
9+
10+
Token(EndOfLine, "\n", (1,0)->(1,0))
11+
# Thanks to: https://github.com/yegappan/lsp for some test cases
12+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Token(Comment, "# Thanks to: https://github.com/yegappan/lsp for some test cases", (2,0)->(2,64))
13+
14+
Token(EndOfLine, "\n", (3,0)->(3,0))
15+
# Functions related to handling LSP range selection.
16+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Token(Comment, "# Functions related to handling LSP range selection.", (4,0)->(4,52))
17+
18+
Token(EndOfLine, "\n", (5,0)->(5,0))
19+
import './util.vim'
20+
^^^^^^ Token(Identifier, "import", (6,0)->(6,6))
21+
^^^^^^^^^ Token(SingleQuoteString, "./util.vim", (6,8)->(6,17))
22+
Token(EndOfLine, "\n", (6,19)->(6,19))
23+
24+
Token(EndOfLine, "\n", (7,0)->(7,0))
25+
# Visually (character-wise) select the text in a range
26+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Token(Comment, "# Visually (character-wise) select the text in a range", (8,0)->(8,54))
27+
def SelectText(bnr: number, range: dict<dict<number>>)
28+
^^^ Token(Identifier, "def", (9,0)->(9,3))
29+
^^^^^^^^^^ Token(Identifier, "SelectText", (9,4)->(9,14))
30+
^ Token(LeftParen, "(", (9,14)->(9,15))
31+
^^^ Token(Identifier, "bnr", (9,15)->(9,18))
32+
^^ Token(SpacedColon, ": ", (9,18)->(9,20))
33+
^^^^^^ Token(Identifier, "number", (9,20)->(9,26))
34+
^ Token(Comma, ",", (9,26)->(9,27))
35+
^^^^^ Token(Identifier, "range", (9,28)->(9,33))
36+
^^ Token(SpacedColon, ": ", (9,33)->(9,35))
37+
^^^^ Token(Identifier, "dict", (9,35)->(9,39))
38+
^ Token(AngleLeft, "<", (9,39)->(9,40))
39+
^^^^ Token(Identifier, "dict", (9,40)->(9,44))
40+
^ Token(AngleLeft, "<", (9,44)->(9,45))
41+
^^^^^^ Token(Identifier, "number", (9,45)->(9,51))
42+
^ Token(AngleRight, ">", (9,51)->(9,52))
43+
^ Token(AngleRight, ">", (9,52)->(9,53))
44+
^ Token(RightParen, ")", (9,53)->(9,54))
45+
Token(EndOfLine, "\n", (9,54)->(9,54))
46+
var start_col: number = util.GetLineByteFromPos(bnr, range.start) + 1
47+
^^^ Token(Identifier, "var", (10,2)->(10,5))
48+
^^^^^^^^^ Token(Identifier, "start_col", (10,6)->(10,15))
49+
^^ Token(SpacedColon, ": ", (10,15)->(10,17))
50+
^^^^^^ Token(Identifier, "number", (10,17)->(10,23))
51+
^ Token(Equal, "=", (10,24)->(10,25))
52+
^^^^ Token(Identifier, "util", (10,26)->(10,30))
53+
^ Token(Dot, ".", (10,30)->(10,31))
54+
^^^^^^^^^^^^^^^^^^ Token(Identifier, "GetLineByteFromPos", (10,31)->(10,49))
55+
^ Token(LeftParen, "(", (10,49)->(10,50))
56+
^^^ Token(Identifier, "bnr", (10,50)->(10,53))
57+
^ Token(Comma, ",", (10,53)->(10,54))
58+
^^^^^ Token(Identifier, "range", (10,55)->(10,60))
59+
^ Token(Dot, ".", (10,60)->(10,61))
60+
^^^^^ Token(Identifier, "start", (10,61)->(10,66))
61+
^ Token(RightParen, ")", (10,66)->(10,67))
62+
^ Token(Plus, "+", (10,68)->(10,69))
63+
^ Token(Integer, "1", (10,70)->(10,71))
64+
Token(EndOfLine, "\n", (10,71)->(10,71))
65+
var end_col: number = util.GetLineByteFromPos(bnr, range.end)
66+
^^^ Token(Identifier, "var", (11,2)->(11,5))
67+
^^^^^^^ Token(Identifier, "end_col", (11,6)->(11,13))
68+
^^ Token(SpacedColon, ": ", (11,13)->(11,15))
69+
^^^^^^ Token(Identifier, "number", (11,15)->(11,21))
70+
^ Token(Equal, "=", (11,22)->(11,23))
71+
^^^^ Token(Identifier, "util", (11,24)->(11,28))
72+
^ Token(Dot, ".", (11,28)->(11,29))
73+
^^^^^^^^^^^^^^^^^^ Token(Identifier, "GetLineByteFromPos", (11,29)->(11,47))
74+
^ Token(LeftParen, "(", (11,47)->(11,48))
75+
^^^ Token(Identifier, "bnr", (11,48)->(11,51))
76+
^ Token(Comma, ",", (11,51)->(11,52))
77+
^^^^^ Token(Identifier, "range", (11,53)->(11,58))
78+
^ Token(Dot, ".", (11,58)->(11,59))
79+
^^^ Token(Identifier, "end", (11,59)->(11,62))
80+
^ Token(RightParen, ")", (11,62)->(11,63))
81+
Token(EndOfLine, "\n", (11,63)->(11,63))
82+
83+
Token(EndOfLine, "\n", (12,0)->(12,0))
84+
:normal! v"_y
85+
^ Token(Colon, ":", (13,2)->(13,3))
86+
^^^^^^ Token(Identifier, "normal", (13,3)->(13,9))
87+
^ Token(Bang, "!", (13,9)->(13,10))
88+
^ Token(Identifier, "v", (13,11)->(13,12))
89+
Token(Comment, "", (13,15)->(13,15))
90+
enddef
91+
^^^^^^ Token(Identifier, "enddef", (14,0)->(14,6))
92+
Token(EndOfLine, "\n", (14,6)->(14,6))
93+
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
vim9script
2+
3+
normal! dd
4+
normal dd
5+
:normal! v"_y

0 commit comments

Comments
 (0)