From 56befb91855dd4a2b17f8864e146762f805abe1b Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Mon, 22 Jun 2026 08:02:13 +0900 Subject: [PATCH 01/20] lsp --- Cargo.toml | 2 + rusty_lr_lsp/Cargo.toml | 17 ++ rusty_lr_lsp/src/diagnostics.rs | 279 ++++++++++++++++++++++++++++ rusty_lr_lsp/src/goto_definition.rs | 216 +++++++++++++++++++++ rusty_lr_lsp/src/main.rs | 152 +++++++++++++++ rusty_lr_lsp/src/position.rs | 101 ++++++++++ rusty_lr_parser/src/lib.rs | 6 +- 7 files changed, 770 insertions(+), 3 deletions(-) create mode 100644 rusty_lr_lsp/Cargo.toml create mode 100644 rusty_lr_lsp/src/diagnostics.rs create mode 100644 rusty_lr_lsp/src/goto_definition.rs create mode 100644 rusty_lr_lsp/src/main.rs create mode 100644 rusty_lr_lsp/src/position.rs diff --git a/Cargo.toml b/Cargo.toml index cac7219c..c3f7f5a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,8 +6,10 @@ members = [ "rusty_lr_parser", "rusty_lr_buildscript", "rusty_lr_executable", + "rusty_lr_lsp", "example/calculator", "example/calculator_u8", "example/glr", "example/json", ] + diff --git a/rusty_lr_lsp/Cargo.toml b/rusty_lr_lsp/Cargo.toml new file mode 100644 index 00000000..4affcff8 --- /dev/null +++ b/rusty_lr_lsp/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "rusty_lr_lsp" +version = "0.1.0" +edition = "2021" +description = "LSP server for rusty_lr grammar files" +license = "MIT OR Apache-2.0" + +[dependencies] +lsp-server = "0.7.6" +lsp-types = "0.95.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +proc-macro2 = { version = "1.0.86", features = ["span-locations"] } +quote = "1.0" +syn = { version = "2.0", features = ["full", "extra-traits"] } +rusty_lr_core = { version = "4.2.0", path = "../rusty_lr_core", features = ["builder"] } +rusty_lr_parser = { version = "4.2.1", path = "../rusty_lr_parser" } diff --git a/rusty_lr_lsp/src/diagnostics.rs b/rusty_lr_lsp/src/diagnostics.rs new file mode 100644 index 00000000..572e68e2 --- /dev/null +++ 
b/rusty_lr_lsp/src/diagnostics.rs @@ -0,0 +1,279 @@ +use lsp_types::{Diagnostic, DiagnosticSeverity, Range}; +use proc_macro2::{Spacing, TokenStream, TokenTree}; +use rusty_lr_parser::grammar::Grammar; +use std::str::FromStr; + +use crate::position::range_to_lsp_range; + +/// Splits a TokenStream by the `%%` separator. +pub fn split_stream(token_stream: TokenStream) -> Result<(TokenStream, TokenStream), ()> { + let mut token_stream = token_stream.into_iter().peekable(); + let mut output_stream = TokenStream::new(); + + while let Some(token) = token_stream.next() { + if let TokenTree::Punct(token) = &token { + if token.as_char() == '%' && token.spacing() == Spacing::Joint { + if let Some(TokenTree::Punct(next)) = token_stream.peek() { + if next.as_char() == '%' && next.spacing() == Spacing::Alone { + token_stream.next(); + let macro_stream: TokenStream = token_stream.collect(); + return Ok((output_stream, macro_stream)); + } + } + } + } + output_stream.extend(std::iter::once(token)); + } + Err(()) +} + +/// Runs the compiler's parser/builder pipeline on the given file content and gathers all diagnostics. +pub fn compile_and_get_diagnostics(content: &str) -> Vec { + // 1. Parse TokenStream from content + let token_stream = match TokenStream::from_str(content) { + Ok(ts) => ts, + Err(e) => { + let range = e.span().byte_range(); + return vec![Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("Lexing/parsing error: {}", e), + related_information: None, + tags: None, + data: None, + }]; + } + }; + + // 2. 
Split into Rust code and grammar sections + let (_, macro_stream) = match split_stream(token_stream) { + Ok(res) => res, + Err(_) => { + return vec![Diagnostic { + range: Range::default(), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: "Cannot find `%%` to separate the Rust code and the grammar parts" + .to_string(), + related_information: None, + tags: None, + data: None, + }]; + } + }; + + // 3. Parse grammar arguments + let grammar_args = match Grammar::parse_args(macro_stream) { + Ok(args) => args, + Err((e, sm)) => { + let location = e.location(); + let range = sm.get_byterange(&location).unwrap_or(0..0); + return vec![Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: e.short_message(), + related_information: None, + tags: None, + data: None, + }]; + } + }; + + // 4. Collect recovered parser errors + let mut diagnostics = Vec::new(); + for error in &grammar_args.error_recovered { + let range = grammar_args + .span_manager + .get_byterange(&error.location) + .unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("{} (refer to: {})", error.message, error.link), + related_information: None, + tags: None, + data: None, + }); + } + + if !grammar_args.error_recovered.is_empty() { + return diagnostics; + } + + let span_manager = grammar_args.span_manager.clone(); + + // 5. 
Run arg validation + if let Err(e) = Grammar::arg_check_error(&grammar_args) { + let msg = e.short_message(); + for loc in e.locations() { + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg.clone(), + related_information: None, + tags: None, + data: None, + }); + } + return diagnostics; + } + + // 6. Build the Grammar structure + let mut grammar = match Grammar::from_grammar_args(grammar_args) { + Ok(g) => g, + Err(e) => { + let msg = e.short_message(); + for loc in e.locations() { + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg.clone(), + related_information: None, + tags: None, + data: None, + }); + } + return diagnostics; + } + }; + + // 7. 
Verify Shift/Reduce and Reduce/Reduce conflicts in non-GLR mode + let diags_collector = grammar.build_grammar(); + if !grammar.glr { + // Shift/Reduce conflicts + for ((term, shift_rules, _), reduce_rules) in diags_collector.shift_reduce_conflicts { + let term_str = grammar.class_pretty_name_list(term, 5); + let message = format!( + "Shift/Reduce conflict detected with terminal(class): {}", + term_str + ); + + for shift_rule in shift_rules { + if let Some((nonterm, local_rule)) = + grammar.get_rule_by_id(shift_rule.production_idx) + { + let loc = nonterm.rules[local_rule].location(); + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("(Shift) {}", message), + related_information: None, + tags: None, + data: None, + }); + } + } + for (reduce_rule, _) in reduce_rules { + if let Some((nonterm, local_rule)) = grammar.get_rule_by_id(reduce_rule) { + let loc = nonterm.rules[local_rule].location(); + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("(Reduce) {}", message), + related_information: None, + tags: None, + data: None, + }); + } + } + } + + // Reduce/Reduce conflicts + for (reduce_rules, reduce_terms) in diags_collector.reduce_reduce_conflicts { + let mut terms = Vec::new(); + for term in reduce_terms { + terms.push(grammar.class_pretty_name_list(term, 5)); + } + let message = format!( + "Reduce/Reduce conflict detected with terminals: {}", + terms.join(", ") + ); + + for (reduce_rule, _) in reduce_rules { + if let Some((nonterm, local_rule)) = grammar.get_rule_by_id(reduce_rule) { + let loc = 
nonterm.rules[local_rule].location(); + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: message.clone(), + related_information: None, + tags: None, + data: None, + }); + } + } + } + } + + // 8. Collect Warnings + for warning in &grammar.warnings { + if grammar.is_warning_allowed(warning) { + continue; + } + let msg = warning.short_message(&grammar); + let locs = warning.locations(); + if locs.is_empty() { + let sep_idx = content.find("%%").unwrap_or(0); + let range = sep_idx..(sep_idx + 2); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg, + related_information: None, + tags: None, + data: None, + }); + } else { + for loc in locs { + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg.clone(), + related_information: None, + tags: None, + data: None, + }); + } + } + } + + diagnostics +} diff --git a/rusty_lr_lsp/src/goto_definition.rs b/rusty_lr_lsp/src/goto_definition.rs new file mode 100644 index 00000000..236822ae --- /dev/null +++ b/rusty_lr_lsp/src/goto_definition.rs @@ -0,0 +1,216 @@ +use lsp_types::{Position, Range}; +use proc_macro2::TokenStream; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{GrammarArgs, Located, PatternArgs, TerminalSetItem}; +use std::str::FromStr; + +use crate::diagnostics::split_stream; +use crate::position::{position_to_offset, range_to_lsp_range}; + +/// Traverses the AST of GrammarArgs to collect all 
Located instances. +fn collect_located(args: &GrammarArgs) -> Vec> { + let mut collected = Vec::new(); + + // 1. %start names + for start_name in &args.start_rule_name { + collected.push(start_name.clone()); + } + + // 2. %token definitions + for (t_name, _) in &args.terminals { + collected.push(t_name.clone()); + } + + // 3. %allow diagnostics names + for (allow_name, _) in &args.allowed_diagnostics { + collected.push(allow_name.clone()); + } + + // 4. Rule definitions + for rule in &args.rules { + collected.push(rule.name.clone()); + for line in &rule.rule_lines { + for (opt_loc, pattern) in &line.tokens { + if let Some(loc) = opt_loc { + collected.push(loc.clone()); + } + collect_pattern_located(pattern, &mut collected); + } + } + } + + collected +} + +/// Recursively traverses a PatternArgs structure to collect Located instances. +fn collect_pattern_located(pattern: &PatternArgs, collected: &mut Vec>) { + match pattern { + PatternArgs::Ident(ident) => { + collected.push(ident.clone()); + } + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => { + collect_pattern_located(base, collected); + } + PatternArgs::TerminalSet(ts) => { + for item in &ts.items { + match item { + TerminalSetItem::Terminal(ident) => { + collected.push(ident.clone()); + } + TerminalSetItem::Range(first, last) => { + collected.push(first.clone()); + collected.push(last.clone()); + } + _ => {} + } + } + } + PatternArgs::Group { alternatives, .. } => { + for alt in alternatives { + for pat in alt { + collect_pattern_located(pat, collected); + } + } + } + PatternArgs::Minus { base, exclude } => { + collect_pattern_located(base, collected); + collect_pattern_located(exclude, collected); + } + PatternArgs::Sep { + base, delimiter, .. 
+ } => { + collect_pattern_located(base, collected); + collect_pattern_located(delimiter, collected); + } + _ => {} + } +} + +/// Locates the definition of the symbol under the cursor. +pub fn find_definition(content: &str, target_pos: Position) -> Option { + let offset = position_to_offset(content, target_pos); + + // Parse the entire document into TokenStream + let token_stream = TokenStream::from_str(content).ok()?; + let (_, macro_stream) = split_stream(token_stream).ok()?; + let grammar_args = Grammar::parse_args(macro_stream).ok()?; + let span_manager = grammar_args.span_manager.clone(); + + // Collect all located identifier strings in the AST + let all_located = collect_located(&grammar_args); + + // Find the one that contains the click offset + let clicked = all_located.iter().find(|loc| { + if let Some(range) = span_manager.get_byterange(&loc.location()) { + range.contains(&offset) + } else { + false + } + })?; + + // Look up the definition by name + let name = clicked.value(); + + // 1. Check rule definitions + if let Some(rule) = grammar_args.rules.iter().find(|r| r.name.value == *name) { + let def_range = span_manager.get_byterange(&rule.name.location())?; + return Some(range_to_lsp_range(content, def_range)); + } + + // 2. 
Check token definitions + if let Some((t_name, _)) = grammar_args + .terminals + .iter() + .find(|(t, _)| t.value == *name) + { + let def_range = span_manager.get_byterange(&t_name.location())?; + return Some(range_to_lsp_range(content, def_range)); + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, +} + +%% + +%tokentype Token; +%start E; + +%token num Token::Num(_); +%token plus Token::Plus; + +E(_) : E plus num { 0 } + | num { 0 } + ; +"#; + + const MOCK_GRAMMAR_WITH_ERROR: &str = r#" +%% +%start E; +E : num plus error ; +"#; + + #[test] + fn test_split_stream() { + let ts = TokenStream::from_str(MOCK_GRAMMAR).unwrap(); + let (output, macro_stream) = split_stream(ts).unwrap(); + + let output_str = output.to_string(); + let macro_str = macro_stream.to_string(); + + assert!(output_str.contains("enum Token")); + assert!(macro_str.contains("tokentype")); + assert!(macro_str.contains("start E")); + } + + #[test] + fn test_diagnostics() { + // Test valid grammar diagnostics (should be empty or only warnings about unused tokens/etc if any) + let diags = crate::diagnostics::compile_and_get_diagnostics(MOCK_GRAMMAR); + // Under normal circumstances, MOCK_GRAMMAR is valid + for diag in &diags { + eprintln!("Diag: {:?}", diag.message); + } + + // Test invalid grammar diagnostics + let diags_err = crate::diagnostics::compile_and_get_diagnostics(MOCK_GRAMMAR_WITH_ERROR); + assert!(!diags_err.is_empty()); + assert!(diags_err + .iter() + .any(|d| d.message.contains("not defined") || d.message.contains("error"))); + } + + #[test] + fn test_goto_definition() { + // Find position of the 'plus' reference in rule "E : E plus num" + // Let's search for "plus num" inside the string + let index = MOCK_GRAMMAR.find("plus num").unwrap(); + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let def_range = find_definition(MOCK_GRAMMAR, pos).unwrap(); + + // The 
definition should point to "%token plus Token::Plus;" + let def_offset = crate::position::position_to_offset(MOCK_GRAMMAR, def_range.start); + let def_substring = &MOCK_GRAMMAR[def_offset..]; + assert!(def_substring.starts_with("plus")); + + // It should be on the line "%token plus Token::Plus;" + let token_def_index = MOCK_GRAMMAR.find("%token plus").unwrap(); + let expected_start_pos = + crate::position::offset_to_position(MOCK_GRAMMAR, token_def_index + 7); // start of 'plus' + assert_eq!(def_range.start, expected_start_pos); + } +} diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs new file mode 100644 index 00000000..4eba6803 --- /dev/null +++ b/rusty_lr_lsp/src/main.rs @@ -0,0 +1,152 @@ +use lsp_server::{Connection, Message, Notification, Request, RequestId, Response}; +use lsp_types::{ + notification::{ + DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, + }, + request::GotoDefinition, + GotoDefinitionResponse, InitializeParams, Location, OneOf, PublishDiagnosticsParams, + ServerCapabilities, TextDocumentSyncCapability, TextDocumentSyncKind, Url, +}; +use std::collections::HashMap; +use std::error::Error; + +// Import the traits providing `METHOD` constant: +use lsp_types::notification::Notification as LspNotification; +use lsp_types::request::Request as LspRequest; + +mod diagnostics; +mod goto_definition; +mod position; + +fn main() -> Result<(), Box> { + eprintln!("Starting RustyLR LSP server..."); + + // Create stdio transport connection + let (connection, io_threads) = Connection::stdio(); + + // Advertise full document sync and definition provider capabilities + let server_capabilities = serde_json::to_value(&ServerCapabilities { + text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), + definition_provider: Some(OneOf::Left(true)), + ..Default::default() + })?; + + let initialization_params = connection.initialize(server_capabilities)?; + let _params: InitializeParams = 
serde_json::from_value(initialization_params)?; + + eprintln!("RustyLR LSP server initialized successfully."); + + // Store open document contents + let mut documents: HashMap = HashMap::new(); + + // Main event loop + for msg in &connection.receiver { + match msg { + Message::Request(req) => { + if connection.handle_shutdown(&req)? { + return Ok(()); + } + + if req.method == GotoDefinition::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting goto definition request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position_params.text_document.uri; + let position = params.text_document_position_params.position; + + let mut response = Response::new_ok(id.clone(), serde_json::Value::Null); + if let Some(content) = documents.get(&uri) { + if let Some(range) = goto_definition::find_definition(content, position) { + let loc = Location::new(uri.clone(), range); + response = Response::new_ok(id, GotoDefinitionResponse::Scalar(loc)); + } + } + connection.sender.send(Message::Response(response))?; + } + } + Message::Response(_resp) => {} + Message::Notification(not) => { + if not.method == DidOpenTextDocument::METHOD { + let params = match cast_notification::(not) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting didOpen notification: {:?}", e); + continue; + } + }; + let uri = params.text_document.uri; + let text = params.text_document.text; + + documents.insert(uri.clone(), text.clone()); + publish_diagnostics(&connection, uri, &text); + } else if not.method == DidChangeTextDocument::METHOD { + let params = match cast_notification::(not) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting didChange notification: {:?}", e); + continue; + } + }; + let uri = params.text_document.uri; + + if let Some(change) = params.content_changes.into_iter().next() { + documents.insert(uri.clone(), change.text.clone()); + publish_diagnostics(&connection, uri, &change.text); + } 
+ } else if not.method == DidSaveTextDocument::METHOD { + let params = match cast_notification::(not) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting didSave notification: {:?}", e); + continue; + } + }; + let uri = params.text_document.uri; + if let Some(text) = documents.get(&uri) { + publish_diagnostics(&connection, uri, text); + } + } + } + } + } + + io_threads.join()?; + eprintln!("RustyLR LSP server stopped."); + Ok(()) +} + +fn publish_diagnostics(connection: &Connection, uri: Url, content: &str) { + let diags = diagnostics::compile_and_get_diagnostics(content); + let params = PublishDiagnosticsParams { + uri, + diagnostics: diags, + version: None, + }; + let notification = Notification::new(PublishDiagnostics::METHOD.to_string(), params); + let _ = connection.sender.send(Message::Notification(notification)); +} + +fn cast_request( + req: Request, +) -> Result<(RequestId, R::Params), lsp_server::ExtractError> +where + R: lsp_types::request::Request, + R::Params: serde::de::DeserializeOwned, +{ + req.extract(R::METHOD) +} + +fn cast_notification( + not: Notification, +) -> Result> +where + N: lsp_types::notification::Notification, + N::Params: serde::de::DeserializeOwned, +{ + not.extract(N::METHOD) +} diff --git a/rusty_lr_lsp/src/position.rs b/rusty_lr_lsp/src/position.rs new file mode 100644 index 00000000..f07696c2 --- /dev/null +++ b/rusty_lr_lsp/src/position.rs @@ -0,0 +1,101 @@ +use lsp_types::{Position, Range}; + +/// Converts a 0-indexed byte offset in `content` into an LSP `Position` (line, character). +/// The LSP character index represents the UTF-16 code unit offset on that line. 
+pub fn offset_to_position(content: &str, offset: usize) -> Position { + let mut line = 0; + let mut character = 0; + let mut current_offset = 0; + + for c in content.chars() { + if current_offset >= offset { + break; + } + let char_len = c.len_utf8(); + if current_offset + char_len > offset { + break; + } + current_offset += char_len; + + if c == '\n' { + line += 1; + character = 0; + } else { + character += c.len_utf16() as u32; + } + } + Position::new(line, character) +} + +/// Converts an LSP `Position` (line, character) back into a 0-indexed byte offset in `content`. +pub fn position_to_offset(content: &str, pos: Position) -> usize { + let mut line = 0; + let mut character = 0; + let mut byte_offset = 0; + + for c in content.chars() { + if line == pos.line && character >= pos.character { + break; + } + byte_offset += c.len_utf8(); + + if c == '\n' { + line += 1; + character = 0; + } else { + character += c.len_utf16() as u32; + } + } + byte_offset +} + +/// Converts a `std::ops::Range` byte range into an LSP `Range`. 
+pub fn range_to_lsp_range(content: &str, range: std::ops::Range) -> Range { + Range::new( + offset_to_position(content, range.start), + offset_to_position(content, range.end), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_position_conversion() { + let content = "hello\nworld\n안녕 하세요\n😀 hello"; + + // Test ASCII character + let pos = offset_to_position(content, 0); + assert_eq!(pos.line, 0); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 0); + + let pos = offset_to_position(content, 6); // 'w' in 'world' + assert_eq!(pos.line, 1); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 6); + + // Test multi-byte UTF-8 character (Korean '안' is 3 bytes in UTF-8, 1 code unit in UTF-16) + let pos = offset_to_position(content, 12); // start of '안' + assert_eq!(pos.line, 2); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 12); + + let pos = offset_to_position(content, 15); // after '안', start of '녕' + assert_eq!(pos.line, 2); + assert_eq!(pos.character, 1); + assert_eq!(position_to_offset(content, pos), 15); + + // Test Emoji (😀 is 4 bytes in UTF-8, 2 code units in UTF-16) + let pos = offset_to_position(content, 29); // start of emoji + assert_eq!(pos.line, 3); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 29); + + let pos = offset_to_position(content, 33); // after emoji, before space + assert_eq!(pos.line, 3); + assert_eq!(pos.character, 2); + assert_eq!(position_to_offset(content, pos), 33); + } +} diff --git a/rusty_lr_parser/src/lib.rs b/rusty_lr_parser/src/lib.rs index 68dcaeff..1181de61 100644 --- a/rusty_lr_parser/src/lib.rs +++ b/rusty_lr_parser/src/lib.rs @@ -16,9 +16,9 @@ pub mod terminal_info; pub(crate) mod terminalset; pub mod utils; -pub use parser::args::TableLayout; -/// Re-export Location for use by external crates (e.g. 
rusty_lr_buildscript) -pub use parser::location::Location; +pub use parser::args::{GrammarArgs, PatternArgs, RuleDefArgs, RuleLineArgs, TableLayout}; +pub use parser::location::{Located, Location}; +pub use terminalset::{TerminalSet, TerminalSetItem}; /// This, `rusty_lr_parser` is designed to generate a code, that will be relied on `rusty_lr`. /// From 611ca4f9f81d7ad82ad3b234a5a7edc1750d6626 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Mon, 22 Jun 2026 20:33:23 +0900 Subject: [PATCH 02/20] test working --- editors/vscode-rustylr/.gitignore | 2 + editors/vscode-rustylr/README.md | 68 +++++++ editors/vscode-rustylr/extension.js | 183 ++++++++++++++++++ .../language-configuration.json | 74 +++++++ editors/vscode-rustylr/package-lock.json | 161 +++++++++++++++ editors/vscode-rustylr/package.json | 97 ++++++++++ .../syntaxes/rustylr.tmLanguage.json | 80 ++++++++ rusty_lr_lsp/src/diagnostics.rs | 5 + rusty_lr_lsp/src/main.rs | 57 +++++- 9 files changed, 719 insertions(+), 8 deletions(-) create mode 100644 editors/vscode-rustylr/.gitignore create mode 100644 editors/vscode-rustylr/README.md create mode 100644 editors/vscode-rustylr/extension.js create mode 100644 editors/vscode-rustylr/language-configuration.json create mode 100644 editors/vscode-rustylr/package-lock.json create mode 100644 editors/vscode-rustylr/package.json create mode 100644 editors/vscode-rustylr/syntaxes/rustylr.tmLanguage.json diff --git a/editors/vscode-rustylr/.gitignore b/editors/vscode-rustylr/.gitignore new file mode 100644 index 00000000..28a78a7c --- /dev/null +++ b/editors/vscode-rustylr/.gitignore @@ -0,0 +1,2 @@ +node_modules/ +*.vsix diff --git a/editors/vscode-rustylr/README.md b/editors/vscode-rustylr/README.md new file mode 100644 index 00000000..78239826 --- /dev/null +++ b/editors/vscode-rustylr/README.md @@ -0,0 +1,68 @@ +# RustyLR VSCode Extension + +Temporary VSCode extension client for the `rusty_lr_lsp` server in this repository. + +## Run From This Repository + +1. 
Build the language server once: + + ```bash + cargo build -p rusty_lr_lsp + ``` + +2. Install the extension client dependencies: + + ```bash + cd editors/vscode-rustylr + npm install + ``` + +3. Open this extension folder in VSCode: + + ```bash + code editors/vscode-rustylr + ``` + +4. Press `F5` and choose `VS Code Extension Development` if prompted. + +5. In the Extension Development Host window, open the RustyLR repository folder and then open a grammar file such as `example/calculator/src/parser.rs`, or `src/grammar.rs` in a downstream project. + +The extension starts an already-built server binary from the RustyLR repository root when one exists, trying the debug build first and then the release build: + +```bash +<repo-root>/target/debug/rusty_lr_lsp +<repo-root>/target/release/rusty_lr_lsp +``` + +If neither binary exists yet, it falls back to `cargo run --quiet --package rusty_lr_lsp`. + +The extension searches upward for the RustyLR repository root and uses that as the server working directory. You can override the command, arguments, and working directory with VSCode settings. These values may use the variables `${workspaceFolder}`, `${extensionPath}`, and `${repoRoot}`: + +```json +{ + "rustylr.server.command": "${repoRoot}/target/debug/rusty_lr_lsp", + "rustylr.server.args": [], + "rustylr.server.cwd": "${repoRoot}" +} +``` + +## File Matching + +The extension contributes a `rustylr` language mode for: + +- `grammar.rs` +- `src/parser.rs` +- `*.rustylr.rs` +- `*.rustylr` +- `*.lr` + +It also attaches the language client to files matching those patterns even when the VSCode language mode is not manually changed. + +For a differently named grammar file, add it to: + +```json +{ + "rustylr.server.documentPatterns": ["**/grammar.rs", "**/src/parser.rs", "**/my_parser_input.rs"] +} +``` + +Then run `RustyLR: Restart Language Server` from the command palette. 
diff --git a/editors/vscode-rustylr/extension.js b/editors/vscode-rustylr/extension.js new file mode 100644 index 00000000..3f17e367 --- /dev/null +++ b/editors/vscode-rustylr/extension.js @@ -0,0 +1,183 @@ +const fs = require("fs"); +const path = require("path"); +const vscode = require("vscode"); +const { LanguageClient, TransportKind } = require("vscode-languageclient/node"); + +let client; +let outputChannel; + +async function activate(context) { + outputChannel = vscode.window.createOutputChannel("RustyLR LSP"); + context.subscriptions.push(outputChannel); + + context.subscriptions.push( + vscode.commands.registerCommand("rustylr.restartServer", async () => { + await stopClient(); + try { + await startClient(context); + vscode.window.showInformationMessage("RustyLR language server restarted."); + } catch (error) { + reportStartError(error); + } + }) + ); + + try { + await startClient(context); + } catch (error) { + reportStartError(error); + } +} + +async function deactivate() { + await stopClient(); +} + +async function startClient(context) { + const config = vscode.workspace.getConfiguration("rustylr.server"); + const workspaceFolder = + vscode.workspace.workspaceFolders && vscode.workspace.workspaceFolders.length > 0 + ? vscode.workspace.workspaceFolders[0].uri.fsPath + : undefined; + const repoRoot = findRustyLrRoot(workspaceFolder) || findRustyLrRoot(context.extensionPath); + + const configuredCwd = config.get("cwd", ""); + const cwd = configuredCwd + ? 
expandPath(configuredCwd, { workspaceFolder, extensionPath: context.extensionPath, repoRoot }) + : repoRoot || workspaceFolder || context.extensionPath; + + const configuredCommand = config.get("command", ""); + const configuredArgs = config.get("args", []); + const server = resolveServerCommand(configuredCommand, configuredArgs, { + workspaceFolder, + extensionPath: context.extensionPath, + repoRoot, + cwd, + }); + + const patterns = config.get("documentPatterns", [ + "**/grammar.rs", + "**/src/parser.rs", + "**/*.rustylr.rs", + "**/*.rustylr", + "**/*.lr", + ]); + + const documentSelector = [ + { scheme: "file", language: "rustylr" }, + ...patterns.map((pattern) => ({ scheme: "file", pattern })), + ]; + + outputChannel.appendLine(`Starting RustyLR LSP: ${server.command} ${server.args.join(" ")}`); + outputChannel.appendLine(`RustyLR LSP cwd: ${cwd}`); + + client = new LanguageClient( + "rustylr", + "RustyLR Language Server", + { + command: server.command, + args: server.args, + options: { cwd }, + transport: TransportKind.stdio, + }, + { + documentSelector, + outputChannel, + synchronize: { + configurationSection: "rustylr", + }, + } + ); + + await client.start(); +} + +async function stopClient() { + if (!client) { + return; + } + + const activeClient = client; + client = undefined; + try { + await activeClient.stop(); + } catch (error) { + const message = error && error.message ? 
error.message : String(error); + if (outputChannel) { + outputChannel.appendLine(`Ignoring RustyLR LSP stop error: ${message}`); + } + } +} + +function expandPath(value, vars) { + return value + .split("${workspaceFolder}") + .join(vars.workspaceFolder || "") + .split("${extensionPath}") + .join(vars.extensionPath || "") + .split("${repoRoot}") + .join(vars.repoRoot || ""); +} + +function resolveServerCommand(configuredCommand, configuredArgs, vars) { + if (configuredCommand) { + return { + command: expandPath(configuredCommand, vars), + args: configuredArgs.map((arg) => expandPath(arg, vars)), + }; + } + + const binaryName = process.platform === "win32" ? "rusty_lr_lsp.exe" : "rusty_lr_lsp"; + const candidates = [ + vars.repoRoot && path.join(vars.repoRoot, "target", "debug", binaryName), + vars.repoRoot && path.join(vars.repoRoot, "target", "release", binaryName), + ].filter(Boolean); + + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return { command: candidate, args: [] }; + } + } + + return { + command: "cargo", + args: ["run", "--quiet", "--package", "rusty_lr_lsp"], + }; +} + +function findRustyLrRoot(startPath) { + if (!startPath) { + return undefined; + } + + let current = fs.statSync(startPath).isDirectory() ? startPath : path.dirname(startPath); + while (true) { + if ( + fs.existsSync(path.join(current, "Cargo.toml")) && + fs.existsSync(path.join(current, "rusty_lr_lsp", "Cargo.toml")) + ) { + return current; + } + + const parent = path.dirname(current); + if (parent === current) { + return undefined; + } + current = parent; + } +} + +function reportStartError(error) { + const message = error && error.stack ? error.stack : String(error); + if (outputChannel) { + outputChannel.appendLine("Failed to start RustyLR LSP."); + outputChannel.appendLine(message); + outputChannel.show(true); + } + vscode.window.showErrorMessage("Failed to start RustyLR language server. 
See Output: RustyLR LSP."); +} + +module.exports = { + activate, + deactivate, +}; diff --git a/editors/vscode-rustylr/language-configuration.json b/editors/vscode-rustylr/language-configuration.json new file mode 100644 index 00000000..0893eba1 --- /dev/null +++ b/editors/vscode-rustylr/language-configuration.json @@ -0,0 +1,74 @@ +{ + "comments": { + "lineComment": "//", + "blockComment": [ + "/*", + "*/" + ] + }, + "brackets": [ + [ + "{", + "}" + ], + [ + "[", + "]" + ], + [ + "(", + ")" + ] + ], + "autoClosingPairs": [ + { + "open": "{", + "close": "}" + }, + { + "open": "[", + "close": "]" + }, + { + "open": "(", + "close": ")" + }, + { + "open": "\"", + "close": "\"", + "notIn": [ + "string" + ] + }, + { + "open": "'", + "close": "'", + "notIn": [ + "string", + "comment" + ] + } + ], + "surroundingPairs": [ + [ + "{", + "}" + ], + [ + "[", + "]" + ], + [ + "(", + ")" + ], + [ + "\"", + "\"" + ], + [ + "'", + "'" + ] + ] +} diff --git a/editors/vscode-rustylr/package-lock.json b/editors/vscode-rustylr/package-lock.json new file mode 100644 index 00000000..243f6479 --- /dev/null +++ b/editors/vscode-rustylr/package-lock.json @@ -0,0 +1,161 @@ +{ + "name": "rustylr-vscode", + "version": "0.0.1", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "rustylr-vscode", + "version": "0.0.1", + "license": "MIT OR Apache-2.0", + "dependencies": { + "vscode-languageclient": "^9.0.1" + }, + "devDependencies": { + "@types/vscode": "1.84.0" + }, + "engines": { + "vscode": "^1.84.0" + } + }, + "node_modules/@types/vscode": { + "version": "1.84.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.84.0.tgz", + "integrity": "sha512-lCGOSrhT3cL+foUEqc8G1PVZxoDbiMmxgnUZZTEnHF4mC47eKAUtBGAuMLY6o6Ua8PAuNCoKXbqPmJd1JYnQfg==", + "dev": true + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": 
"sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "node_modules/brace-expansion": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.1.tgz", + "integrity": "sha512-WR1cURNjuvBLMZBMbqM0UoE+WAfdUcEV1ccD8PVBVOI+Z3ND4+SZbN8RsfT2bMuG1qwz5RFvPukSZm5fF2D5eA==", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/minimatch": { + "version": "5.1.9", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz", + "integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/semver": { + "version": "7.8.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.5.tgz", + "integrity": "sha512-Y7/KDsb8LjooZpwaqGyulO6DQlksgCncchHGk+sZIY4SBvUocMBEFH5Ur1fI4dV+Jvl0w6cjvucaIi40puRioA==", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": "sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "dependencies": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + }, + "engines": { + "vscode": "^1.82.0" + } + }, + "node_modules/vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": 
"https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "dependencies": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "node_modules/vscode-languageserver-types": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==" + } + }, + "dependencies": { + "@types/vscode": { + "version": "1.84.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.84.0.tgz", + "integrity": "sha512-lCGOSrhT3cL+foUEqc8G1PVZxoDbiMmxgnUZZTEnHF4mC47eKAUtBGAuMLY6o6Ua8PAuNCoKXbqPmJd1JYnQfg==", + "dev": true + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "brace-expansion": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.1.tgz", + "integrity": "sha512-WR1cURNjuvBLMZBMbqM0UoE+WAfdUcEV1ccD8PVBVOI+Z3ND4+SZbN8RsfT2bMuG1qwz5RFvPukSZm5fF2D5eA==", + "requires": { + "balanced-match": "^1.0.0" + } + }, + "minimatch": { + "version": "5.1.9", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz", + "integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==", + "requires": { + "brace-expansion": "^2.0.1" + } + }, + "semver": { + "version": "7.8.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.5.tgz", + "integrity": "sha512-Y7/KDsb8LjooZpwaqGyulO6DQlksgCncchHGk+sZIY4SBvUocMBEFH5Ur1fI4dV+Jvl0w6cjvucaIi40puRioA==" + }, + "vscode-jsonrpc": { + "version": 
"8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==" + }, + "vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": "sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "requires": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + } + }, + "vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "requires": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "vscode-languageserver-types": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==" + } + } +} diff --git a/editors/vscode-rustylr/package.json b/editors/vscode-rustylr/package.json new file mode 100644 index 00000000..bed6909d --- /dev/null +++ b/editors/vscode-rustylr/package.json @@ -0,0 +1,97 @@ +{ + "name": "rustylr-vscode", + "displayName": "RustyLR", + "description": "Temporary VSCode extension client for the RustyLR language server.", + "version": "0.0.1", + "publisher": "rustylr", + "license": "MIT OR Apache-2.0", + "engines": { + "vscode": "^1.84.0" + }, + "categories": [ + "Programming Languages" + ], + "main": "./extension.js", + "activationEvents": [ + "onLanguage:rustylr", + "workspaceContains:**/grammar.rs", + "workspaceContains:**/src/parser.rs", + "onCommand:rustylr.restartServer" + ], 
+ "contributes": { + "commands": [ + { + "command": "rustylr.restartServer", + "title": "RustyLR: Restart Language Server" + } + ], + "configuration": { + "title": "RustyLR", + "properties": { + "rustylr.server.command": { + "type": "string", + "default": "", + "description": "Command used to start the RustyLR LSP server. Empty means auto-detect target/debug/rusty_lr_lsp and fall back to cargo run." + }, + "rustylr.server.args": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "Arguments passed to rustylr.server.command." + }, + "rustylr.server.cwd": { + "type": "string", + "default": "", + "description": "Working directory for the RustyLR LSP server. Empty means the extension will use the RustyLR workspace root when it can find one." + }, + "rustylr.server.documentPatterns": { + "type": "array", + "items": { + "type": "string" + }, + "default": [ + "**/grammar.rs", + "**/src/parser.rs", + "**/*.rustylr.rs", + "**/*.rustylr", + "**/*.lr" + ], + "description": "Additional file globs handled by the RustyLR LSP server." 
+ } + } + }, + "languages": [ + { + "id": "rustylr", + "aliases": [ + "RustyLR", + "rustylr" + ], + "extensions": [ + ".rustylr", + ".lr" + ], + "filenamePatterns": [ + "grammar.rs", + "*.rustylr.rs" + ], + "configuration": "./language-configuration.json" + } + ], + "grammars": [ + { + "language": "rustylr", + "scopeName": "source.rustylr", + "path": "./syntaxes/rustylr.tmLanguage.json" + } + ] + }, + "dependencies": { + "vscode-languageclient": "^9.0.1" + }, + "devDependencies": { + "@types/vscode": "1.84.0" + } +} diff --git a/editors/vscode-rustylr/syntaxes/rustylr.tmLanguage.json b/editors/vscode-rustylr/syntaxes/rustylr.tmLanguage.json new file mode 100644 index 00000000..4682b3b4 --- /dev/null +++ b/editors/vscode-rustylr/syntaxes/rustylr.tmLanguage.json @@ -0,0 +1,80 @@ +{ + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "name": "RustyLR", + "scopeName": "source.rustylr", + "patterns": [ + { + "include": "#comments" + }, + { + "include": "#directives" + }, + { + "include": "#punctuation" + }, + { + "include": "#strings" + } + ], + "repository": { + "comments": { + "patterns": [ + { + "name": "comment.line.double-slash.rustylr", + "match": "//.*$" + }, + { + "name": "comment.block.rustylr", + "begin": "/\\*", + "end": "\\*/" + } + ] + }, + "directives": { + "patterns": [ + { + "name": "keyword.control.directive.rustylr", + "match": "%(?:allow|eof|error|errortype|fallback|glr|lalr|layout|left|location|nonassoc|prec|right|start|token|tokentype)\\b" + }, + { + "name": "keyword.operator.section.rustylr", + "match": "%%" + } + ] + }, + "punctuation": { + "patterns": [ + { + "name": "punctuation.separator.production.rustylr", + "match": "[:;|]" + } + ] + }, + "strings": { + "patterns": [ + { + "name": "string.quoted.double.rustylr", + "begin": "\"", + "end": "\"", + "patterns": [ + { + "name": "constant.character.escape.rustylr", + "match": "\\\\." 
+ } + ] + }, + { + "name": "string.quoted.single.rustylr", + "begin": "'", + "end": "'", + "patterns": [ + { + "name": "constant.character.escape.rustylr", + "match": "\\\\." + } + ] + } + ] + } + } +} diff --git a/rusty_lr_lsp/src/diagnostics.rs b/rusty_lr_lsp/src/diagnostics.rs index 572e68e2..4169ac18 100644 --- a/rusty_lr_lsp/src/diagnostics.rs +++ b/rusty_lr_lsp/src/diagnostics.rs @@ -156,6 +156,11 @@ pub fn compile_and_get_diagnostics(content: &str) -> Vec { } }; + if grammar.optimize { + grammar.optimize(25); + } + grammar.builder = grammar.create_builder(); + // 7. Verify Shift/Reduce and Reduce/Reduce conflicts in non-GLR mode let diags_collector = grammar.build_grammar(); if !grammar.glr { diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index 4eba6803..0492461f 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -4,11 +4,13 @@ use lsp_types::{ DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, }, request::GotoDefinition, - GotoDefinitionResponse, InitializeParams, Location, OneOf, PublishDiagnosticsParams, - ServerCapabilities, TextDocumentSyncCapability, TextDocumentSyncKind, Url, + Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Location, OneOf, + PublishDiagnosticsParams, Range, ServerCapabilities, TextDocumentSyncCapability, + TextDocumentSyncKind, Url, }; use std::collections::HashMap; use std::error::Error; +use std::panic::{catch_unwind, set_hook, take_hook, AssertUnwindSafe}; // Import the traits providing `METHOD` constant: use lsp_types::notification::Notification as LspNotification; @@ -31,8 +33,7 @@ fn main() -> Result<(), Box> { ..Default::default() })?; - let initialization_params = connection.initialize(server_capabilities)?; - let _params: InitializeParams = serde_json::from_value(initialization_params)?; + connection.initialize(server_capabilities)?; eprintln!("RustyLR LSP server initialized successfully."); @@ -61,9 +62,18 @@ fn main() -> Result<(), Box> { let 
mut response = Response::new_ok(id.clone(), serde_json::Value::Null); if let Some(content) = documents.get(&uri) { - if let Some(range) = goto_definition::find_definition(content, position) { - let loc = Location::new(uri.clone(), range); - response = Response::new_ok(id, GotoDefinitionResponse::Scalar(loc)); + match catch_lsp_panic(|| { + goto_definition::find_definition(content, position) + }) { + Ok(Some(range)) => { + let loc = Location::new(uri.clone(), range); + response = + Response::new_ok(id, GotoDefinitionResponse::Scalar(loc)); + } + Ok(None) => {} + Err(message) => { + eprintln!("RustyLR goto-definition panicked: {message}"); + } } } connection.sender.send(Message::Response(response))?; @@ -121,7 +131,20 @@ fn main() -> Result<(), Box> { } fn publish_diagnostics(connection: &Connection, uri: Url, content: &str) { - let diags = diagnostics::compile_and_get_diagnostics(content); + let diags = match catch_lsp_panic(|| diagnostics::compile_and_get_diagnostics(content)) { + Ok(diags) => diags, + Err(message) => vec![Diagnostic { + range: Range::default(), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("RustyLR compiler panicked: {message}"), + related_information: None, + tags: None, + data: None, + }], + }; let params = PublishDiagnosticsParams { uri, diagnostics: diags, @@ -131,6 +154,24 @@ fn publish_diagnostics(connection: &Connection, uri: Url, content: &str) { let _ = connection.sender.send(Message::Notification(notification)); } +fn catch_lsp_panic(f: impl FnOnce() -> T) -> Result { + let hook = take_hook(); + set_hook(Box::new(|_| {})); + let result = catch_unwind(AssertUnwindSafe(f)).map_err(panic_message); + set_hook(hook); + result +} + +fn panic_message(payload: Box) -> String { + if let Some(message) = payload.downcast_ref::<&str>() { + (*message).to_string() + } else if let Some(message) = payload.downcast_ref::() { + message.clone() + } else { + 
"unknown panic payload".to_string() + } +} + fn cast_request( req: Request, ) -> Result<(RequestId, R::Params), lsp_server::ExtractError> From 3dcbc9920acca706337d5ecf9251dc0380a57083 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Mon, 22 Jun 2026 20:56:52 +0900 Subject: [PATCH 03/20] WIP completion support --- README.md | 7 + editors/vscode-rustylr/README.md | 21 +- editors/vscode-rustylr/extension.js | 36 +- editors/vscode-rustylr/package.json | 27 +- rusty_lr_lsp/README.md | 63 +++ rusty_lr_lsp/src/completion.rs | 584 ++++++++++++++++++++++++++++ rusty_lr_lsp/src/main.rs | 42 +- 7 files changed, 744 insertions(+), 36 deletions(-) create mode 100644 rusty_lr_lsp/README.md create mode 100644 rusty_lr_lsp/src/completion.rs diff --git a/README.md b/README.md index 0347d119..31df3746 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,13 @@ println!("{}", context); // Formats the state tree (requires 'tree' feature) --- +## Editor Support + +An experimental RustyLR language server is under development in [`rusty_lr_lsp`](rusty_lr_lsp), with a temporary VSCode client in [`editors/vscode-rustylr`](editors/vscode-rustylr). +It currently targets `*.rustylr` files and files named `rustylr.rs`. + +--- + ## Examples - [Calculator (enum tokens)](https://github.com/ehwan/RustyLR/blob/main/example/calculator/src/parser.rustylr): A numeric expression parser using custom token enums. diff --git a/editors/vscode-rustylr/README.md b/editors/vscode-rustylr/README.md index 78239826..467d86b0 100644 --- a/editors/vscode-rustylr/README.md +++ b/editors/vscode-rustylr/README.md @@ -4,10 +4,10 @@ Temporary VSCode extension client for the `rusty_lr_lsp` server in this reposito ## Run From This Repository -1. Build or check the language server once: +1. Build the language server once: ```bash - cargo check -p rusty_lr_lsp + cargo build -p rusty_lr_lsp ``` 2. 
Install the extension client dependencies: @@ -25,7 +25,7 @@ Temporary VSCode extension client for the `rusty_lr_lsp` server in this reposito 4. Press `F5` and choose `VS Code Extension Development` if prompted. -5. In the Extension Development Host window, open the RustyLR repository folder and then open a grammar file such as `example/calculator/src/parser.rs`, or `src/grammar.rs` in a downstream project. +5. In the Extension Development Host window, open the RustyLR repository folder and then open a grammar file such as `example/calculator/src/parser.rustylr`, or `src/rustylr.rs` in a downstream project. The extension starts the already-built server binary when it exists: @@ -49,20 +49,13 @@ The extension searches upward for the RustyLR repository root and uses that as t The extension contributes a `rustylr` language mode for: -- `grammar.rs` -- `src/parser.rs` -- `*.rustylr.rs` - `*.rustylr` -- `*.lr` +- `rustylr.rs` It also sends those file patterns to the LSP server even when the VSCode language mode is not manually changed. -For a differently named grammar file, add it to: +## Features -```json -{ - "rustylr.server.documentPatterns": ["**/grammar.rs", "**/src/parser.rs", "**/my_parser_input.rs"] -} -``` +The extension is intentionally thin: VSCode starts `rusty_lr_lsp` over stdio and the server provides the language features. -Then run `RustyLR: Restart Language Server` from the command palette. +See [`rusty_lr_lsp/README.md`](../../rusty_lr_lsp/README.md) for the current diagnostics, go-to-definition, and completion feature details. 
diff --git a/editors/vscode-rustylr/extension.js b/editors/vscode-rustylr/extension.js index 3f17e367..de14b63f 100644 --- a/editors/vscode-rustylr/extension.js +++ b/editors/vscode-rustylr/extension.js @@ -5,6 +5,7 @@ const { LanguageClient, TransportKind } = require("vscode-languageclient/node"); let client; let outputChannel; +let startingClient; async function activate(context) { outputChannel = vscode.window.createOutputChannel("RustyLR LSP"); @@ -22,11 +23,7 @@ async function activate(context) { }) ); - try { - await startClient(context); - } catch (error) { - reportStartError(error); - } + startClient(context).catch(reportStartError); } async function deactivate() { @@ -34,6 +31,22 @@ async function deactivate() { } async function startClient(context) { + if (startingClient) { + return startingClient; + } + if (client) { + return; + } + + startingClient = doStartClient(context); + try { + await startingClient; + } finally { + startingClient = undefined; + } +} + +async function doStartClient(context) { const config = vscode.workspace.getConfiguration("rustylr.server"); const workspaceFolder = vscode.workspace.workspaceFolders && vscode.workspace.workspaceFolders.length > 0 @@ -56,11 +69,8 @@ async function startClient(context) { }); const patterns = config.get("documentPatterns", [ - "**/grammar.rs", - "**/src/parser.rs", - "**/*.rustylr.rs", "**/*.rustylr", - "**/*.lr", + "**/rustylr.rs", ]); const documentSelector = [ @@ -93,6 +103,14 @@ async function startClient(context) { } async function stopClient() { + if (startingClient) { + try { + await startingClient; + } catch (_error) { + // The start failure will already be reported by the original caller. 
+ } + } + if (!client) { return; } diff --git a/editors/vscode-rustylr/package.json b/editors/vscode-rustylr/package.json index bed6909d..b4ef54df 100644 --- a/editors/vscode-rustylr/package.json +++ b/editors/vscode-rustylr/package.json @@ -14,8 +14,8 @@ "main": "./extension.js", "activationEvents": [ "onLanguage:rustylr", - "workspaceContains:**/grammar.rs", - "workspaceContains:**/src/parser.rs", + "workspaceContains:**/*.rustylr", + "workspaceContains:**/rustylr.rs", "onCommand:rustylr.restartServer" ], "contributes": { @@ -52,11 +52,8 @@ "type": "string" }, "default": [ - "**/grammar.rs", - "**/src/parser.rs", - "**/*.rustylr.rs", "**/*.rustylr", - "**/*.lr" + "**/rustylr.rs" ], "description": "Additional file globs handled by the RustyLR LSP server." } @@ -70,12 +67,10 @@ "rustylr" ], "extensions": [ - ".rustylr", - ".lr" + ".rustylr" ], "filenamePatterns": [ - "grammar.rs", - "*.rustylr.rs" + "rustylr.rs" ], "configuration": "./language-configuration.json" } @@ -86,7 +81,17 @@ "scopeName": "source.rustylr", "path": "./syntaxes/rustylr.tmLanguage.json" } - ] + ], + "configurationDefaults": { + "[rustylr]": { + "editor.quickSuggestions": { + "other": true, + "comments": false, + "strings": false + }, + "editor.suggestOnTriggerCharacters": true + } + } }, "dependencies": { "vscode-languageclient": "^9.0.1" diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md new file mode 100644 index 00000000..7730ef33 --- /dev/null +++ b/rusty_lr_lsp/README.md @@ -0,0 +1,63 @@ +# RustyLR LSP + +`rusty_lr_lsp` is an experimental language server for RustyLR grammar files. It communicates over stdio and is intended to be used by editor clients such as the temporary VSCode extension in `editors/vscode-rustylr`. + +## Supported Files + +The current VSCode client targets: + +- `*.rustylr` +- `rustylr.rs` + +Other Rust files are intentionally not matched by default. 
+ +## Features + +- **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. +- **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. +- **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. +- **Completion for directives and keywords:** Suggests directives such as `%token`, `%start`, `%tokentype`, `%left`, `%right`, `%precedence`, `%prec`, `%dprec`, `%glr`, `%lalr`, `%nooptim`, `%allow`, and common identifiers such as `error`, `$sep`, `data`, `lookahead`, and `shift`. +- **Completion for `$...` variables:** Suggests built-in substitutions (`$tokentype`, `$location`, `$userdata`, `$error`, `$errortype`), terminal and non-terminal substitutions (`$terminal_name`, `$NonTerminalName`), current reduce-action bindings (`$left`, `$value`, etc.), and positional semantic variables (`$1`, `$2`, ...). +- **Completion for locations:** Suggests `@$`, `@0`, positional locations (`@1`, `@2`, ...), and named binding locations (`@left`, `@value`, etc.). +- **Completion for `%allow`:** Suggests valid diagnostic names such as `nonterm_unreachable`, `unused_terminals`, and conflict-resolution diagnostic identifiers. + +## Running the Server + +Build the server from the workspace root: + +```bash +cargo build -p rusty_lr_lsp +``` + +The debug binary is then available at: + +```bash +target/debug/rusty_lr_lsp +``` + +The server expects to be launched by an LSP client over stdio. For quick VSCode testing, use the extension client in `editors/vscode-rustylr`. + +## VSCode Test Client + +From the repository root: + +```bash +cargo build -p rusty_lr_lsp +cd editors/vscode-rustylr +npm install +code . +``` + +Press `F5` in VSCode to open an Extension Development Host, then open the RustyLR repository or another workspace containing `*.rustylr` or `rustylr.rs` grammar files. 
+ +The extension auto-detects `target/debug/rusty_lr_lsp` when it exists. You can override the server command with VSCode settings: + +```json +{ + "rustylr.server.command": "/home/ehwan/workspace/RustyLR/target/debug/rusty_lr_lsp", + "rustylr.server.args": [], + "rustylr.server.cwd": "/home/ehwan/workspace/RustyLR" +} +``` + +Use `RustyLR: Restart Language Server` from the command palette after changing server settings. diff --git a/rusty_lr_lsp/src/completion.rs b/rusty_lr_lsp/src/completion.rs new file mode 100644 index 00000000..b0fc3226 --- /dev/null +++ b/rusty_lr_lsp/src/completion.rs @@ -0,0 +1,584 @@ +use lsp_types::{ + CompletionItem, CompletionItemKind, CompletionResponse, CompletionTextEdit, Position, Range, + TextEdit, +}; +use proc_macro2::{TokenStream, TokenTree}; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{GrammarArgs, PatternArgs}; +use std::collections::BTreeSet; +use std::str::FromStr; + +use crate::diagnostics::split_stream; +use crate::position::{offset_to_position, position_to_offset}; + +const DIRECTIVES: &[&str] = &[ + "%token", + "%start", + "%tokentype", + "%userdata", + "%error", + "%errortype", + "%location", + "%left", + "%right", + "%precedence", + "%prec", + "%dprec", + "%glr", + "%lalr", + "%nooptim", + "%allow", + "%moduleprefix", +]; + +const SUBSTITUTION_VARIABLES: &[&str] = &[ + "$tokentype", + "$location", + "$userdata", + "$error", + "$errortype", +]; + +const ALLOW_DIAGNOSTICS: &[&str] = &[ + "nonterm_unreachable", + "nonterm_unproductive", + "unused_nonterm_data", + "unused_terminals", + "terminals_merged", + "redundant_rule_removed", + "unit_production_eliminated", + "reduce_reduce_conflict_resolved", + "shift_reduce_conflict_resolved", + "shift_reduce_conflict_glr", + "reduce_reduce_conflict_glr", +]; + +const KEYWORDS: &[&str] = &[ + "error", + "auto", + "dense", + "sparse", + "$sep", + "data", + "lookahead", + "shift", +]; + +#[derive(Clone, Copy, PartialEq, Eq)] +enum CompletionMode { + Directive, + 
Dollar, + Location, + AllowDiagnostic, + Symbol, +} + +pub fn completions(content: &str, position: Position) -> CompletionResponse { + let offset = position_to_offset(content, position); + let mode = completion_mode(content, offset); + let replace_range = replacement_range(content, offset, mode); + + let parsed = parse_args(content).ok(); + let names = parsed + .as_ref() + .map(CompletionNames::from_args) + .unwrap_or_else(|| CompletionNames::from_text(content)); + let line_variables = parsed + .as_ref() + .map(|args| variables_for_offset(args, content, offset)) + .unwrap_or_default(); + + let mut builder = CompletionBuilder::new(replace_range); + + match mode { + CompletionMode::Directive => { + for directive in DIRECTIVES { + builder.keyword(directive, "RustyLR directive"); + } + } + CompletionMode::Dollar => { + for variable in SUBSTITUTION_VARIABLES { + builder.variable(variable, "built-in RustCode substitution"); + } + for name in &names.nonterminals { + builder.variable(&format!("${name}"), "non-terminal production type"); + } + for name in &names.terminals { + builder.variable(&format!("${name}"), "terminal definition substitution"); + } + for variable in &line_variables.value_names { + builder.variable(&format!("${variable}"), "current production binding"); + } + for index in 1..=line_variables.value_count { + builder.variable(&format!("${index}"), "positional semantic value"); + } + } + CompletionMode::Location => { + builder.variable("@$", "current production location"); + builder.variable("@0", "current production location"); + for variable in &line_variables.value_names { + builder.variable( + &format!("@{variable}"), + "current production binding location", + ); + } + for index in 1..=line_variables.value_count { + builder.variable(&format!("@{index}"), "positional location"); + } + } + CompletionMode::AllowDiagnostic => { + for diagnostic in ALLOW_DIAGNOSTICS { + builder.keyword(diagnostic, "diagnostic suppression name"); + } + add_symbol_items(&mut 
builder, &names); + } + CompletionMode::Symbol => { + add_symbol_items(&mut builder, &names); + for keyword in KEYWORDS { + builder.keyword(keyword, "RustyLR keyword"); + } + for directive in DIRECTIVES { + builder.keyword(directive, "RustyLR directive"); + } + } + } + + CompletionResponse::Array(builder.finish()) +} + +fn add_symbol_items(builder: &mut CompletionBuilder, names: &CompletionNames) { + for name in &names.nonterminals { + builder.nonterminal(name); + } + for name in &names.terminals { + builder.terminal(name); + } +} + +fn parse_args(content: &str) -> Result { + let token_stream = TokenStream::from_str(content).map_err(|_| ())?; + let (_, macro_stream) = split_stream(token_stream).map_err(|_| ())?; + Grammar::parse_args(macro_stream).map_err(|_| ()) +} + +fn completion_mode(content: &str, offset: usize) -> CompletionMode { + let prefix_start = current_prefix_start(content, offset, true); + if prefix_start < offset { + match content.as_bytes()[prefix_start] { + b'%' => return CompletionMode::Directive, + b'$' => return CompletionMode::Dollar, + b'@' => return CompletionMode::Location, + _ => {} + } + } + + let line_prefix = line_prefix(content, offset); + let trimmed = line_prefix.trim_start(); + if trimmed.starts_with("%allow") { + return CompletionMode::AllowDiagnostic; + } + if trimmed.ends_with('%') { + return CompletionMode::Directive; + } + if trimmed.ends_with('$') { + return CompletionMode::Dollar; + } + if trimmed.ends_with('@') { + return CompletionMode::Location; + } + + CompletionMode::Symbol +} + +fn replacement_range(content: &str, offset: usize, mode: CompletionMode) -> Range { + let include_sigils = matches!( + mode, + CompletionMode::Directive | CompletionMode::Dollar | CompletionMode::Location + ); + let start = current_prefix_start(content, offset, include_sigils); + Range::new( + offset_to_position(content, start), + offset_to_position(content, offset), + ) +} + +fn current_prefix_start(content: &str, offset: usize, include_sigils: 
bool) -> usize { + let mut start = offset.min(content.len()); + while start > 0 { + let Some(ch) = content[..start].chars().next_back() else { + break; + }; + if is_ident_continue(ch) + || (include_sigils && matches!(ch, '$' | '@' | '%')) + || (include_sigils && ch.is_ascii_digit()) + { + start -= ch.len_utf8(); + } else { + break; + } + } + start +} + +fn line_prefix(content: &str, offset: usize) -> &str { + let offset = offset.min(content.len()); + let line_start = content[..offset].rfind('\n').map_or(0, |idx| idx + 1); + &content[line_start..offset] +} + +fn is_ident_continue(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() +} + +#[derive(Default)] +struct CompletionNames { + terminals: BTreeSet, + nonterminals: BTreeSet, +} + +impl CompletionNames { + fn from_args(args: &GrammarArgs) -> Self { + let mut names = CompletionNames::default(); + for (terminal, _) in &args.terminals { + names.terminals.insert(terminal.value().clone()); + } + for rule in &args.rules { + names.nonterminals.insert(rule.name.value().clone()); + } + names + } + + fn from_text(content: &str) -> Self { + let mut names = CompletionNames::default(); + let grammar = content + .split_once("%%") + .map_or(content, |(_, grammar)| grammar); + + for raw_line in grammar.lines() { + let line = raw_line.trim_start(); + if let Some(rest) = line.strip_prefix("%token") { + if let Some(name) = first_ident(rest) { + names.terminals.insert(name.to_string()); + } + continue; + } + + if line.starts_with('%') { + continue; + } + + if let Some(colon_idx) = line.find(':') { + let head = &line[..colon_idx]; + if let Some(name) = first_ident(head) { + names.nonterminals.insert(name.to_string()); + } + } + } + + names + } +} + +fn first_ident(text: &str) -> Option<&str> { + let start = text.find(|ch: char| ch == '_' || ch.is_ascii_alphabetic())?; + let rest = &text[start..]; + let end = rest + .find(|ch: char| !(ch == '_' || ch.is_ascii_alphanumeric())) + .unwrap_or(rest.len()); + Some(&rest[..end]) +} 
+ +#[derive(Default)] +struct LineVariables { + value_names: BTreeSet, + value_count: usize, +} + +fn variables_for_offset(args: &GrammarArgs, content: &str, offset: usize) -> LineVariables { + for rule in &args.rules { + for (line_idx, line) in rule.rule_lines.iter().enumerate() { + let start = args + .span_manager + .get_byterange(&line.separator_location) + .map_or(0, |range| range.start); + let end = rule_line_end(args, content, rule, line_idx); + if start <= offset && offset <= end { + let mut variables = LineVariables::default(); + for (mapped_name, pattern) in &line.tokens { + variables.value_count += 1; + if let Some(name) = mapped_name { + variables.value_names.insert(name.value().clone()); + } else { + collect_default_bindings(pattern, &mut variables.value_names); + } + } + return variables; + } + } + } + + LineVariables::default() +} + +fn rule_line_end( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, + line_idx: usize, +) -> usize { + if let Some(next_line) = rule.rule_lines.get(line_idx + 1) { + return args + .span_manager + .get_byterange(&next_line.separator_location) + .map_or(content.len(), |range| range.start); + } + + let mut end = args + .span_manager + .get_byterange(&rule.name.location()) + .map_or(0, |range| range.end); + for (_, pattern) in &rule.rule_lines[line_idx].tokens { + end = end.max(pattern_end(args, pattern)); + } + if let Some(action) = &rule.rule_lines[line_idx].reduce_action { + end = end.max(token_stream_end(action)); + } + + content[end.min(content.len())..] 
+ .find(';') + .map_or(content.len(), |semi| end + semi) +} + +fn pattern_end(args: &GrammarArgs, pattern: &PatternArgs) -> usize { + match pattern { + PatternArgs::Ident(ident) => args + .span_manager + .get_byterange(&ident.location()) + .map_or(0, |range| range.end), + PatternArgs::Plus { base, op_location } + | PatternArgs::Star { base, op_location } + | PatternArgs::Question { base, op_location } + | PatternArgs::Exclamation { base, op_location } => pattern_end(args, base).max( + args.span_manager + .get_byterange(op_location) + .map_or(0, |range| range.end), + ), + PatternArgs::TerminalSet(set) => args + .span_manager + .get_byterange(&set.location()) + .map_or(0, |range| range.end), + PatternArgs::Group { + alternatives, + close_location, + .. + } => alternatives + .iter() + .flatten() + .map(|pattern| pattern_end(args, pattern)) + .max() + .unwrap_or(0) + .max( + args.span_manager + .get_byterange(close_location) + .map_or(0, |range| range.end), + ), + PatternArgs::Byte(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::ByteString(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::Char(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::String(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::Minus { base, exclude } => { + pattern_end(args, base).max(pattern_end(args, exclude)) + } + PatternArgs::Sep { + base, + delimiter, + location, + .. 
+ } => pattern_end(args, base) + .max(pattern_end(args, delimiter)) + .max( + args.span_manager + .get_byterange(location) + .map_or(0, |range| range.end), + ), + } +} + +fn token_stream_end(stream: &TokenStream) -> usize { + stream + .clone() + .into_iter() + .map(token_tree_end) + .max() + .unwrap_or(0) +} + +fn token_tree_end(token: TokenTree) -> usize { + match token { + TokenTree::Group(group) => token_stream_end(&group.stream()) + .max(group.span_close().byte_range().end) + .max(group.span_open().byte_range().end), + TokenTree::Ident(ident) => ident.span().byte_range().end, + TokenTree::Punct(punct) => punct.span().byte_range().end, + TokenTree::Literal(lit) => lit.span().byte_range().end, + } +} + +fn collect_default_bindings(pattern: &PatternArgs, names: &mut BTreeSet) { + match pattern { + PatternArgs::Ident(ident) => { + names.insert(ident.value().clone()); + } + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => { + collect_default_bindings(base, names); + } + PatternArgs::Minus { base, exclude } => { + collect_default_bindings(base, names); + collect_default_bindings(exclude, names); + } + PatternArgs::Sep { base, .. } => { + collect_default_bindings(base, names); + } + PatternArgs::Group { .. 
} + | PatternArgs::TerminalSet(_) + | PatternArgs::Byte(_) + | PatternArgs::ByteString(_) + | PatternArgs::Char(_) + | PatternArgs::String(_) => {} + } +} + +struct CompletionBuilder { + range: Range, + seen: BTreeSet, + items: Vec, +} + +impl CompletionBuilder { + fn new(range: Range) -> Self { + CompletionBuilder { + range, + seen: BTreeSet::new(), + items: Vec::new(), + } + } + + fn terminal(&mut self, label: &str) { + self.push(label, CompletionItemKind::ENUM_MEMBER, "terminal symbol"); + } + + fn nonterminal(&mut self, label: &str) { + self.push(label, CompletionItemKind::CLASS, "non-terminal symbol"); + } + + fn keyword(&mut self, label: &str, detail: &str) { + self.push(label, CompletionItemKind::KEYWORD, detail); + } + + fn variable(&mut self, label: &str, detail: &str) { + self.push(label, CompletionItemKind::VARIABLE, detail); + } + + fn push(&mut self, label: &str, kind: CompletionItemKind, detail: &str) { + if !self.seen.insert(label.to_string()) { + return; + } + + self.items.push(CompletionItem { + label: label.to_string(), + kind: Some(kind), + detail: Some(detail.to_string()), + text_edit: Some(CompletionTextEdit::Edit(TextEdit { + range: self.range, + new_text: label.to_string(), + })), + ..Default::default() + }); + } + + fn finish(self) -> Vec { + self.items + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, +} + +%% + +%tokentype Token; +%start E; + +%token num Token::Num(_); +%token plus Token::Plus; + +E(i32) : left=E plus num { $ } + | num { num } + ; +"#; + + fn labels(response: CompletionResponse) -> BTreeSet { + match response { + CompletionResponse::Array(items) => items + .into_iter() + .map(|item| item.label) + .collect::>(), + _ => BTreeSet::new(), + } + } + + #[test] + fn completes_symbols() { + let pos = offset_to_position(MOCK_GRAMMAR, MOCK_GRAMMAR.find("plus num").unwrap()); + let labels = labels(completions(MOCK_GRAMMAR, pos)); + 
assert!(labels.contains("E")); + assert!(labels.contains("num")); + assert!(labels.contains("plus")); + assert!(labels.contains("error")); + } + + #[test] + fn completes_dollar_variables() { + let offset = MOCK_GRAMMAR.find("$ }").unwrap() + 1; + let labels = labels(completions( + MOCK_GRAMMAR, + offset_to_position(MOCK_GRAMMAR, offset), + )); + assert!(labels.contains("$tokentype")); + assert!(labels.contains("$E")); + assert!(labels.contains("$num")); + assert!(labels.contains("$left")); + assert!(labels.contains("$1")); + } + + #[test] + fn completes_directives() { + let content = "%%\n%"; + let labels = labels(completions(content, Position::new(1, 1))); + assert!(labels.contains("%token")); + assert!(labels.contains("%start")); + } +} diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index 0492461f..407cdc07 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -3,8 +3,8 @@ use lsp_types::{ notification::{ DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, }, - request::GotoDefinition, - Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Location, OneOf, + request::{Completion, GotoDefinition}, + CompletionOptions, Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Location, OneOf, PublishDiagnosticsParams, Range, ServerCapabilities, TextDocumentSyncCapability, TextDocumentSyncKind, Url, }; @@ -16,6 +16,7 @@ use std::panic::{catch_unwind, set_hook, take_hook, AssertUnwindSafe}; use lsp_types::notification::Notification as LspNotification; use lsp_types::request::Request as LspRequest; +mod completion; mod diagnostics; mod goto_definition; mod position; @@ -30,6 +31,10 @@ fn main() -> Result<(), Box> { let server_capabilities = serde_json::to_value(&ServerCapabilities { text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), definition_provider: Some(OneOf::Left(true)), + completion_provider: Some(CompletionOptions { + trigger_characters: 
Some(completion_trigger_characters()), + ..Default::default() + }), ..Default::default() })?; @@ -77,6 +82,32 @@ fn main() -> Result<(), Box> { } } connection.sender.send(Message::Response(response))?; + } else if req.method == Completion::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting completion request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position.text_document.uri; + let position = params.text_document_position.position; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| completion::completions(content, position)) { + Ok(completions) => Response::new_ok(id, completions), + Err(message) => { + eprintln!("RustyLR completion panicked: {message}"); + Response::new_ok( + id, + lsp_types::CompletionResponse::Array(Vec::new()), + ) + } + } + } else { + Response::new_ok(id, lsp_types::CompletionResponse::Array(Vec::new())) + }; + connection.sender.send(Message::Response(response))?; } } Message::Response(_resp) => {} @@ -130,6 +161,13 @@ fn main() -> Result<(), Box> { Ok(()) } +fn completion_trigger_characters() -> Vec { + "%@$_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + .chars() + .map(|ch| ch.to_string()) + .collect() +} + fn publish_diagnostics(connection: &Connection, uri: Url, content: &str) { let diags = match catch_lsp_panic(|| diagnostics::compile_and_get_diagnostics(content)) { Ok(diags) => diags, From 4aacd2b8ca5ec3ca7060a97c32309d893bc10424 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Mon, 22 Jun 2026 21:29:12 +0900 Subject: [PATCH 04/20] suggestion details --- rusty_lr_lsp/README.md | 2 +- rusty_lr_lsp/src/completion.rs | 514 ++++++++++++++++++++++++++++++--- rusty_lr_parser/src/grammar.rs | 20 ++ 3 files changed, 496 insertions(+), 40 deletions(-) diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md index 7730ef33..0f93274c 100644 --- a/rusty_lr_lsp/README.md +++ b/rusty_lr_lsp/README.md @@ 
-15,7 +15,7 @@ Other Rust files are intentionally not matched by default. - **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. - **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. -- **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. +- **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. Completion details include the resolved Rust type for terminals and non-terminals, including inferred placeholders and a note when the value is boxed for parser storage. - **Completion for directives and keywords:** Suggests directives such as `%token`, `%start`, `%tokentype`, `%left`, `%right`, `%precedence`, `%prec`, `%dprec`, `%glr`, `%lalr`, `%nooptim`, `%allow`, and common identifiers such as `error`, `$sep`, `data`, `lookahead`, and `shift`. - **Completion for `$...` variables:** Suggests built-in substitutions (`$tokentype`, `$location`, `$userdata`, `$error`, `$errortype`), terminal and non-terminal substitutions (`$terminal_name`, `$NonTerminalName`), current reduce-action bindings (`$left`, `$value`, etc.), and positional semantic variables (`$1`, `$2`, ...). - **Completion for locations:** Suggests `@$`, `@0`, positional locations (`@1`, `@2`, ...), and named binding locations (`@left`, `@value`, etc.). 
diff --git a/rusty_lr_lsp/src/completion.rs b/rusty_lr_lsp/src/completion.rs index b0fc3226..b4f5f83d 100644 --- a/rusty_lr_lsp/src/completion.rs +++ b/rusty_lr_lsp/src/completion.rs @@ -1,11 +1,11 @@ use lsp_types::{ - CompletionItem, CompletionItemKind, CompletionResponse, CompletionTextEdit, Position, Range, - TextEdit, + CompletionItem, CompletionItemKind, CompletionResponse, CompletionTextEdit, Documentation, + MarkupContent, MarkupKind, Position, Range, TextEdit, }; use proc_macro2::{TokenStream, TokenTree}; use rusty_lr_parser::grammar::Grammar; -use rusty_lr_parser::{GrammarArgs, PatternArgs}; -use std::collections::BTreeSet; +use rusty_lr_parser::{GrammarArgs, Location, PatternArgs}; +use std::collections::{BTreeMap, BTreeSet}; use std::str::FromStr; use crate::diagnostics::split_stream; @@ -64,6 +64,8 @@ const KEYWORDS: &[&str] = &[ "shift", ]; +const SYNTAX_URL: &str = "https://github.com/ehwan/RustyLR/blob/main/SYNTAX.md"; + #[derive(Clone, Copy, PartialEq, Eq)] enum CompletionMode { Directive, @@ -81,7 +83,7 @@ pub fn completions(content: &str, position: Position) -> CompletionResponse { let parsed = parse_args(content).ok(); let names = parsed .as_ref() - .map(CompletionNames::from_args) + .map(|args| CompletionNames::from_args(args, content)) .unwrap_or_else(|| CompletionNames::from_text(content)); let line_variables = parsed .as_ref() @@ -93,52 +95,105 @@ pub fn completions(content: &str, position: Position) -> CompletionResponse { match mode { CompletionMode::Directive => { for directive in DIRECTIVES { - builder.keyword(directive, "RustyLR directive"); + builder.keyword( + directive, + "RustyLR directive", + keyword_documentation(directive), + ); } } CompletionMode::Dollar => { for variable in SUBSTITUTION_VARIABLES { - builder.variable(variable, "built-in RustCode substitution"); + builder.variable( + variable, + "built-in RustCode substitution", + substitution_documentation(variable), + ); } - for name in &names.nonterminals { - 
builder.variable(&format!("${name}"), "non-terminal production type"); + for (name, documentation) in &names.nonterminals { + builder.variable( + &format!("${name}"), + "non-terminal production type", + Some(format!( + "Substitutes to the production type of non-terminal `{name}`.\n\n{documentation}\n\n[Variable substitution]({SYNTAX_URL}#variable-substitution)" + )), + ); } - for name in &names.terminals { - builder.variable(&format!("${name}"), "terminal definition substitution"); + for (name, documentation) in &names.terminals { + builder.variable( + &format!("${name}"), + "terminal definition substitution", + Some(format!( + "Substitutes to the `%token` definition for terminal `{name}`.\n\n{documentation}\n\n[Variable substitution]({SYNTAX_URL}#variable-substitution)" + )), + ); } for variable in &line_variables.value_names { - builder.variable(&format!("${variable}"), "current production binding"); + builder.variable( + &format!("${variable}"), + "current production binding", + Some(format!( + "Semantic value bound by the current production line.\n\nExample:\n\n```rustylr\nExpr : left=Expr plus right=Term {{ left + right }};\n```\n\nHere `$left` and `$right` can be used in RustCode substitution contexts.\n\n[Named variables]({SYNTAX_URL}#named-variables)" + )), + ); } for index in 1..=line_variables.value_count { - builder.variable(&format!("${index}"), "positional semantic value"); + builder.variable( + &format!("${index}"), + "positional semantic value", + Some(format!( + "Semantic value of RHS symbol #{index} in the current production line.\n\nExample:\n\n```rustylr\nExpr : Expr plus Term {{ $1 }};\n```\n\n[Bison-style positional variables]({SYNTAX_URL}#3-bison-style-positional-variables)" + )), + ); } } CompletionMode::Location => { - builder.variable("@$", "current production location"); - builder.variable("@0", "current production location"); + builder.variable( + "@$", + "current production location", + location_documentation("@$"), + ); + 
builder.variable( + "@0", + "current production location", + location_documentation("@0"), + ); for variable in &line_variables.value_names { builder.variable( &format!("@{variable}"), "current production binding location", + location_documentation(&format!("@{variable}")), ); } for index in 1..=line_variables.value_count { - builder.variable(&format!("@{index}"), "positional location"); + builder.variable( + &format!("@{index}"), + "positional location", + location_documentation(&format!("@{index}")), + ); } } CompletionMode::AllowDiagnostic => { for diagnostic in ALLOW_DIAGNOSTICS { - builder.keyword(diagnostic, "diagnostic suppression name"); + builder.keyword( + diagnostic, + "diagnostic suppression name", + allow_diagnostic_documentation(diagnostic), + ); } add_symbol_items(&mut builder, &names); } CompletionMode::Symbol => { add_symbol_items(&mut builder, &names); for keyword in KEYWORDS { - builder.keyword(keyword, "RustyLR keyword"); + builder.keyword(keyword, "RustyLR keyword", keyword_documentation(keyword)); } for directive in DIRECTIVES { - builder.keyword(directive, "RustyLR directive"); + builder.keyword( + directive, + "RustyLR directive", + keyword_documentation(directive), + ); } } } @@ -147,11 +202,11 @@ pub fn completions(content: &str, position: Position) -> CompletionResponse { } fn add_symbol_items(builder: &mut CompletionBuilder, names: &CompletionNames) { - for name in &names.nonterminals { - builder.nonterminal(name); + for (name, documentation) in &names.nonterminals { + builder.nonterminal(name, documentation.clone()); } - for name in &names.terminals { - builder.terminal(name); + for (name, documentation) in &names.terminals { + builder.terminal(name, documentation.clone()); } } @@ -232,18 +287,36 @@ fn is_ident_continue(ch: char) -> bool { #[derive(Default)] struct CompletionNames { - terminals: BTreeSet, - nonterminals: BTreeSet, + terminals: BTreeMap, + nonterminals: BTreeMap, } impl CompletionNames { - fn from_args(args: 
&GrammarArgs) -> Self { + fn from_args(args: &GrammarArgs, content: &str) -> Self { let mut names = CompletionNames::default(); + let types = ResolvedTypes::from_args(args); for (terminal, _) in &args.terminals { - names.terminals.insert(terminal.value().clone()); + let line = line_text_for_location(args, content, &terminal.location()); + names.terminals.insert( + terminal.value().clone(), + terminal_documentation(terminal.value(), &line, types.token_type.as_ref()), + ); } for rule in &args.rules { - names.nonterminals.insert(rule.name.value().clone()); + let snippet = rule_definition_text(args, content, rule); + let documentation = nonterminal_documentation( + rule.name.value(), + &snippet, + types.nonterminals.get(rule.name.value()), + ); + names + .nonterminals + .entry(rule.name.value().clone()) + .and_modify(|existing| { + existing.push_str("\n\n---\n\n"); + existing.push_str(&documentation); + }) + .or_insert(documentation); } names } @@ -258,7 +331,9 @@ impl CompletionNames { let line = raw_line.trim_start(); if let Some(rest) = line.strip_prefix("%token") { if let Some(name) = first_ident(rest) { - names.terminals.insert(name.to_string()); + names + .terminals + .insert(name.to_string(), terminal_documentation(name, line, None)); } continue; } @@ -270,7 +345,10 @@ impl CompletionNames { if let Some(colon_idx) = line.find(':') { let head = &line[..colon_idx]; if let Some(name) = first_ident(head) { - names.nonterminals.insert(name.to_string()); + names.nonterminals.insert( + name.to_string(), + nonterminal_documentation(name, line.trim(), None), + ); } } } @@ -279,6 +357,100 @@ impl CompletionNames { } } +#[derive(Default)] +struct ResolvedTypes { + token_type: Option, + nonterminals: BTreeMap, +} + +struct ResolvedRustType { + name: String, + boxed: bool, +} + +impl ResolvedTypes { + fn from_args(args: &GrammarArgs) -> Self { + let Ok(grammar) = Grammar::from_grammar_args(args.clone()) else { + return ResolvedTypes::default(); + }; + + let mut types = 
ResolvedTypes { + token_type: Some(resolved_rust_type( + Some(grammar.token_type()), + grammar.token_type_boxed(), + )), + nonterminals: BTreeMap::new(), + }; + for rule in &args.rules { + if let Some((rule_type, boxed)) = grammar.nonterminal_type(rule.name.value()) { + types.nonterminals.insert( + rule.name.value().clone(), + resolved_rust_type(rule_type, boxed), + ); + } + } + + types + } +} + +fn resolved_rust_type(ty: Option<&TokenStream>, boxed: bool) -> ResolvedRustType { + let name = ty + .map(TokenStream::to_string) + .filter(|ty| !ty.is_empty()) + .unwrap_or_else(|| "()".to_string()); + ResolvedRustType { name, boxed } +} + +fn line_text_for_location(args: &GrammarArgs, content: &str, location: &Location) -> String { + let offset = args + .span_manager + .get_byterange(location) + .map_or(0, |range| range.start); + let start = content[..offset.min(content.len())] + .rfind('\n') + .map_or(0, |idx| idx + 1); + let end = content[offset.min(content.len())..] + .find('\n') + .map_or(content.len(), |idx| offset + idx); + content[start..end].trim().to_string() +} + +fn rule_definition_text( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, +) -> String { + let rule_start = args + .span_manager + .get_byterange(&rule.name.location()) + .map_or(0, |range| range.start); + let start = content[..rule_start.min(content.len())] + .rfind('\n') + .map_or(0, |idx| idx + 1); + let first_separator = rule.rule_lines.first().and_then(|line| { + args.span_manager + .get_byterange(&line.separator_location) + .map(|range| range.start) + }); + let header_end = first_separator.unwrap_or(rule_start).min(content.len()); + let header = content[start..header_end].trim(); + let mut definition = String::new(); + definition.push_str(header); + for (line_idx, line) in rule.rule_lines.iter().enumerate() { + let tokens = rule_line_tokens_text(args, content, line); + definition.push('\n'); + definition.push(' '); + definition.push(if line_idx == 0 { ':' } else { 
'|' }); + if !tokens.is_empty() { + definition.push(' '); + definition.push_str(&tokens); + } + } + definition.push_str("\n ;"); + definition +} + fn first_ident(text: &str) -> Option<&str> { let start = text.find(|ch: char| ch == '_' || ch.is_ascii_alphabetic())?; let rest = &text[start..]; @@ -288,6 +460,31 @@ fn first_ident(text: &str) -> Option<&str> { Some(&rest[..end]) } +fn rule_line_tokens_text( + args: &GrammarArgs, + content: &str, + line: &rusty_lr_parser::RuleLineArgs, +) -> String { + line.tokens + .iter() + .map(|(mapped_name, pattern)| { + let start = mapped_name + .as_ref() + .and_then(|name| { + args.span_manager + .get_byterange(&name.location()) + .map(|range| range.start) + }) + .unwrap_or_else(|| pattern_start(args, pattern)); + let end = pattern_end(args, pattern); + content[start.min(content.len())..end.min(content.len())] + .trim() + .to_string() + }) + .collect::>() + .join(" ") +} + #[derive(Default)] struct LineVariables { value_names: BTreeSet, @@ -416,6 +613,48 @@ fn pattern_end(args: &GrammarArgs, pattern: &PatternArgs) -> usize { } } +fn pattern_start(args: &GrammarArgs, pattern: &PatternArgs) -> usize { + match pattern { + PatternArgs::Ident(ident) => args + .span_manager + .get_byterange(&ident.location()) + .map_or(0, |range| range.start), + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => pattern_start(args, base), + PatternArgs::TerminalSet(set) => args + .span_manager + .get_byterange(&set.location()) + .map_or(0, |range| range.start), + PatternArgs::Group { open_location, .. 
} => args + .span_manager + .get_byterange(open_location) + .map_or(0, |range| range.start), + PatternArgs::Byte(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::ByteString(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::Char(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::String(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::Minus { base, .. } => pattern_start(args, base), + PatternArgs::Sep { location, .. } => args + .span_manager + .get_byterange(location) + .map_or(0, |range| range.start), + } +} + fn token_stream_end(stream: &TokenStream) -> usize { stream .clone() @@ -463,6 +702,142 @@ fn collect_default_bindings(pattern: &PatternArgs, names: &mut BTreeSet) } } +fn terminal_documentation( + name: &str, + definition: &str, + rust_type: Option<&ResolvedRustType>, +) -> String { + let type_line = type_line(rust_type); + format!( + "Terminal symbol `{name}`.\n\n{type_line}\n\nDefinition:\n\n```rustylr\n{definition}\n```\n\n[Token definition]({SYNTAX_URL}#token-definition-must-defined)" + ) +} + +fn nonterminal_documentation( + name: &str, + definition: &str, + rust_type: Option<&ResolvedRustType>, +) -> String { + let type_line = type_line(rust_type); + format!( + "Non-terminal symbol `{name}`.\n\n{type_line}\n\nDefinition:\n\n```rustylr\n{definition}\n```\n\n[Production rules]({SYNTAX_URL}#production-rules)" + ) +} + +fn type_line(rust_type: Option<&ResolvedRustType>) -> String { + match rust_type { + Some(rust_type) if rust_type.boxed => format!("Rust type: `{}` (boxed)", rust_type.name), + Some(rust_type) => format!("Rust type: `{}`", rust_type.name), + None => "Rust type: unavailable until the grammar parses successfully.".to_string(), + } +} + +fn keyword_documentation(label: &str) -> Option { + let 
documentation = match label { + "%token" => format!( + "Defines a terminal symbol and the Rust pattern that recognizes it.\n\nExample:\n\n```rustylr\n%token num Token::Num(_);\n```\n\n[Token definition]({SYNTAX_URL}#token-definition-must-defined)" + ), + "%start" => format!( + "Declares a start non-terminal for parser generation.\n\nExample:\n\n```rustylr\n%start Expr;\n```\n\n[Start symbol]({SYNTAX_URL}#start-symbol-must-defined)" + ), + "%tokentype" => format!( + "Sets the Rust type used as the parser's input terminal token type.\n\nExample:\n\n```rustylr\n%tokentype Token;\n```\n\n[Token type]({SYNTAX_URL}#token-type-must-defined)" + ), + "%userdata" => format!( + "Sets the mutable user-data type threaded through parser contexts and reduce actions.\n\nExample:\n\n```rustylr\n%userdata ParserState;\n```\n\n[Userdata type]({SYNTAX_URL}#userdata-type-optional)" + ), + "%error" | "%errortype" => format!( + "Sets the custom error type returned by reduce actions.\n\nExample:\n\n```rustylr\n%error String;\n```\n\n[Error type]({SYNTAX_URL}#error-type-optional)" + ), + "%location" => format!( + "Sets the source-location type used by `@...` location bindings.\n\nExample:\n\n```rustylr\n%location Span;\n```\n\n[Location tracking]({SYNTAX_URL}#location-tracking)" + ), + "%left" => format!( + "Declares left-associative operator precedence for one or more terminals.\n\nExample:\n\n```rustylr\n%left plus minus;\n```\n\n[Operator precedence]({SYNTAX_URL}#operator-precedence)" + ), + "%right" => format!( + "Declares right-associative operator precedence for one or more terminals.\n\nExample:\n\n```rustylr\n%right caret;\n```\n\n[Operator precedence]({SYNTAX_URL}#operator-precedence)" + ), + "%precedence" => format!( + "Declares precedence without associativity.\n\nExample:\n\n```rustylr\n%precedence unary_minus;\n```\n\n[Operator precedence]({SYNTAX_URL}#operator-precedence)" + ), + "%prec" => format!( + "Overrides the precedence of a specific production 
line.\n\nExample:\n\n```rustylr\nExpr : minus Expr %prec unary_minus {{ Expr }};\n```\n\n[Explicit precedence]({SYNTAX_URL}#explicit-precedence-prec)" + ), + "%dprec" => format!( + "Assigns a dynamic precedence priority to a production, mainly for GLR reduce/reduce control.\n\nExample:\n\n```rustylr\nExpr : Expr star Expr %dprec 2 {{ ... }};\n```\n\n[Rule priority]({SYNTAX_URL}#rule-priority)" + ), + "%glr" => format!( + "Enables Generalized LR parser generation for ambiguous grammars.\n\nExample:\n\n```rustylr\n%glr;\n```\n\n[GLR parser generation]({SYNTAX_URL}#glr-parser-generation)" + ), + "%lalr" => format!( + "Generates LALR(1) parsing tables instead of the default LR construction.\n\nExample:\n\n```rustylr\n%lalr;\n```\n\n[LALR parser generation]({SYNTAX_URL}#lalr-parser-generation)" + ), + "%nooptim" => format!( + "Disables parser table optimization.\n\nExample:\n\n```rustylr\n%nooptim;\n```\n\n[No optimization]({SYNTAX_URL}#no-optimization)" + ), + "%allow" => format!( + "Suppresses a RustyLR diagnostic globally or for a specific target.\n\nExample:\n\n```rustylr\n%allow unused_terminals(plus);\n```\n\n[Diagnostic suppression]({SYNTAX_URL}#diagnostic-suppression)" + ), + "%moduleprefix" => { + "Internal directive used by RustyLR's own generated parser code. 
Most grammars should not use this directly.".to_string() + } + "error" => format!( + "Reserved terminal used for panic-mode error recovery.\n\nExample:\n\n```rustylr\nBlock : lbrace error rbrace {{ recover() }};\n```\n\n[Panic-mode error recovery]({SYNTAX_URL}#panic-mode-error-recovery)" + ), + "$sep" => format!( + "Pattern helper for separated repetition.\n\nExample:\n\n```rustylr\nList : $sep(Item, comma, +) {{ Item }};\n```\n\n[Patterns]({SYNTAX_URL}#patterns)" + ), + "data" => format!( + "Mutable user-data binding available inside reduce actions.\n\nExample:\n\n```rustylr\nExpr : num {{ data.count += 1; num }};\n```\n\n[User data]({SYNTAX_URL}#4-user-data-data)" + ), + "lookahead" => format!( + "GLR reduce-action control binding for inspecting the next terminal.\n\nExample:\n\n```rustylr\nif let Some(term) = lookahead.to_term() {{ /* ... */ }}\n```\n\n[Advanced GLR reduce controls]({SYNTAX_URL}#advanced-glr-reduce-controls)" + ), + "shift" => format!( + "GLR reduce-action control binding used to allow or prune a shift branch.\n\nExample:\n\n```rustylr\n*shift = false;\n```\n\n[Advanced GLR reduce controls]({SYNTAX_URL}#advanced-glr-reduce-controls)" + ), + "auto" => "Table layout mode selected automatically by RustyLR.".to_string(), + "dense" => "Dense table layout mode.".to_string(), + "sparse" => "Sparse table layout mode.".to_string(), + _ => return None, + }; + Some(documentation) +} + +fn substitution_documentation(label: &str) -> Option { + let documentation = match label { + "$tokentype" => "`$tokentype` substitutes to the type defined by `%tokentype`.", + "$location" => "`$location` substitutes to the type defined by `%location`.", + "$userdata" => "`$userdata` substitutes to the type defined by `%userdata`.", + "$error" => "`$error` substitutes to the configured reduce-action error type.", + "$errortype" => "`$errortype` is an alias for the configured reduce-action error type.", + _ => return None, + }; + Some(format!( + 
"{documentation}\n\nExample:\n\n```rustylr\nRule($tokentype) : token {{ $tokentype }};\n```\n\n[Variable substitution]({SYNTAX_URL}#variable-substitution)" + )) +} + +fn location_documentation(label: &str) -> Option { + Some(format!( + "`{label}` refers to a source-location value in the current reduce action.\n\nExamples:\n\n```rustylr\nExpr : left=Expr plus right=Term {{ println!(\"{{:?}}\", @left); }};\nExpr : Expr plus Term {{ println!(\"{{:?}}\", @1); }};\nExpr : Term {{ println!(\"{{:?}}\", @$); }};\n```\n\n[Location tracking]({SYNTAX_URL}#location-tracking)" + )) +} + +fn allow_diagnostic_documentation(name: &str) -> Option { + Some(format!( + "Diagnostic suppression name `{name}`.\n\nExample:\n\n```rustylr\n%allow {name};\n%allow {name}(SomeTarget);\n```\n\n[Diagnostic suppression]({SYNTAX_URL}#diagnostic-suppression)" + )) +} + +fn markdown_documentation(value: String) -> Documentation { + Documentation::MarkupContent(MarkupContent { + kind: MarkupKind::Markdown, + value, + }) +} + struct CompletionBuilder { range: Range, seen: BTreeSet, @@ -478,23 +853,39 @@ impl CompletionBuilder { } } - fn terminal(&mut self, label: &str) { - self.push(label, CompletionItemKind::ENUM_MEMBER, "terminal symbol"); + fn terminal(&mut self, label: &str, documentation: String) { + self.push( + label, + CompletionItemKind::ENUM_MEMBER, + "terminal symbol", + Some(documentation), + ); } - fn nonterminal(&mut self, label: &str) { - self.push(label, CompletionItemKind::CLASS, "non-terminal symbol"); + fn nonterminal(&mut self, label: &str, documentation: String) { + self.push( + label, + CompletionItemKind::CLASS, + "non-terminal symbol", + Some(documentation), + ); } - fn keyword(&mut self, label: &str, detail: &str) { - self.push(label, CompletionItemKind::KEYWORD, detail); + fn keyword(&mut self, label: &str, detail: &str, documentation: Option) { + self.push(label, CompletionItemKind::KEYWORD, detail, documentation); } - fn variable(&mut self, label: &str, detail: &str) { - 
self.push(label, CompletionItemKind::VARIABLE, detail); + fn variable(&mut self, label: &str, detail: &str, documentation: Option) { + self.push(label, CompletionItemKind::VARIABLE, detail, documentation); } - fn push(&mut self, label: &str, kind: CompletionItemKind, detail: &str) { + fn push( + &mut self, + label: &str, + kind: CompletionItemKind, + detail: &str, + documentation: Option, + ) { if !self.seen.insert(label.to_string()) { return; } @@ -507,6 +898,7 @@ impl CompletionBuilder { range: self.range, new_text: label.to_string(), })), + documentation: documentation.map(markdown_documentation), ..Default::default() }); } @@ -538,6 +930,8 @@ pub enum Token { E(i32) : left=E plus num { $ } | num { num } ; + +Boxed(box $tokentype) : num { num }; "#; fn labels(response: CompletionResponse) -> BTreeSet { @@ -550,6 +944,13 @@ E(i32) : left=E plus num { $ } } } + fn items(response: CompletionResponse) -> Vec { + match response { + CompletionResponse::Array(items) => items, + _ => Vec::new(), + } + } + #[test] fn completes_symbols() { let pos = offset_to_position(MOCK_GRAMMAR, MOCK_GRAMMAR.find("plus num").unwrap()); @@ -581,4 +982,39 @@ E(i32) : left=E plus num { $ } assert!(labels.contains("%token")); assert!(labels.contains("%start")); } + + #[test] + fn completion_items_include_markdown_documentation() { + let pos = offset_to_position(MOCK_GRAMMAR, MOCK_GRAMMAR.find("plus num").unwrap()); + let items = items(completions(MOCK_GRAMMAR, pos)); + + let terminal = items.iter().find(|item| item.label == "plus").unwrap(); + let markup = markdown_value(terminal); + assert!(markup.contains("Rust type: `Token`")); + assert!(markup.contains("%token plus Token::Plus;")); + + let nonterminal = items.iter().find(|item| item.label == "E").unwrap(); + let markup = markdown_value(nonterminal); + assert!(markup.contains("Rust type: `i32`")); + assert!(markup.contains("E(i32)")); + assert!(markup.contains("E(i32)\n : left=E plus num")); + assert!(markup.contains("left=E plus 
num")); + assert!(markup.contains("\n | num")); + assert!(!markup.contains("{ $ }")); + assert!(!markup.contains("{ num }")); + + let boxed = items.iter().find(|item| item.label == "Boxed").unwrap(); + let markup = markdown_value(boxed); + assert!(markup.contains("Rust type: `Token` (boxed)")); + assert!(markup.contains("Boxed(box $tokentype)")); + assert!(!markup.contains("{ num }")); + } + + fn markdown_value(item: &CompletionItem) -> &str { + let documentation = item.documentation.as_ref().unwrap(); + let Documentation::MarkupContent(markup) = documentation else { + panic!("expected markdown documentation"); + }; + &markup.value + } } diff --git a/rusty_lr_parser/src/grammar.rs b/rusty_lr_parser/src/grammar.rs index e2feeb05..e64b7266 100644 --- a/rusty_lr_parser/src/grammar.rs +++ b/rusty_lr_parser/src/grammar.rs @@ -154,6 +154,26 @@ pub enum ResolvedAllowTarget { } impl Grammar { + /// Resolved Rust type for `%tokentype`, after substitutions such as `$tokentype` + /// and storage modifiers such as `box` have been stripped. + pub fn token_type(&self) -> &TokenStream { + &self.token_typename + } + + /// Whether terminal values are stored as `Box<%tokentype>` in the generated + /// parser's data enum. + pub fn token_type_boxed(&self) -> bool { + self.is_tokentype_boxed + } + + /// Resolved Rust type for a non-terminal by name, plus whether it is boxed + /// in the generated parser's data enum. 
+ pub fn nonterminal_type(&self, name: &str) -> Option<(Option<&TokenStream>, bool)> { + let index = self.nonterminals_index.get(name)?; + let nonterminal = &self.nonterminals[*index]; + Some((nonterminal.ruletype.as_ref(), nonterminal.ruletype_boxed)) + } + fn is_terminal_allowed_by_target(&self, term: Terminal, target: &ResolvedAllowTarget) -> bool { match target { ResolvedAllowTarget::Name(name) => { From b5efae122eac0d1f7d9686b61256b3b8a351df05 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 07:11:37 +0900 Subject: [PATCH 05/20] Hovering --- rusty_lr_lsp/README.md | 1 + rusty_lr_lsp/src/completion.rs | 50 ++- rusty_lr_lsp/src/hover.rs | 626 +++++++++++++++++++++++++++++++++ rusty_lr_lsp/src/main.rs | 33 +- 4 files changed, 680 insertions(+), 30 deletions(-) create mode 100644 rusty_lr_lsp/src/hover.rs diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md index 0f93274c..533e1090 100644 --- a/rusty_lr_lsp/README.md +++ b/rusty_lr_lsp/README.md @@ -15,6 +15,7 @@ Other Rust files are intentionally not matched by default. - **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. - **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. +- **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. - **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. Completion details include the resolved Rust type for terminals and non-terminals, including inferred placeholders and a note when the value is boxed for parser storage. 
- **Completion for directives and keywords:** Suggests directives such as `%token`, `%start`, `%tokentype`, `%left`, `%right`, `%precedence`, `%prec`, `%dprec`, `%glr`, `%lalr`, `%nooptim`, `%allow`, and common identifiers such as `error`, `$sep`, `data`, `lookahead`, and `shift`. - **Completion for `$...` variables:** Suggests built-in substitutions (`$tokentype`, `$location`, `$userdata`, `$error`, `$errortype`), terminal and non-terminal substitutions (`$terminal_name`, `$NonTerminalName`), current reduce-action bindings (`$left`, `$value`, etc.), and positional semantic variables (`$1`, `$2`, ...). diff --git a/rusty_lr_lsp/src/completion.rs b/rusty_lr_lsp/src/completion.rs index b4f5f83d..9106afc7 100644 --- a/rusty_lr_lsp/src/completion.rs +++ b/rusty_lr_lsp/src/completion.rs @@ -11,7 +11,7 @@ use std::str::FromStr; use crate::diagnostics::split_stream; use crate::position::{offset_to_position, position_to_offset}; -const DIRECTIVES: &[&str] = &[ +pub(crate) const DIRECTIVES: &[&str] = &[ "%token", "%start", "%tokentype", @@ -31,7 +31,7 @@ const DIRECTIVES: &[&str] = &[ "%moduleprefix", ]; -const SUBSTITUTION_VARIABLES: &[&str] = &[ +pub(crate) const SUBSTITUTION_VARIABLES: &[&str] = &[ "$tokentype", "$location", "$userdata", @@ -39,7 +39,7 @@ const SUBSTITUTION_VARIABLES: &[&str] = &[ "$errortype", ]; -const ALLOW_DIAGNOSTICS: &[&str] = &[ +pub(crate) const ALLOW_DIAGNOSTICS: &[&str] = &[ "nonterm_unreachable", "nonterm_unproductive", "unused_nonterm_data", @@ -53,7 +53,7 @@ const ALLOW_DIAGNOSTICS: &[&str] = &[ "reduce_reduce_conflict_glr", ]; -const KEYWORDS: &[&str] = &[ +pub(crate) const KEYWORDS: &[&str] = &[ "error", "auto", "dense", @@ -64,7 +64,7 @@ const KEYWORDS: &[&str] = &[ "shift", ]; -const SYNTAX_URL: &str = "https://github.com/ehwan/RustyLR/blob/main/SYNTAX.md"; +pub(crate) const SYNTAX_URL: &str = "https://github.com/ehwan/RustyLR/blob/main/SYNTAX.md"; #[derive(Clone, Copy, PartialEq, Eq)] enum CompletionMode { @@ -210,7 +210,7 @@ fn 
add_symbol_items(builder: &mut CompletionBuilder, names: &CompletionNames) { } } -fn parse_args(content: &str) -> Result { +pub(crate) fn parse_args(content: &str) -> Result { let token_stream = TokenStream::from_str(content).map_err(|_| ())?; let (_, macro_stream) = split_stream(token_stream).map_err(|_| ())?; Grammar::parse_args(macro_stream).map_err(|_| ()) @@ -257,7 +257,7 @@ fn replacement_range(content: &str, offset: usize, mode: CompletionMode) -> Rang ) } -fn current_prefix_start(content: &str, offset: usize, include_sigils: bool) -> usize { +pub(crate) fn current_prefix_start(content: &str, offset: usize, include_sigils: bool) -> usize { let mut start = offset.min(content.len()); while start > 0 { let Some(ch) = content[..start].chars().next_back() else { @@ -281,7 +281,7 @@ fn line_prefix(content: &str, offset: usize) -> &str { &content[line_start..offset] } -fn is_ident_continue(ch: char) -> bool { +pub(crate) fn is_ident_continue(ch: char) -> bool { ch == '_' || ch.is_ascii_alphanumeric() } @@ -402,7 +402,11 @@ fn resolved_rust_type(ty: Option<&TokenStream>, boxed: bool) -> ResolvedRustType ResolvedRustType { name, boxed } } -fn line_text_for_location(args: &GrammarArgs, content: &str, location: &Location) -> String { +pub(crate) fn line_text_for_location( + args: &GrammarArgs, + content: &str, + location: &Location, +) -> String { let offset = args .span_manager .get_byterange(location) @@ -416,7 +420,7 @@ fn line_text_for_location(args: &GrammarArgs, content: &str, location: &Location content[start..end].trim().to_string() } -fn rule_definition_text( +pub(crate) fn rule_definition_text( args: &GrammarArgs, content: &str, rule: &rusty_lr_parser::RuleDefArgs, @@ -467,15 +471,8 @@ fn rule_line_tokens_text( ) -> String { line.tokens .iter() - .map(|(mapped_name, pattern)| { - let start = mapped_name - .as_ref() - .and_then(|name| { - args.span_manager - .get_byterange(&name.location()) - .map(|range| range.start) - }) - .unwrap_or_else(|| 
pattern_start(args, pattern)); + .map(|(_, pattern)| { + let start = pattern_start(args, pattern); let end = pattern_end(args, pattern); content[start.min(content.len())..end.min(content.len())] .trim() @@ -732,7 +729,7 @@ fn type_line(rust_type: Option<&ResolvedRustType>) -> String { } } -fn keyword_documentation(label: &str) -> Option { +pub(crate) fn keyword_documentation(label: &str) -> Option { let documentation = match label { "%token" => format!( "Defines a terminal symbol and the Rust pattern that recognizes it.\n\nExample:\n\n```rustylr\n%token num Token::Num(_);\n```\n\n[Token definition]({SYNTAX_URL}#token-definition-must-defined)" @@ -805,7 +802,7 @@ fn keyword_documentation(label: &str) -> Option { Some(documentation) } -fn substitution_documentation(label: &str) -> Option { +pub(crate) fn substitution_documentation(label: &str) -> Option { let documentation = match label { "$tokentype" => "`$tokentype` substitutes to the type defined by `%tokentype`.", "$location" => "`$location` substitutes to the type defined by `%location`.", @@ -819,19 +816,19 @@ fn substitution_documentation(label: &str) -> Option { )) } -fn location_documentation(label: &str) -> Option { +pub(crate) fn location_documentation(label: &str) -> Option { Some(format!( "`{label}` refers to a source-location value in the current reduce action.\n\nExamples:\n\n```rustylr\nExpr : left=Expr plus right=Term {{ println!(\"{{:?}}\", @left); }};\nExpr : Expr plus Term {{ println!(\"{{:?}}\", @1); }};\nExpr : Term {{ println!(\"{{:?}}\", @$); }};\n```\n\n[Location tracking]({SYNTAX_URL}#location-tracking)" )) } -fn allow_diagnostic_documentation(name: &str) -> Option { +pub(crate) fn allow_diagnostic_documentation(name: &str) -> Option { Some(format!( "Diagnostic suppression name `{name}`.\n\nExample:\n\n```rustylr\n%allow {name};\n%allow {name}(SomeTarget);\n```\n\n[Diagnostic suppression]({SYNTAX_URL}#diagnostic-suppression)" )) } -fn markdown_documentation(value: String) -> Documentation { 
+pub(crate) fn markdown_documentation(value: String) -> Documentation { Documentation::MarkupContent(MarkupContent { kind: MarkupKind::Markdown, value, @@ -997,8 +994,9 @@ Boxed(box $tokentype) : num { num }; let markup = markdown_value(nonterminal); assert!(markup.contains("Rust type: `i32`")); assert!(markup.contains("E(i32)")); - assert!(markup.contains("E(i32)\n : left=E plus num")); - assert!(markup.contains("left=E plus num")); + assert!(markup.contains("E(i32)\n : E plus num")); + assert!(markup.contains("E plus num")); + assert!(!markup.contains("left=E")); assert!(markup.contains("\n | num")); assert!(!markup.contains("{ $ }")); assert!(!markup.contains("{ num }")); diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs new file mode 100644 index 00000000..0070a6e3 --- /dev/null +++ b/rusty_lr_lsp/src/hover.rs @@ -0,0 +1,626 @@ +use lsp_types::{Hover, HoverContents, MarkupContent, MarkupKind, Position}; +use proc_macro2::TokenStream; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::terminal_info::TerminalName; +use rusty_lr_parser::{GrammarArgs, PatternArgs, TerminalSetItem}; +use std::collections::BTreeSet; +use std::ops::Range as ByteRange; + +use crate::completion::{ + self, ALLOW_DIAGNOSTICS, DIRECTIVES, KEYWORDS, SUBSTITUTION_VARIABLES, SYNTAX_URL, +}; +use crate::position::position_to_offset; + +pub fn hover(content: &str, position: Position) -> Option { + let offset = position_to_offset(content, position); + let parsed = completion::parse_args(content).ok(); + + if let Some(args) = &parsed { + if let Some((pattern, range)) = pattern_at_offset(args, offset) { + return Some(markdown_hover( + content, + pattern_documentation(args, pattern, content), + Some(range), + )); + } + } + + let word = hover_word(content, offset)?; + let documentation = hover_word_documentation(&word)?; + Some(markdown_hover(content, documentation, None)) +} + +fn markdown_hover(content: &str, value: String, range: Option>) -> Hover { + Hover { + 
contents: HoverContents::Markup(MarkupContent { + kind: MarkupKind::Markdown, + value, + }), + range: range.map(|range| crate::position::range_to_lsp_range(content, range)), + } +} + +fn hover_word(content: &str, offset: usize) -> Option { + let offset = offset.min(content.len()); + let start = completion::current_prefix_start(content, offset, true); + let mut end = offset; + while end < content.len() { + let ch = content[end..].chars().next()?; + if completion::is_ident_continue(ch) { + end += ch.len_utf8(); + } else { + break; + } + } + if start == end { + return None; + } + Some(content[start..end].to_string()) +} + +fn hover_word_documentation(word: &str) -> Option { + if DIRECTIVES.contains(&word) || KEYWORDS.contains(&word) { + return completion::keyword_documentation(word); + } + if SUBSTITUTION_VARIABLES.contains(&word) { + return completion::substitution_documentation(word); + } + if word.starts_with('@') { + return completion::location_documentation(word); + } + if ALLOW_DIAGNOSTICS.contains(&word) { + return completion::allow_diagnostic_documentation(word); + } + None +} + +fn pattern_at_offset( + args: &GrammarArgs, + offset: usize, +) -> Option<(&PatternArgs, ByteRange)> { + for rule in &args.rules { + for line in &rule.rule_lines { + for (_, pattern) in &line.tokens { + if let Some(range) = args.span_manager.get_byterange(&pattern.location()) { + if range.contains(&offset) { + return Some((pattern, range)); + } + } + } + } + } + None +} + +fn pattern_documentation(args: &GrammarArgs, pattern: &PatternArgs, content: &str) -> String { + let pattern_text = pattern_text(args, pattern, content); + let grammar = Grammar::from_grammar_args(args.clone()).ok(); + let pattern_type = grammar + .as_ref() + .and_then(|grammar| pattern_type(args, grammar, pattern)); + let type_line = hover_type_line(pattern_type.as_ref()); + let subterms = grammar + .as_ref() + .map(|grammar| subterm_documentation(args, grammar, pattern, content)) + .unwrap_or_default(); + let 
keyword = pattern_keyword_documentation(pattern); + + let mut documentation = format!("Pattern `{pattern_text}`.\n\n{type_line}"); + if !subterms.is_empty() { + documentation.push_str("\n\n"); + documentation.push_str(&subterms); + } + if let Some(keyword) = keyword { + documentation.push_str("\n\n---\n\n"); + documentation.push_str(&keyword); + } + documentation.push_str(&format!("\n\n[Patterns]({SYNTAX_URL}#patterns)")); + documentation +} + +fn pattern_text(args: &GrammarArgs, pattern: &PatternArgs, content: &str) -> String { + args.span_manager + .get_byterange(&pattern.location()) + .and_then(|range| content.get(range)) + .map(str::trim) + .filter(|text| !text.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| pattern.to_string()) +} + +fn subterm_documentation( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, + content: &str, +) -> String { + let mut seen_nonterminals = BTreeSet::new(); + let mut seen_terminals = BTreeSet::new(); + let mut symbols = Vec::new(); + collect_symbol_documentation( + args, + grammar, + pattern, + content, + &mut seen_nonterminals, + &mut seen_terminals, + &mut symbols, + ); + + let mut seen_syntax = BTreeSet::new(); + let mut syntax = Vec::new(); + collect_pattern_syntax(pattern, &mut seen_syntax, &mut syntax); + + let mut sections = Vec::new(); + if !symbols.is_empty() { + sections.push(format!("Identifiers:\n\n{}", symbols.join("\n\n"))); + } + if !syntax.is_empty() { + sections.push(format!("Pattern syntax:\n\n{}", syntax.join("\n"))); + } + sections.join("\n\n") +} + +fn collect_symbol_documentation( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, + content: &str, + seen_nonterminals: &mut BTreeSet, + seen_terminals: &mut BTreeSet, + symbols: &mut Vec, +) { + match pattern { + PatternArgs::Ident(ident) => { + let name = ident.value(); + if let Some(symbol) = nonterminal_symbol_documentation(args, grammar, content, name) + .filter(|_| seen_nonterminals.insert(name.clone())) + { + 
symbols.push(symbol); + } else if let Some(symbol) = terminal_symbol_documentation(args, grammar, content, name) + .filter(|_| seen_terminals.insert(name.clone())) + { + symbols.push(symbol); + } + } + PatternArgs::TerminalSet(terminal_set) => { + for item in &terminal_set.items { + match item { + TerminalSetItem::Terminal(ident) => { + let name = ident.value(); + if let Some(symbol) = + terminal_symbol_documentation(args, grammar, content, name) + .filter(|_| seen_terminals.insert(name.clone())) + { + symbols.push(symbol); + } + } + TerminalSetItem::Range(first, last) => { + for ident in [first, last] { + let name = ident.value(); + if let Some(symbol) = + terminal_symbol_documentation(args, grammar, content, name) + .filter(|_| seen_terminals.insert(name.clone())) + { + symbols.push(symbol); + } + } + } + TerminalSetItem::Byte(_) + | TerminalSetItem::ByteRange(_, _) + | TerminalSetItem::Char(_) + | TerminalSetItem::CharRange(_, _) => {} + } + } + } + _ => {} + } + + for child in pattern_children(pattern) { + collect_symbol_documentation( + args, + grammar, + child, + content, + seen_nonterminals, + seen_terminals, + symbols, + ); + } +} + +fn nonterminal_symbol_documentation( + args: &GrammarArgs, + grammar: &Grammar, + content: &str, + name: &str, +) -> Option { + let rule = args.rules.iter().find(|rule| rule.name.value() == name)?; + let (ty, boxed) = grammar.nonterminal_type(name)?; + let type_line = hover_type_line(rust_type(ty, boxed).as_ref()); + let definition = completion::rule_definition_text(args, content, rule); + Some(format!( + "**Non-terminal `{name}`**\n\n{type_line}\n\nDefinition:\n\n{}", + definition_code_block(&definition) + )) +} + +fn terminal_symbol_documentation( + args: &GrammarArgs, + grammar: &Grammar, + content: &str, + name: &str, +) -> Option { + let (terminal, _) = args + .terminals + .iter() + .find(|(terminal, _)| terminal.value() == name)?; + let type_line = hover_type_line(Some(&token_type(grammar))); + let definition = 
completion::line_text_for_location(args, content, &terminal.location()); + Some(format!( + "**Terminal `{name}`**\n\n{type_line}\n\nDefinition:\n\n{}", + definition_code_block(&definition) + )) +} + +fn definition_code_block(definition: &str) -> String { + format!("```rustylr\n{definition}\n```") +} + +fn collect_pattern_syntax( + pattern: &PatternArgs, + seen: &mut BTreeSet<&'static str>, + syntax: &mut Vec, +) { + if let Some(label) = pattern_syntax_label(pattern) { + if seen.insert(label) { + syntax.push(format!( + "- `{label}`: {}", + pattern_syntax_documentation(pattern) + )); + } + } + + for child in pattern_children(pattern) { + collect_pattern_syntax(child, seen, syntax); + } +} + +fn pattern_syntax_label(pattern: &PatternArgs) -> Option<&'static str> { + match pattern { + PatternArgs::Plus { .. } => Some("A+"), + PatternArgs::Star { .. } => Some("A*"), + PatternArgs::Question { .. } => Some("A?"), + PatternArgs::Exclamation { .. } => Some("A!"), + PatternArgs::TerminalSet(_) => Some("[...]"), + PatternArgs::Group { .. } => Some("(...)"), + PatternArgs::Minus { .. } => Some("A - B"), + PatternArgs::Sep { .. } => Some("$sep(A, Sep, ...)"), + PatternArgs::Ident(_) + | PatternArgs::Byte(_) + | PatternArgs::ByteString(_) + | PatternArgs::Char(_) + | PatternArgs::String(_) => None, + } +} + +fn pattern_children(pattern: &PatternArgs) -> Vec<&PatternArgs> { + match pattern { + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => vec![base.as_ref()], + PatternArgs::Group { alternatives, .. } => alternatives.iter().flatten().collect(), + PatternArgs::Minus { base, exclude } => vec![base.as_ref(), exclude.as_ref()], + PatternArgs::Sep { + base, delimiter, .. 
+ } => vec![base.as_ref(), delimiter.as_ref()], + PatternArgs::Ident(_) + | PatternArgs::TerminalSet(_) + | PatternArgs::Byte(_) + | PatternArgs::ByteString(_) + | PatternArgs::Char(_) + | PatternArgs::String(_) => Vec::new(), + } +} + +fn pattern_syntax_documentation(pattern: &PatternArgs) -> String { + match pattern { + PatternArgs::Ident(_) => { + "Identifier pattern. It references a terminal or non-terminal symbol.".to_string() + } + PatternArgs::Plus { .. } => { + "`A+` matches one or more repetitions of `A` and collects valued matches into a `Vec`." + .to_string() + } + PatternArgs::Star { .. } => { + "`A*` matches zero or more repetitions of `A` and collects valued matches into a `Vec`." + .to_string() + } + PatternArgs::Question { .. } => { + "`A?` matches zero or one `A` and maps valued matches to `Option`.".to_string() + } + PatternArgs::Exclamation { .. } => { + "`A!` matches `A` but discards its semantic value from the production.".to_string() + } + PatternArgs::TerminalSet(_) => { + "Terminal set pattern. It matches one terminal from the set.".to_string() + } + PatternArgs::Group { .. } => { + "Grouped pattern. Alternatives are matched as a nested pattern; valued children are returned as a single value or tuple." + .to_string() + } + PatternArgs::Byte(_) => { + "Byte literal pattern. It is available when `%tokentype` is `u8`.".to_string() + } + PatternArgs::ByteString(_) => { + "Byte string literal pattern. It expands to a sequence of byte terminals.".to_string() + } + PatternArgs::Char(_) => { + "Character literal pattern. It is available when `%tokentype` is `char`.".to_string() + } + PatternArgs::String(_) => { + "String literal pattern. It expands to a sequence of character terminals.".to_string() + } + PatternArgs::Minus { .. } => { + "`A - B` matches terminals in `A` excluding terminals in `B`.".to_string() + } + PatternArgs::Sep { at_least_one, .. 
} => { + let quantifier = if *at_least_one { + "one or more" + } else { + "zero or more" + }; + format!( + "`$sep(A, Sep, ...)` matches {quantifier} `A` patterns separated by `Sep` and collects valued `A` matches into a `Vec`." + ) + } + } +} + +fn pattern_keyword_documentation(pattern: &PatternArgs) -> Option { + match pattern { + PatternArgs::Ident(ident) if ident.value() == "error" => { + completion::keyword_documentation("error") + } + PatternArgs::Sep { .. } => completion::keyword_documentation("$sep"), + _ => None, + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +struct HoverRustType { + name: String, + boxed: bool, +} + +fn hover_type_line(ty: Option<&HoverRustType>) -> String { + match ty { + Some(ty) if ty.boxed => format!("Final type: `{}` (boxed)", ty.name), + Some(ty) => format!("Final type: `{}`", ty.name), + None => "Final type: `()`".to_string(), + } +} + +fn pattern_type( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, +) -> Option { + match pattern { + PatternArgs::Ident(ident) => { + if ident.value() == "error" { + return None; + } + if grammar + .terminals_index + .contains_key(&TerminalName::Ident(ident.value().clone())) + { + return Some(token_type(grammar)); + } + let (ty, boxed) = grammar.nonterminal_type(ident.value())?; + rust_type(ty, boxed) + } + PatternArgs::Plus { base, .. } | PatternArgs::Star { base, .. } => { + let base_type = pattern_type(args, grammar, base)?; + Some(HoverRustType { + name: format!("Vec<{}>", base_type.name), + boxed: false, + }) + } + PatternArgs::Question { base, .. } => { + let base_type = pattern_type(args, grammar, base)?; + Some(HoverRustType { + name: format!("Option<{}>", base_type.name), + boxed: false, + }) + } + PatternArgs::Exclamation { .. 
} => None, + PatternArgs::TerminalSet(_) | PatternArgs::Byte(_) | PatternArgs::Char(_) => { + Some(token_type(grammar)) + } + PatternArgs::ByteString(_) => Some(HoverRustType { + name: "&'static [u8]".to_string(), + boxed: false, + }), + PatternArgs::String(_) => Some(HoverRustType { + name: "&'static str".to_string(), + boxed: false, + }), + PatternArgs::Group { alternatives, .. } => group_type(args, grammar, alternatives), + PatternArgs::Minus { .. } => Some(token_type(grammar)), + PatternArgs::Sep { base, .. } => { + let base_type = pattern_type(args, grammar, base)?; + Some(HoverRustType { + name: format!("Vec<{}>", base_type.name), + boxed: false, + }) + } + } +} + +fn group_type( + args: &GrammarArgs, + grammar: &Grammar, + alternatives: &[Vec], +) -> Option { + let mut alternatives = alternatives + .iter() + .map(|alternative| alternative_type(args, grammar, alternative)); + let first = alternatives.next()?; + if alternatives.all(|ty| ty == first) { + first + } else { + None + } +} + +fn alternative_type( + args: &GrammarArgs, + grammar: &Grammar, + alternative: &[PatternArgs], +) -> Option { + let child_types = alternative + .iter() + .filter_map(|pattern| pattern_type(args, grammar, pattern)) + .collect::>(); + match child_types.len() { + 0 => None, + 1 => child_types.into_iter().next(), + _ => Some(HoverRustType { + name: format!( + "({})", + child_types + .iter() + .map(|ty| format!("{},", ty.name)) + .collect::>() + .join(" ") + ), + boxed: false, + }), + } +} + +fn token_type(grammar: &Grammar) -> HoverRustType { + rust_type(Some(grammar.token_type()), grammar.token_type_boxed()).unwrap() +} + +fn rust_type(ty: Option<&TokenStream>, boxed: bool) -> Option { + let name = ty.map(TokenStream::to_string).filter(|ty| !ty.is_empty())?; + Some(HoverRustType { name, boxed }) +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Comma, +} + +%% + +%tokentype Token; 
+%start List; + +%token num Token::Num(_); +%token plus Token::Plus; +%token comma Token::Comma; + +E(i32) : value=num { 0 }; +List(Vec) : $sep(E, comma, +) { E }; +"#; + + #[test] + fn hovers_keyword() { + let offset = MOCK_GRAMMAR.find("%token num").unwrap() + 1; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Defines a terminal symbol")); + } + + #[test] + fn hovers_identifier_pattern_with_type() { + let offset = MOCK_GRAMMAR.find("E(i32) : value=num").unwrap(); + let offset = MOCK_GRAMMAR[offset..].find("num").unwrap() + offset; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Pattern `num`")); + assert!(markup.value.contains("Final type: `Token`")); + assert!(markup.value.contains("Identifiers:")); + assert!(markup.value.contains("**Terminal `num`**")); + assert!(markup + .value + .contains("```rustylr\n%token num Token::Num(_);\n```")); + assert!(!markup.value.contains("Identifier pattern")); + } + + #[test] + fn hovers_sep_pattern_with_vec_type_and_keyword_details() { + let offset = MOCK_GRAMMAR.find("$sep").unwrap() + 1; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Pattern `$sep(E, comma, +)`")); + assert!(markup.value.contains("Final type: `Vec`")); + assert!(markup.value.contains("Identifiers:")); + assert!(markup.value.contains("**Non-terminal `E`**")); + assert!(markup.value.contains("```rustylr\nE(i32)\n : num\n ;\n```")); + 
assert!(!markup.value.contains("value=num")); + assert!(markup.value.contains("**Terminal `comma`**")); + assert!(markup + .value + .contains("```rustylr\n%token comma Token::Comma;\n```")); + assert!(markup.value.contains("Pattern syntax:")); + assert!(markup.value.contains("- `$sep(A, Sep, ...)`:")); + assert_eq!(markup.value.matches("**Non-terminal `E`**").count(), 1); + assert_eq!(markup.value.matches("**Terminal `comma`**").count(), 1); + assert!(markup + .value + .contains("Pattern helper for separated repetition")); + } + + #[test] + fn hovers_whole_pattern_when_cursor_is_on_inner_symbol() { + let sep_offset = MOCK_GRAMMAR.find("$sep").unwrap(); + let offset = MOCK_GRAMMAR[sep_offset..].find("comma").unwrap() + sep_offset + 1; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Pattern `$sep(E, comma, +)`")); + assert!(markup.value.contains("Final type: `Vec`")); + assert!(markup.value.contains("Identifiers:")); + assert!(markup.value.contains("**Non-terminal `E`**")); + assert!(markup.value.contains("**Terminal `comma`**")); + assert!(markup.value.contains("Pattern syntax:")); + assert!(markup + .value + .contains("Pattern helper for separated repetition")); + } +} diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index 407cdc07..a5fcc79d 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -3,10 +3,10 @@ use lsp_types::{ notification::{ DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, }, - request::{Completion, GotoDefinition}, - CompletionOptions, Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Location, OneOf, - PublishDiagnosticsParams, Range, ServerCapabilities, TextDocumentSyncCapability, - TextDocumentSyncKind, Url, + request::{Completion, GotoDefinition, HoverRequest}, + 
CompletionOptions, Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Hover, + HoverProviderCapability, Location, OneOf, PublishDiagnosticsParams, Range, ServerCapabilities, + TextDocumentSyncCapability, TextDocumentSyncKind, Url, }; use std::collections::HashMap; use std::error::Error; @@ -19,6 +19,7 @@ use lsp_types::request::Request as LspRequest; mod completion; mod diagnostics; mod goto_definition; +mod hover; mod position; fn main() -> Result<(), Box> { @@ -31,6 +32,7 @@ fn main() -> Result<(), Box> { let server_capabilities = serde_json::to_value(&ServerCapabilities { text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), definition_provider: Some(OneOf::Left(true)), + hover_provider: Some(HoverProviderCapability::Simple(true)), completion_provider: Some(CompletionOptions { trigger_characters: Some(completion_trigger_characters()), ..Default::default() @@ -108,6 +110,29 @@ fn main() -> Result<(), Box> { Response::new_ok(id, lsp_types::CompletionResponse::Array(Vec::new())) }; connection.sender.send(Message::Response(response))?; + } else if req.method == HoverRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting hover request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position_params.text_document.uri; + let position = params.text_document_position_params.position; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| hover::hover(content, position)) { + Ok(hover) => Response::new_ok(id, hover), + Err(message) => { + eprintln!("RustyLR hover panicked: {message}"); + Response::new_ok(id, Option::::None) + } + } + } else { + Response::new_ok(id, Option::::None) + }; + connection.sender.send(Message::Response(response))?; } } Message::Response(_resp) => {} From c02fe0a9b89e84b6ce562686450248ce2d415976 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 07:43:27 +0900 Subject: 
[PATCH 06/20] inlay hint --- rusty_lr_lsp/README.md | 1 + rusty_lr_lsp/src/hover.rs | 20 ++++- rusty_lr_lsp/src/inlay_hint.rs | 140 +++++++++++++++++++++++++++++++++ rusty_lr_lsp/src/main.rs | 37 ++++++++- 4 files changed, 193 insertions(+), 5 deletions(-) create mode 100644 rusty_lr_lsp/src/inlay_hint.rs diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md index 533e1090..9770324f 100644 --- a/rusty_lr_lsp/README.md +++ b/rusty_lr_lsp/README.md @@ -16,6 +16,7 @@ Other Rust files are intentionally not matched by default. - **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. - **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. - **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. +- **Inlay Hints:** Shows `Pattern: Type` hints for top-level patterns in non-terminal definitions. - **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. Completion details include the resolved Rust type for terminals and non-terminals, including inferred placeholders and a note when the value is boxed for parser storage. - **Completion for directives and keywords:** Suggests directives such as `%token`, `%start`, `%tokentype`, `%left`, `%right`, `%precedence`, `%prec`, `%dprec`, `%glr`, `%lalr`, `%nooptim`, `%allow`, and common identifiers such as `error`, `$sep`, `data`, `lookahead`, and `shift`. - **Completion for `$...` variables:** Suggests built-in substitutions (`$tokentype`, `$location`, `$userdata`, `$error`, `$errortype`), terminal and non-terminal substitutions (`$terminal_name`, `$NonTerminalName`), current reduce-action bindings (`$left`, `$value`, etc.), and positional semantic variables (`$1`, `$2`, ...). 
diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs index 0070a6e3..4ffc770c 100644 --- a/rusty_lr_lsp/src/hover.rs +++ b/rusty_lr_lsp/src/hover.rs @@ -389,11 +389,19 @@ fn pattern_keyword_documentation(pattern: &PatternArgs) -> Option { } #[derive(Clone, Debug, PartialEq, Eq)] -struct HoverRustType { +pub(crate) struct HoverRustType { name: String, boxed: bool, } +pub(crate) fn pattern_final_type( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, +) -> String { + hover_type_name(pattern_type(args, grammar, pattern).as_ref()) +} + fn hover_type_line(ty: Option<&HoverRustType>) -> String { match ty { Some(ty) if ty.boxed => format!("Final type: `{}` (boxed)", ty.name), @@ -402,7 +410,15 @@ fn hover_type_line(ty: Option<&HoverRustType>) -> String { } } -fn pattern_type( +fn hover_type_name(ty: Option<&HoverRustType>) -> String { + match ty { + Some(ty) if ty.boxed => format!("{} (boxed)", ty.name), + Some(ty) => ty.name.clone(), + None => "()".to_string(), + } +} + +pub(crate) fn pattern_type( args: &GrammarArgs, grammar: &Grammar, pattern: &PatternArgs, diff --git a/rusty_lr_lsp/src/inlay_hint.rs b/rusty_lr_lsp/src/inlay_hint.rs new file mode 100644 index 00000000..75a2bb7c --- /dev/null +++ b/rusty_lr_lsp/src/inlay_hint.rs @@ -0,0 +1,140 @@ +use lsp_types::{InlayHint, InlayHintKind, InlayHintLabel, Range}; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{GrammarArgs, PatternArgs}; + +use crate::completion; +use crate::hover; +use crate::position::{offset_to_position, position_to_offset}; + +pub fn inlay_hints(content: &str, range: Range) -> Vec { + let Ok(args) = completion::parse_args(content) else { + return Vec::new(); + }; + let Ok(grammar) = Grammar::from_grammar_args(args.clone()) else { + return Vec::new(); + }; + + let range_start = position_to_offset(content, range.start); + let range_end = position_to_offset(content, range.end); + let mut hints = Vec::new(); + + for rule in &args.rules { + for line in 
&rule.rule_lines { + for (_, pattern) in &line.tokens { + let Some(pattern_range) = args.span_manager.get_byterange(&pattern.location()) + else { + continue; + }; + if !ranges_overlap( + pattern_range.start, + pattern_range.end, + range_start, + range_end, + ) { + continue; + } + + hints.push(pattern_inlay_hint(&args, &grammar, content, pattern)); + } + } + } + + hints +} + +fn pattern_inlay_hint( + args: &GrammarArgs, + grammar: &Grammar, + content: &str, + pattern: &PatternArgs, +) -> InlayHint { + let end = args + .span_manager + .get_byterange(&pattern.location()) + .map_or(0, |range| range.end); + let final_type = hover::pattern_final_type(args, grammar, pattern); + InlayHint { + position: offset_to_position(content, end), + label: InlayHintLabel::String(format!(": {final_type}")), + kind: Some(InlayHintKind::TYPE), + text_edits: None, + tooltip: None, + padding_left: Some(true), + padding_right: None, + data: None, + } +} + +fn ranges_overlap(start_a: usize, end_a: usize, start_b: usize, end_b: usize) -> bool { + start_a <= end_b && start_b <= end_a +} + +#[cfg(test)] +mod tests { + use super::*; + use lsp_types::Position; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Comma, +} + +%% + +%tokentype Token; +%start List; + +%token num Token::Num(_); +%token plus Token::Plus; +%token comma Token::Comma; + +E(i32) : left=E plus num { 0 } + | num { 0 } + ; +List(Vec) : $sep(E, comma, +) { E }; +"#; + + #[test] + fn hints_top_level_patterns_in_rule_definitions() { + let hints = inlay_hints( + MOCK_GRAMMAR, + Range::new(Position::new(0, 0), Position::new(100, 0)), + ); + let labels = hints + .iter() + .map(|hint| match &hint.label { + InlayHintLabel::String(label) => label.as_str(), + InlayHintLabel::LabelParts(_) => "", + }) + .collect::>(); + + assert!(labels.contains(&": i32")); + assert!(labels.contains(&": Token")); + assert!(labels.contains(&": Vec")); + } + + #[test] + fn 
does_not_hint_nested_sep_children_separately() { + let sep_line_start = MOCK_GRAMMAR.find("List(Vec)").unwrap(); + let sep_line_end = MOCK_GRAMMAR[sep_line_start..].find(';').unwrap() + sep_line_start; + let hints = inlay_hints( + MOCK_GRAMMAR, + Range::new( + offset_to_position(MOCK_GRAMMAR, sep_line_start), + offset_to_position(MOCK_GRAMMAR, sep_line_end), + ), + ); + let labels = hints + .iter() + .map(|hint| match &hint.label { + InlayHintLabel::String(label) => label.as_str(), + InlayHintLabel::LabelParts(_) => "", + }) + .collect::>(); + + assert_eq!(labels, vec![": Vec"]); + } +} diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index a5fcc79d..31f6a4ff 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -3,10 +3,11 @@ use lsp_types::{ notification::{ DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, }, - request::{Completion, GotoDefinition, HoverRequest}, + request::{Completion, GotoDefinition, HoverRequest, InlayHintRequest}, CompletionOptions, Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Hover, - HoverProviderCapability, Location, OneOf, PublishDiagnosticsParams, Range, ServerCapabilities, - TextDocumentSyncCapability, TextDocumentSyncKind, Url, + HoverProviderCapability, InlayHint, InlayHintOptions, InlayHintServerCapabilities, Location, + OneOf, PublishDiagnosticsParams, Range, ServerCapabilities, TextDocumentSyncCapability, + TextDocumentSyncKind, Url, }; use std::collections::HashMap; use std::error::Error; @@ -20,6 +21,7 @@ mod completion; mod diagnostics; mod goto_definition; mod hover; +mod inlay_hint; mod position; fn main() -> Result<(), Box> { @@ -33,6 +35,12 @@ fn main() -> Result<(), Box> { text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), definition_provider: Some(OneOf::Left(true)), hover_provider: Some(HoverProviderCapability::Simple(true)), + inlay_hint_provider: Some(OneOf::Right(InlayHintServerCapabilities::Options( + 
InlayHintOptions { + resolve_provider: Some(false), + ..Default::default() + }, + ))), completion_provider: Some(CompletionOptions { trigger_characters: Some(completion_trigger_characters()), ..Default::default() @@ -133,6 +141,29 @@ fn main() -> Result<(), Box> { Response::new_ok(id, Option::::None) }; connection.sender.send(Message::Response(response))?; + } else if req.method == InlayHintRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting inlay hint request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let range = params.range; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| inlay_hint::inlay_hints(content, range)) { + Ok(hints) => Response::new_ok(id, Some(hints)), + Err(message) => { + eprintln!("RustyLR inlay hint panicked: {message}"); + Response::new_ok(id, Option::>::None) + } + } + } else { + Response::new_ok(id, Option::>::None) + }; + connection.sender.send(Message::Response(response))?; } } Message::Response(_resp) => {} From dbda29d3b952c98d6b225a1e2653eebabde1192e Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 07:46:54 +0900 Subject: [PATCH 07/20] fix hovering details for plain identifier --- rusty_lr_lsp/src/hover.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs index 4ffc770c..51fab980 100644 --- a/rusty_lr_lsp/src/hover.rs +++ b/rusty_lr_lsp/src/hover.rs @@ -93,6 +93,10 @@ fn pattern_at_offset( } fn pattern_documentation(args: &GrammarArgs, pattern: &PatternArgs, content: &str) -> String { + if let Some(documentation) = identifier_pattern_documentation(args, pattern, content) { + return documentation; + } + let pattern_text = pattern_text(args, pattern, content); let grammar = Grammar::from_grammar_args(args.clone()).ok(); let pattern_type = grammar @@ -118,6 +122,21 @@ fn 
pattern_documentation(args: &GrammarArgs, pattern: &PatternArgs, content: &st documentation } +fn identifier_pattern_documentation( + args: &GrammarArgs, + pattern: &PatternArgs, + content: &str, +) -> Option { + let PatternArgs::Ident(ident) = pattern else { + return None; + }; + + let grammar = Grammar::from_grammar_args(args.clone()).ok()?; + nonterminal_symbol_documentation(args, &grammar, content, ident.value()) + .or_else(|| terminal_symbol_documentation(args, &grammar, content, ident.value())) + .or_else(|| pattern_keyword_documentation(pattern)) +} + fn pattern_text(args: &GrammarArgs, pattern: &PatternArgs, content: &str) -> String { args.span_manager .get_byterange(&pattern.location()) @@ -577,13 +596,13 @@ List(Vec) : $sep(E, comma, +) { E }; let HoverContents::Markup(markup) = hover.contents else { panic!("expected markup hover"); }; - assert!(markup.value.contains("Pattern `num`")); assert!(markup.value.contains("Final type: `Token`")); - assert!(markup.value.contains("Identifiers:")); assert!(markup.value.contains("**Terminal `num`**")); assert!(markup .value .contains("```rustylr\n%token num Token::Num(_);\n```")); + assert!(!markup.value.contains("Pattern `num`")); + assert!(!markup.value.contains("Identifiers:")); assert!(!markup.value.contains("Identifier pattern")); } From 5e3205a11d94249cf0aff0ed90dfbb038bed93b6 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 07:57:37 +0900 Subject: [PATCH 08/20] %allow suggestion --- rusty_lr_lsp/README.md | 1 + rusty_lr_lsp/src/code_action.rs | 123 ++++++++++++++++++++++++++++++++ rusty_lr_lsp/src/diagnostics.rs | 5 +- rusty_lr_lsp/src/main.rs | 42 +++++++++-- 4 files changed, 164 insertions(+), 7 deletions(-) create mode 100644 rusty_lr_lsp/src/code_action.rs diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md index 9770324f..9a498e25 100644 --- a/rusty_lr_lsp/README.md +++ b/rusty_lr_lsp/README.md @@ -14,6 +14,7 @@ Other Rust files are intentionally not matched by default. 
## Features - **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. +- **Code Actions:** Offers quick fixes for suppressible diagnostics by inserting the appropriate `%allow ...;` directive. - **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. - **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. - **Inlay Hints:** Shows `Pattern: Type` hints for top-level patterns in non-terminal definitions. diff --git a/rusty_lr_lsp/src/code_action.rs b/rusty_lr_lsp/src/code_action.rs new file mode 100644 index 00000000..ff1d74c2 --- /dev/null +++ b/rusty_lr_lsp/src/code_action.rs @@ -0,0 +1,123 @@ +use lsp_types::{ + CodeAction, CodeActionKind, CodeActionOrCommand, Diagnostic, Position, Range, TextEdit, Url, + WorkspaceEdit, +}; +use std::collections::{HashMap, HashSet}; + +pub fn code_actions( + content: &str, + uri: Url, + diagnostics: Vec, +) -> Vec { + let insert_position = allow_insert_position(content); + let mut seen = HashSet::new(); + let mut actions = Vec::new(); + + for diagnostic in diagnostics { + let Some(allow) = allow_suggestion(&diagnostic) else { + continue; + }; + if !seen.insert(allow.clone()) { + continue; + } + + let mut changes = HashMap::new(); + changes.insert( + uri.clone(), + vec![TextEdit { + range: Range::new(insert_position, insert_position), + new_text: format!("{allow}\n"), + }], + ); + + actions.push(CodeActionOrCommand::CodeAction(CodeAction { + title: format!("Insert `{allow}`"), + kind: Some(CodeActionKind::QUICKFIX), + diagnostics: Some(vec![diagnostic]), + edit: Some(WorkspaceEdit { + changes: Some(changes), + document_changes: None, + change_annotations: None, + }), + command: None, + is_preferred: Some(true), + disabled: None, + data: None, + })); + } + + actions +} + +fn 
allow_suggestion(diagnostic: &Diagnostic) -> Option { + diagnostic + .data + .as_ref()? + .get("rustylr_allow")? + .as_str() + .filter(|suggestion| suggestion.starts_with("%allow ")) + .map(str::to_string) +} + +fn allow_insert_position(content: &str) -> Position { + let line = content + .lines() + .position(|line| line.trim() == "%%") + .map_or(0, |line| line + 1); + Position::new(line as u32, 0) +} + +#[cfg(test)] +mod tests { + use super::*; + use lsp_types::DiagnosticSeverity; + use serde_json::json; + + #[test] + fn creates_allow_quick_fix_from_diagnostic_data() { + let uri = Url::parse("file:///test.rustylr").unwrap(); + let diagnostic = Diagnostic { + range: Range::default(), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: "unused".to_string(), + related_information: None, + tags: None, + data: Some(json!({ "rustylr_allow": "%allow unused_terminals(num);" })), + }; + + let actions = code_actions("mod x {}\n%%\n%start E;\n", uri, vec![diagnostic]); + assert_eq!(actions.len(), 1); + + let CodeActionOrCommand::CodeAction(action) = &actions[0] else { + panic!("expected code action"); + }; + assert_eq!(action.title, "Insert `%allow unused_terminals(num);`"); + let edit = action.edit.as_ref().unwrap(); + let changes = edit.changes.as_ref().unwrap(); + let text_edit = changes.values().next().unwrap().first().unwrap(); + assert_eq!(text_edit.range.start, Position::new(2, 0)); + assert_eq!(text_edit.new_text, "%allow unused_terminals(num);\n"); + } + + #[test] + fn deduplicates_same_allow_suggestion() { + let uri = Url::parse("file:///test.rustylr").unwrap(); + let diagnostic = Diagnostic { + range: Range::default(), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: "unused".to_string(), + related_information: None, + tags: None, + data: Some(json!({ "rustylr_allow": "%allow 
nonterm_unreachable(E);" })), + }; + + let actions = code_actions("%%\n%start E;\n", uri, vec![diagnostic.clone(), diagnostic]); + assert_eq!(actions.len(), 1); + } +} diff --git a/rusty_lr_lsp/src/diagnostics.rs b/rusty_lr_lsp/src/diagnostics.rs index 4169ac18..9a52bd17 100644 --- a/rusty_lr_lsp/src/diagnostics.rs +++ b/rusty_lr_lsp/src/diagnostics.rs @@ -1,6 +1,7 @@ use lsp_types::{Diagnostic, DiagnosticSeverity, Range}; use proc_macro2::{Spacing, TokenStream, TokenTree}; use rusty_lr_parser::grammar::Grammar; +use serde_json::json; use std::str::FromStr; use crate::position::range_to_lsp_range; @@ -260,7 +261,7 @@ pub fn compile_and_get_diagnostics(content: &str) -> Vec { message: msg, related_information: None, tags: None, - data: None, + data: Some(json!({ "rustylr_allow": warning.suggestion(&grammar) })), }); } else { for loc in locs { @@ -274,7 +275,7 @@ pub fn compile_and_get_diagnostics(content: &str) -> Vec { message: msg.clone(), related_information: None, tags: None, - data: None, + data: Some(json!({ "rustylr_allow": warning.suggestion(&grammar) })), }); } } diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index 31f6a4ff..66e45f7c 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -3,11 +3,11 @@ use lsp_types::{ notification::{ DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, }, - request::{Completion, GotoDefinition, HoverRequest, InlayHintRequest}, - CompletionOptions, Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Hover, - HoverProviderCapability, InlayHint, InlayHintOptions, InlayHintServerCapabilities, Location, - OneOf, PublishDiagnosticsParams, Range, ServerCapabilities, TextDocumentSyncCapability, - TextDocumentSyncKind, Url, + request::{CodeActionRequest, Completion, GotoDefinition, HoverRequest, InlayHintRequest}, + CodeActionKind, CodeActionOptions, CompletionOptions, Diagnostic, DiagnosticSeverity, + GotoDefinitionResponse, Hover, HoverProviderCapability, 
InlayHint, InlayHintOptions, + InlayHintServerCapabilities, Location, OneOf, PublishDiagnosticsParams, Range, + ServerCapabilities, TextDocumentSyncCapability, TextDocumentSyncKind, Url, }; use std::collections::HashMap; use std::error::Error; @@ -17,6 +17,7 @@ use std::panic::{catch_unwind, set_hook, take_hook, AssertUnwindSafe}; use lsp_types::notification::Notification as LspNotification; use lsp_types::request::Request as LspRequest; +mod code_action; mod completion; mod diagnostics; mod goto_definition; @@ -34,6 +35,13 @@ fn main() -> Result<(), Box> { let server_capabilities = serde_json::to_value(&ServerCapabilities { text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), definition_provider: Some(OneOf::Left(true)), + code_action_provider: Some(lsp_types::CodeActionProviderCapability::Options( + CodeActionOptions { + code_action_kinds: Some(vec![CodeActionKind::QUICKFIX]), + resolve_provider: Some(false), + ..Default::default() + }, + )), hover_provider: Some(HoverProviderCapability::Simple(true)), inlay_hint_provider: Some(OneOf::Right(InlayHintServerCapabilities::Options( InlayHintOptions { @@ -164,6 +172,30 @@ fn main() -> Result<(), Box> { Response::new_ok(id, Option::>::None) }; connection.sender.send(Message::Response(response))?; + } else if req.method == CodeActionRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting code action request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| { + code_action::code_actions(content, uri, params.context.diagnostics) + }) { + Ok(actions) => Response::new_ok(id, Some(actions)), + Err(message) => { + eprintln!("RustyLR code action panicked: {message}"); + Response::new_ok(id, Option::::None) + } + } + } else { + Response::new_ok(id, Option::::None) + }; + 
connection.sender.send(Message::Response(response))?; } } Message::Response(_resp) => {} From f5419b01c5a01fb40896bbe4fb8fd37dd43c04c6 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 08:20:07 +0900 Subject: [PATCH 09/20] formatting --- rusty_lr_lsp/README.md | 1 + rusty_lr_lsp/src/formatter.rs | 499 ++++++++++++++++++++++++++++++++++ rusty_lr_lsp/src/main.rs | 28 +- 3 files changed, 527 insertions(+), 1 deletion(-) create mode 100644 rusty_lr_lsp/src/formatter.rs diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md index 9a498e25..e22c26ee 100644 --- a/rusty_lr_lsp/README.md +++ b/rusty_lr_lsp/README.md @@ -15,6 +15,7 @@ Other Rust files are intentionally not matched by default. - **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. - **Code Actions:** Offers quick fixes for suppressible diagnostics by inserting the appropriate `%allow ...;` directive. +- **Formatting:** Normalizes `%token` declarations, one-space pattern separators, and indentation for production rules and reduce-action bodies. - **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. - **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. - **Inlay Hints:** Shows `Pattern: Type` hints for top-level patterns in non-terminal definitions. 
diff --git a/rusty_lr_lsp/src/formatter.rs b/rusty_lr_lsp/src/formatter.rs new file mode 100644 index 00000000..fa044d8c --- /dev/null +++ b/rusty_lr_lsp/src/formatter.rs @@ -0,0 +1,499 @@ +use lsp_types::TextEdit; +use proc_macro2::{TokenStream, TokenTree}; +use rusty_lr_parser::{GrammarArgs, PatternArgs}; + +use crate::completion; +use crate::position::range_to_lsp_range; + +const RULE_INDENT: &str = " "; +const ACTION_INNER_INDENT: &str = " "; + +pub fn formatting(content: &str) -> Vec { + let Ok(args) = completion::parse_args(content) else { + return Vec::new(); + }; + + let mut edits = Vec::new(); + edits.extend(token_edits(&args, content)); + edits.extend(rule_edits(&args, content)); + edits +} + +fn token_edits(args: &GrammarArgs, content: &str) -> Vec { + args.terminals + .iter() + .filter_map(|(name, body)| { + let name_range = args.span_manager.get_byterange(&name.location())?; + let line_start = line_start(content, name_range.start); + let line_end = line_end(content, name_range.end); + let semicolon = content[name_range.end.min(content.len())..line_end] + .find(';') + .map(|idx| name_range.end + idx)?; + + let body_text = token_stream_text(content, body).unwrap_or_default(); + let trailing = content[semicolon + 1..line_end].trim_end(); + let formatted = if body_text.is_empty() { + format!("%token {}", name.value()) + } else { + format!("%token {} {}", name.value(), body_text) + }; + let mut new_text = format!("{formatted};"); + if !trailing.is_empty() { + new_text.push_str(trailing); + } + + Some(TextEdit { + range: range_to_lsp_range(content, line_start..line_end), + new_text, + }) + }) + .collect() +} + +fn rule_edits(args: &GrammarArgs, content: &str) -> Vec { + args.rules + .iter() + .filter_map(|rule| { + let rule_start = args + .span_manager + .get_byterange(&rule.name.location())? 
+ .start; + let start = line_start(content, rule_start); + let first_separator = rule.rule_lines.first().and_then(|line| { + args.span_manager + .get_byterange(&line.separator_location) + .map(|range| range.start) + })?; + let header = content[start..first_separator.min(content.len())].trim(); + + let mut formatted = String::new(); + formatted.push_str(header); + for (line_idx, line) in rule.rule_lines.iter().enumerate() { + let tokens = line_tokens_text(args, content, line); + let modifiers = line_modifiers_text(args, content, rule, line_idx); + let action = line + .reduce_action + .as_ref() + .and_then(|action| token_stream_text(content, action)); + + formatted.push('\n'); + formatted.push_str(RULE_INDENT); + formatted.push(if line_idx == 0 { ':' } else { '|' }); + if !tokens.is_empty() { + formatted.push(' '); + formatted.push_str(&tokens); + } + if !modifiers.is_empty() { + formatted.push(' '); + formatted.push_str(&modifiers); + } + if let Some(action) = action { + formatted.push(' '); + formatted.push_str(&format_reduce_action(&action)); + } + } + formatted.push('\n'); + formatted.push_str(RULE_INDENT); + formatted.push(';'); + + let end = rule_block_end(args, content, rule)?; + Some(TextEdit { + range: range_to_lsp_range(content, start..end), + new_text: formatted, + }) + }) + .collect() +} + +fn format_reduce_action(action: &str) -> String { + let trimmed = action.trim(); + if !trimmed.contains('\n') { + return trimmed.to_string(); + } + + let lines = trimmed.lines().collect::>(); + let last_non_empty = lines.iter().rposition(|line| !line.trim().is_empty()); + let body_indent = lines + .iter() + .enumerate() + .filter(|(idx, line)| *idx != 0 && Some(*idx) != last_non_empty && !line.trim().is_empty()) + .map(|(_, line)| leading_indent_len(line)) + .min() + .unwrap_or(0); + + lines + .iter() + .enumerate() + .map(|(idx, line)| { + if idx == 0 { + line.trim_end().to_string() + } else if line.trim().is_empty() { + String::new() + } else if Some(idx) == 
last_non_empty && line.trim_start().starts_with('}') { + format!("{RULE_INDENT}{}", line.trim_start()) + } else { + format!("{ACTION_INNER_INDENT}{}", strip_indent(line, body_indent)) + } + }) + .collect::>() + .join("\n") +} + +fn leading_indent_len(line: &str) -> usize { + line.char_indices() + .find_map(|(idx, ch)| (!matches!(ch, ' ' | '\t')).then_some(idx)) + .unwrap_or(line.len()) +} + +fn strip_indent(line: &str, indent: usize) -> &str { + if leading_indent_len(line) >= indent { + &line[indent..] + } else { + line.trim_start() + } +} + +fn line_tokens_text( + args: &GrammarArgs, + content: &str, + line: &rusty_lr_parser::RuleLineArgs, +) -> String { + line.tokens + .iter() + .map(|(mapped_name, pattern)| { + let start = mapped_name + .as_ref() + .and_then(|name| { + args.span_manager + .get_byterange(&name.location()) + .map(|range| range.start) + }) + .unwrap_or_else(|| pattern_start(args, pattern)); + let end = pattern_end(args, pattern); + content[start.min(content.len())..end.min(content.len())] + .trim() + .to_string() + }) + .collect::>() + .join(" ") +} + +fn line_modifiers_text( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, + line_idx: usize, +) -> String { + let line = &rule.rule_lines[line_idx]; + let start = line + .tokens + .iter() + .map(|(_, pattern)| pattern_end(args, pattern)) + .max() + .unwrap_or_else(|| { + args.span_manager + .get_byterange(&line.separator_location) + .map_or(0, |range| range.end) + }); + let end = line + .reduce_action + .as_ref() + .and_then(|action| token_stream_range(action).map(|range| range.start)) + .unwrap_or_else(|| rule_line_end(args, content, rule, line_idx)); + + content[start.min(content.len())..end.min(content.len())] + .trim() + .to_string() +} + +fn rule_block_end( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, +) -> Option { + let last_line_idx = rule.rule_lines.len().checked_sub(1)?; + let end_hint = rule_line_end(args, content, rule, 
last_line_idx); + let semicolon = content[end_hint.min(content.len())..].find(';')?; + Some(line_end(content, end_hint + semicolon + 1)) +} + +fn rule_line_end( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, + line_idx: usize, +) -> usize { + if let Some(next_line) = rule.rule_lines.get(line_idx + 1) { + return args + .span_manager + .get_byterange(&next_line.separator_location) + .map_or(content.len(), |range| range.start); + } + + let mut end = args + .span_manager + .get_byterange(&rule.name.location()) + .map_or(0, |range| range.end); + for (_, pattern) in &rule.rule_lines[line_idx].tokens { + end = end.max(pattern_end(args, pattern)); + } + if let Some(action) = &rule.rule_lines[line_idx].reduce_action { + if let Some(range) = token_stream_range(action) { + end = end.max(range.end); + } + } + + content[end.min(content.len())..] + .find(';') + .map_or(content.len(), |semi| end + semi) +} + +fn pattern_end(args: &GrammarArgs, pattern: &PatternArgs) -> usize { + match pattern { + PatternArgs::Ident(ident) => args + .span_manager + .get_byterange(&ident.location()) + .map_or(0, |range| range.end), + PatternArgs::Plus { base, op_location } + | PatternArgs::Star { base, op_location } + | PatternArgs::Question { base, op_location } + | PatternArgs::Exclamation { base, op_location } => pattern_end(args, base).max( + args.span_manager + .get_byterange(op_location) + .map_or(0, |range| range.end), + ), + PatternArgs::TerminalSet(set) => args + .span_manager + .get_byterange(&set.location()) + .map_or(0, |range| range.end), + PatternArgs::Group { + alternatives, + close_location, + .. 
+ } => alternatives + .iter() + .flatten() + .map(|pattern| pattern_end(args, pattern)) + .max() + .unwrap_or(0) + .max( + args.span_manager + .get_byterange(close_location) + .map_or(0, |range| range.end), + ), + PatternArgs::Byte(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::ByteString(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::Char(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::String(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::Minus { base, exclude } => { + pattern_end(args, base).max(pattern_end(args, exclude)) + } + PatternArgs::Sep { + base, + delimiter, + location, + .. + } => pattern_end(args, base) + .max(pattern_end(args, delimiter)) + .max( + args.span_manager + .get_byterange(location) + .map_or(0, |range| range.end), + ), + } +} + +fn pattern_start(args: &GrammarArgs, pattern: &PatternArgs) -> usize { + match pattern { + PatternArgs::Ident(ident) => args + .span_manager + .get_byterange(&ident.location()) + .map_or(0, |range| range.start), + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => pattern_start(args, base), + PatternArgs::TerminalSet(set) => args + .span_manager + .get_byterange(&set.location()) + .map_or(0, |range| range.start), + PatternArgs::Group { open_location, .. 
} => args + .span_manager + .get_byterange(open_location) + .map_or(0, |range| range.start), + PatternArgs::Byte(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::ByteString(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::Char(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::String(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::Minus { base, .. } => pattern_start(args, base), + PatternArgs::Sep { location, .. } => args + .span_manager + .get_byterange(location) + .map_or(0, |range| range.start), + } +} + +fn token_stream_text(content: &str, stream: &TokenStream) -> Option { + let range = token_stream_range(stream)?; + content + .get(range.start.min(content.len())..range.end.min(content.len())) + .map(str::trim) + .filter(|text| !text.is_empty()) + .map(str::to_string) +} + +fn token_stream_range(stream: &TokenStream) -> Option> { + let mut start = usize::MAX; + let mut end = 0; + for token in stream.clone() { + let range = token_tree_range(token); + start = start.min(range.start); + end = end.max(range.end); + } + if start == usize::MAX { + None + } else { + Some(start..end) + } +} + +fn token_tree_range(token: TokenTree) -> std::ops::Range { + match token { + TokenTree::Group(group) => { + let open = group.span_open().byte_range(); + let close = group.span_close().byte_range(); + let inner = token_stream_range(&group.stream()); + let start = inner + .as_ref() + .map_or(open.start, |range| range.start) + .min(open.start); + let end = inner + .as_ref() + .map_or(close.end, |range| range.end) + .max(close.end); + start..end + } + TokenTree::Ident(ident) => ident.span().byte_range(), + TokenTree::Punct(punct) => punct.span().byte_range(), + TokenTree::Literal(lit) => lit.span().byte_range(), + } +} + +fn 
line_start(content: &str, offset: usize) -> usize { + content[..offset.min(content.len())] + .rfind('\n') + .map_or(0, |idx| idx + 1) +} + +fn line_end(content: &str, offset: usize) -> usize { + content[offset.min(content.len())..] + .find('\n') + .map_or(content.len(), |idx| offset + idx) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::position::position_to_offset; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, +} + +%% + +%tokentype Token; +%start E; +%token n Token::Num(_); +%token plus Token::Plus; + +E(i32):left=E plus n { left } +| n { + n +} +; +"#; + + #[test] + fn formats_tokens_and_productions() { + let edits = formatting(MOCK_GRAMMAR); + let formatted = apply_edits(MOCK_GRAMMAR, edits); + + assert!(formatted.contains("%token n Token::Num(_);")); + assert!(formatted.contains("%token plus Token::Plus;")); + assert!(formatted + .contains("E(i32)\n : left=E plus n { left }\n | n {\n n\n }\n ;")); + } + + #[test] + fn indents_reduce_action_body_one_level_deeper() { + let edits = formatting(MOCK_GRAMMAR); + let formatted = apply_edits(MOCK_GRAMMAR, edits); + assert!(formatted.contains("{\n n\n }")); + } + + #[test] + fn reindents_reduce_action_as_a_whole_block() { + let action = r#"{ + if n > 0 { + n + } else { + 0 + } + }"#; + + assert_eq!( + format_reduce_action(action), + "{\n if n > 0 {\n n\n } else {\n 0\n }\n }" + ); + } + + fn apply_edits(content: &str, edits: Vec) -> String { + let mut edits = edits + .into_iter() + .map(|edit| { + let start = position_to_offset(content, edit.range.start); + let end = position_to_offset(content, edit.range.end); + (start, end, edit.new_text) + }) + .collect::>(); + edits.sort_by_key(|(start, _, _)| *start); + + let mut result = String::new(); + let mut cursor = 0; + for (start, end, new_text) in edits { + result.push_str(&content[cursor..start]); + result.push_str(&new_text); + cursor = end; + } + result.push_str(&content[cursor..]); + result + } +} diff 
--git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index 66e45f7c..69cae3a6 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -3,7 +3,9 @@ use lsp_types::{ notification::{ DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, }, - request::{CodeActionRequest, Completion, GotoDefinition, HoverRequest, InlayHintRequest}, + request::{ + CodeActionRequest, Completion, Formatting, GotoDefinition, HoverRequest, InlayHintRequest, + }, CodeActionKind, CodeActionOptions, CompletionOptions, Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Hover, HoverProviderCapability, InlayHint, InlayHintOptions, InlayHintServerCapabilities, Location, OneOf, PublishDiagnosticsParams, Range, @@ -20,6 +22,7 @@ use lsp_types::request::Request as LspRequest; mod code_action; mod completion; mod diagnostics; +mod formatter; mod goto_definition; mod hover; mod inlay_hint; @@ -35,6 +38,7 @@ fn main() -> Result<(), Box> { let server_capabilities = serde_json::to_value(&ServerCapabilities { text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), definition_provider: Some(OneOf::Left(true)), + document_formatting_provider: Some(OneOf::Left(true)), code_action_provider: Some(lsp_types::CodeActionProviderCapability::Options( CodeActionOptions { code_action_kinds: Some(vec![CodeActionKind::QUICKFIX]), @@ -196,6 +200,28 @@ fn main() -> Result<(), Box> { Response::new_ok(id, Option::::None) }; connection.sender.send(Message::Response(response))?; + } else if req.method == Formatting::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting formatting request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| formatter::formatting(content)) { + Ok(edits) => Response::new_ok(id, Some(edits)), + Err(message) => { + eprintln!("RustyLR 
formatting panicked: {message}"); + Response::new_ok(id, Option::>::None) + } + } + } else { + Response::new_ok(id, Option::>::None) + }; + connection.sender.send(Message::Response(response))?; } } Message::Response(_resp) => {} From e798a20fe158931e4063777d165b402211adc59e Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 08:32:23 +0900 Subject: [PATCH 10/20] formatting comments --- rusty_lr_lsp/README.md | 2 +- rusty_lr_lsp/src/formatter.rs | 377 +++++++++++++++++++++++++++++++--- 2 files changed, 348 insertions(+), 31 deletions(-) diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md index e22c26ee..14ef2aa5 100644 --- a/rusty_lr_lsp/README.md +++ b/rusty_lr_lsp/README.md @@ -15,7 +15,7 @@ Other Rust files are intentionally not matched by default. - **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. - **Code Actions:** Offers quick fixes for suppressible diagnostics by inserting the appropriate `%allow ...;` directive. -- **Formatting:** Normalizes `%token` declarations, one-space pattern separators, and indentation for production rules and reduce-action bodies. +- **Formatting:** Normalizes directive declarations into one-space, single-line forms, one-space pattern separators, and indentation for production rules and reduce-action bodies. - **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. - **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. - **Inlay Hints:** Shows `Pattern: Type` hints for top-level patterns in non-terminal definitions. 
diff --git a/rusty_lr_lsp/src/formatter.rs b/rusty_lr_lsp/src/formatter.rs index fa044d8c..f8556440 100644 --- a/rusty_lr_lsp/src/formatter.rs +++ b/rusty_lr_lsp/src/formatter.rs @@ -1,6 +1,7 @@ use lsp_types::TextEdit; use proc_macro2::{TokenStream, TokenTree}; use rusty_lr_parser::{GrammarArgs, PatternArgs}; +use std::ops::Range; use crate::completion; use crate::position::range_to_lsp_range; @@ -14,43 +15,178 @@ pub fn formatting(content: &str) -> Vec { }; let mut edits = Vec::new(); - edits.extend(token_edits(&args, content)); + edits.extend(directive_edits(content)); edits.extend(rule_edits(&args, content)); edits } -fn token_edits(args: &GrammarArgs, content: &str) -> Vec { - args.terminals - .iter() - .filter_map(|(name, body)| { - let name_range = args.span_manager.get_byterange(&name.location())?; - let line_start = line_start(content, name_range.start); - let line_end = line_end(content, name_range.end); - let semicolon = content[name_range.end.min(content.len())..line_end] - .find(';') - .map(|idx| name_range.end + idx)?; - - let body_text = token_stream_text(content, body).unwrap_or_default(); - let trailing = content[semicolon + 1..line_end].trim_end(); - let formatted = if body_text.is_empty() { - format!("%token {}", name.value()) - } else { - format!("%token {} {}", name.value(), body_text) - }; - let mut new_text = format!("{formatted};"); - if !trailing.is_empty() { - new_text.push_str(trailing); +fn directive_edits(content: &str) -> Vec { + let Some(grammar_start) = content.find("%%").map(|idx| idx + 2) else { + return Vec::new(); + }; + + let comments = comment_ranges(content); + let mut edits = Vec::new(); + let mut offset = line_start(content, grammar_start); + while offset < content.len() { + let current_line_end = line_end(content, offset); + if offset >= grammar_start { + let line_prefix = &content[offset..current_line_end]; + let leading = line_prefix.len() - line_prefix.trim_start().len(); + let directive_start = offset + leading; + if 
content[directive_start..current_line_end].starts_with('%') { + if let Some((range_end, new_text)) = + format_directive_block(content, directive_start, &comments) + { + edits.push(TextEdit { + range: range_to_lsp_range(content, offset..range_end), + new_text, + }); + offset = content[range_end..] + .find('\n') + .map_or(content.len(), |idx| range_end + idx + 1); + continue; + } } + } + offset = content[current_line_end..] + .find('\n') + .map_or(content.len(), |idx| current_line_end + idx + 1); + } + edits +} - Some(TextEdit { - range: range_to_lsp_range(content, line_start..line_end), - new_text, - }) - }) - .collect() +fn format_directive_block( + content: &str, + start: usize, + comments: &[Range], +) -> Option<(usize, String)> { + let semicolon = find_directive_semicolon(content, start)?; + if range_has_comment(comments, start..semicolon) { + return None; + } + + let range_end = line_end(content, semicolon + 1); + let directive = &content[start..semicolon]; + let trailing = content[semicolon + 1..range_end].trim_end(); + let mut formatted = normalize_directive_spacing(directive); + formatted.push(';'); + if !trailing.is_empty() { + formatted.push_str(trailing); + } + + (formatted != content[line_start(content, start)..range_end]).then_some((range_end, formatted)) +} + +fn normalize_directive_spacing(directive: &str) -> String { + let mut result = String::new(); + let mut pending_space = false; + let trimmed = directive.trim(); + let mut chars = trimmed.char_indices(); + let mut quote = None; + let mut escaped = false; + + while let Some((idx, ch)) = chars.next() { + if let Some(quote_ch) = quote { + result.push(ch); + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == quote_ch { + quote = None; + } + continue; + } + + if ch == '"' || (ch == '\'' && is_single_quote_literal_start(trimmed, idx)) { + if pending_space && !result.is_empty() { + result.push(' '); + pending_space = false; + } + result.push(ch); + quote = 
Some(ch); + } else if ch.is_whitespace() { + pending_space = true; + } else { + if pending_space && !result.is_empty() { + result.push(' '); + pending_space = false; + } + result.push(ch); + } + } + + result +} + +fn find_directive_semicolon(content: &str, start: usize) -> Option { + let mut quote = None; + let mut escaped = false; + let mut paren_depth = 0usize; + let mut bracket_depth = 0usize; + let mut brace_depth = 0usize; + + let remaining = &content[start..]; + for (relative_idx, ch) in remaining.char_indices() { + if let Some(quote_ch) = quote { + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == quote_ch { + quote = None; + } + continue; + } + + match ch { + '"' => quote = Some(ch), + '\'' if is_single_quote_literal_start(remaining, relative_idx) => quote = Some(ch), + '(' => paren_depth += 1, + ')' => paren_depth = paren_depth.saturating_sub(1), + '[' => bracket_depth += 1, + ']' => bracket_depth = bracket_depth.saturating_sub(1), + '{' => brace_depth += 1, + '}' => brace_depth = brace_depth.saturating_sub(1), + ';' if paren_depth == 0 && bracket_depth == 0 && brace_depth == 0 => { + return Some(start + relative_idx); + } + _ => {} + } + } + None +} + +fn is_single_quote_literal_start(text: &str, quote_idx: usize) -> bool { + let mut escaped = false; + for (relative_idx, ch) in text[quote_idx + 1..].char_indices() { + if ch == '\n' || ch == '\r' { + return false; + } + + if escaped { + escaped = false; + continue; + } + if ch == '\\' { + escaped = true; + continue; + } + if ch == '\'' { + let close_end = quote_idx + 1 + relative_idx + ch.len_utf8(); + return match text[close_end..].chars().next() { + Some(next) => !matches!(next, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9'), + None => true, + }; + } + } + + false } fn rule_edits(args: &GrammarArgs, content: &str) -> Vec { + let comments = comment_ranges(content); args.rules .iter() .filter_map(|rule| { @@ -97,6 +233,15 @@ fn rule_edits(args: &GrammarArgs, content: 
&str) -> Vec { formatted.push(';'); let end = rule_block_end(args, content, rule)?; + let action_ranges = rule + .rule_lines + .iter() + .filter_map(|line| line.reduce_action.as_ref().and_then(token_stream_range)) + .collect::>(); + if has_comment_outside_ranges(&comments, start..end, &action_ranges) { + return None; + } + Some(TextEdit { range: range_to_lsp_range(content, start..end), new_text: formatted, @@ -153,6 +298,92 @@ fn strip_indent(line: &str, indent: usize) -> &str { } } +fn comment_ranges(content: &str) -> Vec> { + let mut ranges = Vec::new(); + let mut iter = content.char_indices().peekable(); + let mut quote = None; + let mut escaped = false; + + while let Some((idx, ch)) = iter.next() { + if let Some(quote_ch) = quote { + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == quote_ch { + quote = None; + } + continue; + } + + match ch { + '"' => quote = Some(ch), + '\'' if is_single_quote_literal_start(content, idx) => quote = Some(ch), + '/' => match iter.peek().copied() { + Some((next_idx, '/')) => { + iter.next(); + let end = content[next_idx + 1..] + .find('\n') + .map_or(content.len(), |line_end| next_idx + 1 + line_end); + ranges.push(idx..end); + while let Some((comment_idx, _)) = iter.peek().copied() { + if comment_idx >= end { + break; + } + iter.next(); + } + } + Some((_, '*')) => { + iter.next(); + let end = content[idx + 2..] 
+ .find("*/") + .map_or(content.len(), |comment_end| idx + 2 + comment_end + 2); + ranges.push(idx..end); + while let Some((comment_idx, _)) = iter.peek().copied() { + if comment_idx >= end { + break; + } + iter.next(); + } + } + _ => {} + }, + _ => {} + } + } + + ranges +} + +fn range_has_comment(comments: &[Range], range: Range) -> bool { + comments + .iter() + .any(|comment| ranges_overlap(comment, &range)) +} + +fn has_comment_outside_ranges( + comments: &[Range], + outer: Range, + allowed: &[Range], +) -> bool { + comments + .iter() + .filter(|comment| ranges_overlap(comment, &outer)) + .any(|comment| { + !allowed + .iter() + .any(|allowed_range| range_contains(allowed_range, comment)) + }) +} + +fn ranges_overlap(left: &Range, right: &Range) -> bool { + left.start < right.end && right.start < left.end +} + +fn range_contains(outer: &Range, inner: &Range) -> bool { + outer.start <= inner.start && inner.end <= outer.end +} + fn line_tokens_text( args: &GrammarArgs, content: &str, @@ -429,8 +660,17 @@ pub enum Token { %% -%tokentype Token; -%start E; +%tokentype + Token; +%start E; +%userdata + ParserState; +%allow + unused_terminals([ + 'a'-'z' + '+' + ]); +%left plus "spaced literal"; %token n Token::Num(_); %token plus Token::Plus; @@ -448,6 +688,11 @@ E(i32):left=E plus n { left } assert!(formatted.contains("%token n Token::Num(_);")); assert!(formatted.contains("%token plus Token::Plus;")); + assert!(formatted.contains("%tokentype Token;")); + assert!(formatted.contains("%start E;")); + assert!(formatted.contains("%userdata ParserState;")); + assert!(formatted.contains("%allow unused_terminals([ 'a'-'z' '+' ]);")); + assert!(formatted.contains("%left plus \"spaced literal\";")); assert!(formatted .contains("E(i32)\n : left=E plus n { left }\n | n {\n n\n }\n ;")); } @@ -475,6 +720,78 @@ E(i32):left=E plus n { left } ); } + #[test] + fn formats_multiline_directive_as_one_line() { + let content = "%%\n%tokentype\n [u8; 32];\n%userdata\n &'a str;\n%start\n 
E;\n"; + let formatted = apply_edits(content, directive_edits(content)); + + assert!(formatted.contains("%tokentype [u8; 32];")); + assert!(formatted.contains("%userdata &'a str;")); + assert!(formatted.contains("%start E;")); + } + + #[test] + fn skips_rule_formatting_when_grammar_comment_would_be_lost() { + let content = r#" +#[derive(Debug, Clone)] +pub enum Token { A } + +%% + +%tokentype Token; +%token a Token::A; + +Rule(i32): a { 1 } +// | a { 2 } +; +"#; + let formatted = apply_edits(content, formatting(content)); + + assert!(formatted.contains("Rule(i32): a { 1 }\n// | a { 2 }\n;")); + } + + #[test] + fn formats_reduce_action_comments_inside_action_range() { + let content = r#" +#[derive(Debug, Clone)] +pub enum Token { A } + +%% + +%tokentype Token; +%token a Token::A; + +Rule(i32): a { + // keep this comment + 1 +} +; +"#; + let formatted = apply_edits(content, formatting(content)); + + assert!(formatted.contains( + "Rule(i32)\n : a {\n // keep this comment\n 1\n }\n ;" + )); + } + + #[test] + fn skips_multiline_directive_with_inline_comment() { + let content = "%%\n%tokentype\n // token type comment\n Token;\n"; + let formatted = apply_edits(content, directive_edits(content)); + + assert!(formatted.contains("%tokentype\n // token type comment\n Token;")); + } + + #[test] + fn preserves_comments_in_parser_grammar_fixture() { + let content = include_str!("../../rusty_lr_parser/src/parser/parser.rustylr"); + let formatted = apply_edits(content, formatting(content)); + + assert!(formatted.contains("// | Pattern error {")); + assert!(formatted.contains("// Pattern")); + assert!(crate::completion::parse_args(&formatted).is_ok()); + } + fn apply_edits(content: &str, edits: Vec) -> String { let mut edits = edits .into_iter() From 961f04bce3fa81e0a1c067371d58aa79f8aa248d Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 08:59:03 +0900 Subject: [PATCH 11/20] semantic tokens and VSCode extension settings --- editors/vscode-rustylr/package.json | 8 
+- rusty_lr_lsp/src/main.rs | 95 ++++++ rusty_lr_lsp/src/semantic_tokens.rs | 468 ++++++++++++++++++++++++++++ 3 files changed, 570 insertions(+), 1 deletion(-) create mode 100644 rusty_lr_lsp/src/semantic_tokens.rs diff --git a/editors/vscode-rustylr/package.json b/editors/vscode-rustylr/package.json index b4ef54df..b7daa292 100644 --- a/editors/vscode-rustylr/package.json +++ b/editors/vscode-rustylr/package.json @@ -56,6 +56,11 @@ "**/rustylr.rs" ], "description": "Additional file globs handled by the RustyLR LSP server." + }, + "rustylr.semanticTokens.enabled": { + "type": "boolean", + "default": true, + "description": "Enable or disable semantic token highlighting for RustyLR grammar files." } } }, @@ -89,7 +94,8 @@ "comments": false, "strings": false }, - "editor.suggestOnTriggerCharacters": true + "editor.suggestOnTriggerCharacters": true, + "editor.semanticHighlighting.enabled": true } } }, diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index 69cae3a6..c5f7fc01 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -5,6 +5,7 @@ use lsp_types::{ }, request::{ CodeActionRequest, Completion, Formatting, GotoDefinition, HoverRequest, InlayHintRequest, + SemanticTokensFullRequest, }, CodeActionKind, CodeActionOptions, CompletionOptions, Diagnostic, DiagnosticSeverity, GotoDefinitionResponse, Hover, HoverProviderCapability, InlayHint, InlayHintOptions, @@ -27,6 +28,7 @@ mod goto_definition; mod hover; mod inlay_hint; mod position; +mod semantic_tokens; fn main() -> Result<(), Box> { eprintln!("Starting RustyLR LSP server..."); @@ -57,6 +59,28 @@ fn main() -> Result<(), Box> { trigger_characters: Some(completion_trigger_characters()), ..Default::default() }), + semantic_tokens_provider: Some( + lsp_types::SemanticTokensServerCapabilities::SemanticTokensOptions( + lsp_types::SemanticTokensOptions { + work_done_progress_options: lsp_types::WorkDoneProgressOptions { + work_done_progress: Some(false), + }, + legend: 
lsp_types::SemanticTokensLegend { + token_types: vec![ + lsp_types::SemanticTokenType::ENUM_MEMBER, // terminal + lsp_types::SemanticTokenType::TYPE, // non-terminal + lsp_types::SemanticTokenType::KEYWORD, // directive + lsp_types::SemanticTokenType::PARAMETER, // binding + lsp_types::SemanticTokenType::VARIABLE, // $var + lsp_types::SemanticTokenType::PROPERTY, // @loc + ], + token_modifiers: vec![], + }, + range: Some(false), + full: Some(lsp_types::SemanticTokensFullOptions::Bool(true)), + }, + ), + ), ..Default::default() })?; @@ -66,6 +90,7 @@ fn main() -> Result<(), Box> { // Store open document contents let mut documents: HashMap = HashMap::new(); + let mut semantic_tokens_enabled = true; // Main event loop for msg in &connection.receiver { @@ -222,6 +247,42 @@ fn main() -> Result<(), Box> { Response::new_ok(id, Option::>::None) }; connection.sender.send(Message::Response(response))?; + } else if req.method == SemanticTokensFullRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting semantic tokens request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let response = if semantic_tokens_enabled { + if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| semantic_tokens::semantic_tokens(content)) { + Ok(Some(tokens)) => Response::new_ok( + id, + Some(lsp_types::SemanticTokensResult::Tokens(tokens)), + ), + Ok(None) => Response::new_ok( + id, + Option::::None, + ), + Err(message) => { + eprintln!("RustyLR semantic tokens panicked: {message}"); + Response::new_ok( + id, + Option::::None, + ) + } + } + } else { + Response::new_ok(id, Option::::None) + } + } else { + Response::new_ok(id, Option::::None) + }; + connection.sender.send(Message::Response(response))?; } } Message::Response(_resp) => {} @@ -265,6 +326,40 @@ fn main() -> Result<(), Box> { if let Some(text) = documents.get(&uri) { publish_diagnostics(&connection, uri, text); } + } else if 
not.method == lsp_types::notification::DidChangeConfiguration::METHOD { + let params = match cast_notification::< + lsp_types::notification::DidChangeConfiguration, + >(not) + { + Ok(res) => res, + Err(e) => { + eprintln!( + "Error extracting didChangeConfiguration notification: {:?}", + e + ); + continue; + } + }; + let mut enabled = None; + if let Some(rustylr) = params.settings.get("rustylr") { + if let Some(sem_toks) = rustylr.get("semanticTokens") { + if let Some(val) = sem_toks.get("enabled").and_then(|v| v.as_bool()) { + enabled = Some(val); + } + } + } + if enabled.is_none() { + if let Some(val) = params + .settings + .get("rustylr.semanticTokens.enabled") + .and_then(|v| v.as_bool()) + { + enabled = Some(val); + } + } + if let Some(val) = enabled { + semantic_tokens_enabled = val; + } } } } diff --git a/rusty_lr_lsp/src/semantic_tokens.rs b/rusty_lr_lsp/src/semantic_tokens.rs new file mode 100644 index 00000000..3e2d6bf5 --- /dev/null +++ b/rusty_lr_lsp/src/semantic_tokens.rs @@ -0,0 +1,468 @@ +use crate::position::offset_to_position; +use lsp_types::{SemanticToken, SemanticTokens}; +use proc_macro2::{TokenStream, TokenTree}; +use std::collections::HashSet; +use std::str::FromStr; + +#[derive(Debug, Clone, PartialEq, Eq)] +struct RawSemanticToken { + line: u32, + start: u32, + length: u32, + token_type: u32, +} + +/// Main entry point for semantic tokens: takes file content and returns encoded SemanticTokens. +pub fn semantic_tokens(content: &str) -> Option { + let grammar_start = find_grammar_start_offset(content).unwrap_or(0); + let grammar_section = &content[grammar_start..]; + + let token_stream = TokenStream::from_str(grammar_section).ok()?; + let tokens: Vec = token_stream.into_iter().collect(); + + // 1. 
Collect terminal and non-terminal names + let (mut terminals, mut non_terminals) = collect_names(&tokens); + + // Also attempt to get names from completion module's parsed GrammarArgs if possible + if let Ok(args) = crate::completion::parse_args(content) { + for (term, _) in args.terminals { + terminals.insert(term.value().clone()); + } + for rule in args.rules { + non_terminals.insert(rule.name.value().clone()); + } + } + + terminals.insert("error".to_string()); + + // 2. Traverse tokens to build RawSemanticToken list + let mut raw_tokens = Vec::new(); + traverse_tokens( + tokens, + content, + grammar_start, + false, + &terminals, + &non_terminals, + &mut raw_tokens, + ); + + // 3. Sort tokens: by line, then by start character + raw_tokens.sort_by(|a, b| { + if a.line != b.line { + a.line.cmp(&b.line) + } else { + a.start.cmp(&b.start) + } + }); + + // 4. Delta-encode the sorted tokens + let mut data = Vec::new(); + let mut last_line = 0; + let mut last_start = 0; + + for token in raw_tokens { + let delta_line = token.line - last_line; + let delta_start = if delta_line == 0 { + token.start - last_start + } else { + token.start + }; + + data.push(SemanticToken { + delta_line, + delta_start, + length: token.length, + token_type: token.token_type, + token_modifiers_bitset: 0, + }); + + last_line = token.line; + last_start = token.start; + } + + Some(SemanticTokens { + result_id: None, + data, + }) +} + +/// Find the end of the `%%` separator, which marks the start of the grammar section. 
+fn find_grammar_start_offset(content: &str) -> Option { + let token_stream = TokenStream::from_str(content).ok()?; + let mut iter = token_stream.into_iter().peekable(); + while let Some(token) = iter.next() { + if let TokenTree::Punct(punct) = &token { + if punct.as_char() == '%' && punct.spacing() == proc_macro2::Spacing::Joint { + if let Some(TokenTree::Punct(next)) = iter.peek() { + if next.as_char() == '%' && next.spacing() == proc_macro2::Spacing::Alone { + return Some(next.span().byte_range().end); + } + } + } + } + } + None +} + +/// Helper to scan top-level tokens in the grammar section to extract terminal/non-terminal declarations. +fn collect_names(tokens: &[TokenTree]) -> (HashSet, HashSet) { + let mut terminals = HashSet::new(); + let mut non_terminals = HashSet::new(); + let mut iter = tokens.iter().peekable(); + + while let Some(token) = iter.next() { + match token { + TokenTree::Punct(punct) if punct.as_char() == '%' => { + if let Some(TokenTree::Ident(ident)) = iter.peek() { + if ident.to_string() == "token" { + iter.next(); // consume "token" + if let Some(TokenTree::Ident(term_name)) = iter.peek() { + terminals.insert(term_name.to_string()); + } + } + } + } + TokenTree::Ident(ident) => { + // Rule definition: Ident [type] : ... + let mut temp_iter = iter.clone(); + let mut is_rule = false; + if let Some(next) = temp_iter.peek() { + if let TokenTree::Group(group) = next { + if group.delimiter() == proc_macro2::Delimiter::Parenthesis { + temp_iter.next(); + } + } + } + if let Some(TokenTree::Punct(punct)) = temp_iter.peek() { + if punct.as_char() == ':' { + is_rule = true; + } + } + if is_rule { + non_terminals.insert(ident.to_string()); + } + } + _ => {} + } + } + + (terminals, non_terminals) +} + +/// Recursive traversal of the token stream. 
+fn traverse_tokens( + tokens: Vec, + full_content: &str, + section_offset: usize, + in_action: bool, + terminals: &HashSet, + non_terminals: &HashSet, + raw_tokens: &mut Vec, +) { + let mut iter = tokens.into_iter().peekable(); + + while let Some(token) = iter.next() { + match token { + TokenTree::Group(group) => { + let delimiter = group.delimiter(); + let sub_in_action = in_action || delimiter == proc_macro2::Delimiter::Brace; + let sub_tokens: Vec = group.stream().into_iter().collect(); + traverse_tokens( + sub_tokens, + full_content, + section_offset, + sub_in_action, + terminals, + non_terminals, + raw_tokens, + ); + } + TokenTree::Punct(punct) => { + let ch = punct.as_char(); + let span = punct.span(); + let range = span.byte_range(); + let absolute_start = section_offset + range.start; + + if ch == '%' { + if !in_action { + if let Some(TokenTree::Ident(next_ident)) = iter.peek() { + let next_range = next_ident.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 2, // directive + }); + iter.next(); + continue; + } + } + } else if ch == '$' { + let mut handled = false; + if let Some(next) = iter.peek() { + match next { + TokenTree::Ident(next_ident) => { + let next_range = next_ident.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + iter.next(); + handled = true; + } + TokenTree::Literal(next_lit) => { + let next_str = 
next_lit.to_string(); + if next_str.chars().all(|c| c.is_ascii_digit()) { + let next_range = next_lit.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = + offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + iter.next(); + handled = true; + } + } + TokenTree::Punct(next_punct) if next_punct.as_char() == '$' => { + let next_range = next_punct.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + iter.next(); + handled = true; + } + _ => {} + } + } + if !handled { + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, section_offset + range.end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + } + } else if ch == '@' { + let mut handled = false; + if let Some(next) = iter.peek() { + match next { + TokenTree::Ident(next_ident) => { + let next_range = next_ident.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + iter.next(); + handled = true; + } + 
TokenTree::Literal(next_lit) => { + let next_str = next_lit.to_string(); + if next_str.chars().all(|c| c.is_ascii_digit()) { + let next_range = next_lit.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = + offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + iter.next(); + handled = true; + } + } + TokenTree::Punct(next_punct) if next_punct.as_char() == '$' => { + let next_range = next_punct.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + iter.next(); + handled = true; + } + _ => {} + } + } + if !handled { + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, section_offset + range.end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + } + } + } + TokenTree::Ident(ident) => { + let name = ident.to_string(); + if in_action { + if name == "data" || name == "lookahead" || name == "shift" { + let span = ident.span(); + let range = span.byte_range(); + let absolute_start = section_offset + range.start; + let absolute_end = section_offset + range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: 
end_pos.character - start_pos.character, + token_type: 3, // binding / parameter + }); + } + continue; + } + + let span = ident.span(); + let range = span.byte_range(); + let absolute_start = section_offset + range.start; + let absolute_end = section_offset + range.end; + let name = ident.to_string(); + + let mut is_binding = false; + if let Some(TokenTree::Punct(next_punct)) = iter.peek() { + if next_punct.as_char() == '=' { + is_binding = true; + } + } + + let token_type = if is_binding { + Some(3) // binding + } else if terminals.contains(&name) { + Some(0) // terminal + } else if non_terminals.contains(&name) { + Some(1) // non-terminal + } else { + None + }; + + if let Some(tt) = token_type { + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: tt, + }); + } + } + _ => {} + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Comma, +} + +%% + +%tokentype Token; +%start List; + +%token num Token::Num(_); +%token plus Token::Plus; +%token comma Token::Comma; + +E(i32) : left=E plus num { $1 + $3 } + | error { *data += 1; 0 } + ; +List(Vec) : $sep(E, comma, +) { E }; +"#; + + #[test] + fn test_semantic_tokens() { + let tokens_res = semantic_tokens(MOCK_GRAMMAR).expect("Failed to parse semantic tokens"); + let data_res = tokens_res.data; + assert!(!data_res.is_empty()); + + // Decode delta-encoded tokens and map back to substrings + let mut decoded = Vec::new(); + let mut current_line = 0; + let mut current_char = 0; + for token in &data_res { + current_line += token.delta_line; + if token.delta_line == 0 { + current_char += token.delta_start; + } else { + current_char = token.delta_start; + } + + // Find substring in MOCK_GRAMMAR + 
let pos = lsp_types::Position::new(current_line, current_char); + let start_offset = crate::position::position_to_offset(MOCK_GRAMMAR, pos); + let end_offset = start_offset + token.length as usize; + let text = &MOCK_GRAMMAR[start_offset..end_offset]; + decoded.push((text.to_string(), token.token_type)); + } + + // Directives (type 2) + assert!(decoded.contains(&("%tokentype".to_string(), 2))); + assert!(decoded.contains(&("%start".to_string(), 2))); + assert!(decoded.contains(&("%token".to_string(), 2))); + + // Terminals (type 0) + assert!(decoded.contains(&("num".to_string(), 0))); + assert!(decoded.contains(&("plus".to_string(), 0))); + assert!(decoded.contains(&("comma".to_string(), 0))); + assert!(decoded.contains(&("error".to_string(), 0))); // reserved terminal + + // Non-terminals (type 1) + assert!(decoded.contains(&("E".to_string(), 1))); + assert!(decoded.contains(&("List".to_string(), 1))); + + // Bindings / parameters (type 3) + assert!(decoded.contains(&("left".to_string(), 3))); + assert!(decoded.contains(&("data".to_string(), 3))); // reserved reduce parameter + + // $vars (type 4) + assert!(decoded.contains(&("$1".to_string(), 4))); + assert!(decoded.contains(&("$3".to_string(), 4))); + assert!(decoded.contains(&("$sep".to_string(), 4))); + } +} From 912a59cafbb6b574a89f56562a3a8a711b3d70b5 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:08:09 +0900 Subject: [PATCH 12/20] show userdata definition --- rusty_lr_lsp/src/hover.rs | 52 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs index 51fab980..a362d900 100644 --- a/rusty_lr_lsp/src/hover.rs +++ b/rusty_lr_lsp/src/hover.rs @@ -26,7 +26,31 @@ pub fn hover(content: &str, position: Position) -> Option { } let word = hover_word(content, offset)?; - let documentation = hover_word_documentation(&word)?; + let documentation = if word == "data" { + let mut userdata_type = 
"()".to_string(); + let mut definition_info = "".to_string(); + + if let Some(args) = &parsed { + if let Some((_, ts)) = args.userdata_typename.first() { + userdata_type = ts.to_string(); + definition_info = format!( + "\n\nDefinition:\n```rustylr\n%userdata {};\n```", + userdata_type + ); + } + } + + Some(format!( + "### `data: &mut {}`{}\n\nMutable user-data binding available inside reduce actions.\n\nExample:\n\n```rustylr\nExpr : num {{ data.count += 1; num }};\n```\n\n[User data]({}#4-user-data-data)", + userdata_type, + definition_info, + SYNTAX_URL + )) + } else { + hover_word_documentation(&word) + }; + + let documentation = documentation?; Some(markdown_hover(content, documentation, None)) } @@ -658,4 +682,30 @@ List(Vec) : $sep(E, comma, +) { E }; .value .contains("Pattern helper for separated repetition")); } + + #[test] + fn hovers_data_with_userdata_type() { + let grammar_with_userdata = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32) } +%% +%userdata MyCoolData; +%tokentype Token; +%start Expr; +%token num Token::Num(_); +Expr : num { *data += 1; 0 }; +"#; + let offset = grammar_with_userdata.find("*data").unwrap() + 1; // points to 'd' in 'data' + let hover = hover( + grammar_with_userdata, + crate::position::offset_to_position(grammar_with_userdata, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("data: &mut MyCoolData")); + assert!(markup.value.contains("%userdata MyCoolData;")); + } } + From 7ecfce144d6b495d9a333361a9e21c07b5ee82b0 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:10:30 +0900 Subject: [PATCH 13/20] hover detection at '@' --- rusty_lr_lsp/src/hover.rs | 53 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs index a362d900..626c2e46 100644 --- a/rusty_lr_lsp/src/hover.rs +++ 
b/rusty_lr_lsp/src/hover.rs @@ -65,13 +65,26 @@ fn markdown_hover(content: &str, value: String, range: Option>) } fn hover_word(content: &str, offset: usize) -> Option { - let offset = offset.min(content.len()); + let mut offset = offset.min(content.len()); + if offset < content.len() { + let ch = content[offset..].chars().next()?; + if ch == '@' || ch == '$' || ch == '%' { + offset += ch.len_utf8(); + } + } + let start = completion::current_prefix_start(content, offset, true); let mut end = offset; while end < content.len() { let ch = content[end..].chars().next()?; if completion::is_ident_continue(ch) { end += ch.len_utf8(); + } else if ch == '$' && &content[start..end] == "@" { + end += ch.len_utf8(); + break; + } else if ch == '$' && &content[start..end] == "$" { + end += ch.len_utf8(); + break; } else { break; } @@ -707,5 +720,41 @@ Expr : num { *data += 1; 0 }; assert!(markup.value.contains("data: &mut MyCoolData")); assert!(markup.value.contains("%userdata MyCoolData;")); } -} + #[test] + fn hovers_sigils() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32) } +%% +%userdata MyCoolData; +%tokentype Token; +%start Expr; +%token num Token::Num(_); +Expr : num { println!("{:?}, {:?}", @1, @$); 0 }; +"#; + // Hover on '@' of '@1' + let offset = grammar.find("@1").unwrap(); + let hover1 = hover( + grammar, + crate::position::offset_to_position(grammar, offset), + ) + .unwrap(); + let HoverContents::Markup(markup1) = hover1.contents else { + panic!("expected markup hover"); + }; + assert!(markup1.value.contains("`@1` refers to a source-location")); + + // Hover on '@' of '@$' + let offset = grammar.find("@$").unwrap(); + let hover2 = hover( + grammar, + crate::position::offset_to_position(grammar, offset), + ) + .unwrap(); + let HoverContents::Markup(markup2) = hover2.contents else { + panic!("expected markup hover"); + }; + assert!(markup2.value.contains("`@$` refers to a source-location")); + } +} From 
01f27af350923aee745853f09792422de36020a2 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:13:42 +0900 Subject: [PATCH 14/20] semantic and hover support for %prec defined identifiers --- rusty_lr_lsp/src/hover.rs | 99 +++++++++++++++++++++++------ rusty_lr_lsp/src/semantic_tokens.rs | 41 +++++++++++- 2 files changed, 119 insertions(+), 21 deletions(-) diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs index 626c2e46..32d95cd9 100644 --- a/rusty_lr_lsp/src/hover.rs +++ b/rusty_lr_lsp/src/hover.rs @@ -26,29 +26,60 @@ pub fn hover(content: &str, position: Position) -> Option { } let word = hover_word(content, offset)?; - let documentation = if word == "data" { - let mut userdata_type = "()".to_string(); - let mut definition_info = "".to_string(); - - if let Some(args) = &parsed { - if let Some((_, ts)) = args.userdata_typename.first() { - userdata_type = ts.to_string(); - definition_info = format!( - "\n\nDefinition:\n```rustylr\n%userdata {};\n```", - userdata_type - ); + let mut documentation = None; + + if let Some(args) = &parsed { + let mut assoc_type = ""; + let mut declaration_items = Vec::new(); + let mut found_prec = false; + for (_, assoc, items) in &args.precedences { + if items.iter().any(|item| item.to_string() == word) { + assoc_type = match assoc { + Some(rusty_lr_core::production::Associativity::Left) => "%left", + Some(rusty_lr_core::production::Associativity::Right) => "%right", + None => "%precedence", + }; + declaration_items = items.iter().map(|i| i.to_string()).collect(); + found_prec = true; + break; } } - Some(format!( - "### `data: &mut {}`{}\n\nMutable user-data binding available inside reduce actions.\n\nExample:\n\n```rustylr\nExpr : num {{ data.count += 1; num }};\n```\n\n[User data]({}#4-user-data-data)", - userdata_type, - definition_info, - SYNTAX_URL - )) - } else { - hover_word_documentation(&word) - }; + if found_prec { + documentation = Some(format!( + "### Precedence Symbol `{}`\n\nDeclared 
via:\n```rustylr\n{} {};\n```", + word, + assoc_type, + declaration_items.join(" ") + )); + } + } + + if documentation.is_none() { + if word == "data" { + let mut userdata_type = "()".to_string(); + let mut definition_info = "".to_string(); + + if let Some(args) = &parsed { + if let Some((_, ts)) = args.userdata_typename.first() { + userdata_type = ts.to_string(); + definition_info = format!( + "\n\nDefinition:\n```rustylr\n%userdata {};\n```", + userdata_type + ); + } + } + + documentation = Some(format!( + "### `data: &mut {}`{}\n\nMutable user-data binding available inside reduce actions.\n\nExample:\n\n```rustylr\nExpr : num {{ data.count += 1; num }};\n```\n\n[User data]({}#4-user-data-data)", + userdata_type, + definition_info, + SYNTAX_URL + )); + } else { + documentation = hover_word_documentation(&word); + } + } let documentation = documentation?; Some(markdown_hover(content, documentation, None)) @@ -757,4 +788,32 @@ Expr : num { println!("{:?}, {:?}", @1, @$); 0 }; }; assert!(markup2.value.contains("`@$` refers to a source-location")); } + + #[test] + fn hovers_precedence_symbol() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32) } +%% +%userdata MyCoolData; +%tokentype Token; +%start Expr; +%left plus minus; +%token num Token::Num(_); +Expr : Expr plus Expr + | num %prec minus + ; +"#; + let offset = grammar.find("minus").unwrap(); + let hover_res = hover( + grammar, + crate::position::offset_to_position(grammar, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover_res.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Precedence Symbol `minus`")); + assert!(markup.value.contains("```rustylr\n%left plus minus;\n```")); + } } diff --git a/rusty_lr_lsp/src/semantic_tokens.rs b/rusty_lr_lsp/src/semantic_tokens.rs index 3e2d6bf5..7a159bf4 100644 --- a/rusty_lr_lsp/src/semantic_tokens.rs +++ b/rusty_lr_lsp/src/semantic_tokens.rs @@ -31,6 +31,11 @@ pub fn semantic_tokens(content: 
&str) -> Option { for rule in args.rules { non_terminals.insert(rule.name.value().clone()); } + for (_, _, items) in args.precedences { + for item in items { + terminals.insert(item.to_string()); + } + } } terminals.insert("error".to_string()); @@ -115,11 +120,39 @@ fn collect_names(tokens: &[TokenTree]) -> (HashSet, HashSet) { match token { TokenTree::Punct(punct) if punct.as_char() == '%' => { if let Some(TokenTree::Ident(ident)) = iter.peek() { - if ident.to_string() == "token" { + let directive = ident.to_string(); + if directive == "token" { iter.next(); // consume "token" if let Some(TokenTree::Ident(term_name)) = iter.peek() { terminals.insert(term_name.to_string()); } + } else if directive == "left" + || directive == "right" + || directive == "precedence" + { + iter.next(); // consume directive keyword + while let Some(next_token) = iter.peek() { + match next_token { + TokenTree::Punct(p) if p.as_char() == ';' => { + iter.next(); + break; + } + TokenTree::Ident(id) => { + terminals.insert(id.to_string()); + iter.next(); + } + TokenTree::Literal(lit) => { + terminals.insert(lit.to_string()); + iter.next(); + } + TokenTree::Punct(_) => { + iter.next(); + } + _ => { + iter.next(); + } + } + } } } } @@ -405,12 +438,15 @@ pub enum Token { %tokentype Token; %start List; +%left plus minus; + %token num Token::Num(_); %token plus Token::Plus; %token comma Token::Comma; E(i32) : left=E plus num { $1 + $3 } | error { *data += 1; 0 } + | num %prec minus { 0 } ; List(Vec) : $sep(E, comma, +) { E }; "#; @@ -445,11 +481,14 @@ List(Vec) : $sep(E, comma, +) { E }; assert!(decoded.contains(&("%tokentype".to_string(), 2))); assert!(decoded.contains(&("%start".to_string(), 2))); assert!(decoded.contains(&("%token".to_string(), 2))); + assert!(decoded.contains(&("%left".to_string(), 2))); + assert!(decoded.contains(&("%prec".to_string(), 2))); // Terminals (type 0) assert!(decoded.contains(&("num".to_string(), 0))); assert!(decoded.contains(&("plus".to_string(), 0))); 
assert!(decoded.contains(&("comma".to_string(), 0))); + assert!(decoded.contains(&("minus".to_string(), 0))); // precedence-only symbol highlighted as terminal assert!(decoded.contains(&("error".to_string(), 0))); // reserved terminal // Non-terminals (type 1) From 9f7539579bce7396cab8f145c4cfe4de61948b65 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:15:08 +0900 Subject: [PATCH 15/20] fix AGENTS.md to sync for lsp projects --- AGENTS.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 32523fcd..f671877c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -55,3 +55,9 @@ If an implementation plan artifact is created, also print the full plan directly When modifying code, comments, or documentation, use formal terminology based on Programming Language Theory, Theory of Computation, and Type Theory for internal logic. Prefer terms such as `Symbol` and `Production` internally. For user-facing Bison-inspired syntax, keep familiar Bison terminology such as `%token` and `%tokentype`. + +## 9. Keep LSP Synchronized with Grammar Changes + +Whenever changes are made to the grammar syntax, directives, patterns, or variables: +- Update the LSP implementation in `rusty_lr_lsp` to fully support and recognize the updated grammar. +- Ensure that semantic tokens, hover information, completions, inlay hints, and diagnostic handling are kept aligned with the new grammar specifications. 
From 20dc9c640b09e1f9efef68c9e45e729ffef2a5a5 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:20:16 +0900 Subject: [PATCH 16/20] hover, inlay support for reduce action --- SYNTAX.md | 2 +- rusty_lr_lsp/src/hover.rs | 159 +++++++++++++++++++++++++++++++++ rusty_lr_lsp/src/inlay_hint.rs | 61 ++++++++++++- 3 files changed, 220 insertions(+), 2 deletions(-) diff --git a/SYNTAX.md b/SYNTAX.md index 9e15b661..8ed658f2 100644 --- a/SYNTAX.md +++ b/SYNTAX.md @@ -10,7 +10,7 @@ This document provides a comprehensive guide to the grammar definition syntax us - [Token Definition (`%token`)](#token-definition-must-defined) - [Production Rules](#production-rules) - [Patterns](#patterns) -- [ProductionType (Non-Terminal Types)](#ruletype-optional) +- [ProductionType (Non-Terminal Types)](#productiontype-optional) - [Reduce Actions](#reduceaction-optional) - [Accessing Data in Reduce Actions](#accessing-token-data-in-reduceaction) - [Exclamation Mark (`!`) Value Discard](#exclamation-mark-) diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs index 32d95cd9..e72123f0 100644 --- a/rusty_lr_lsp/src/hover.rs +++ b/rusty_lr_lsp/src/hover.rs @@ -16,6 +16,10 @@ pub fn hover(content: &str, position: Position) -> Option { let parsed = completion::parse_args(content).ok(); if let Some(args) = &parsed { + if let Some((brace_range, doc)) = reduce_action_brace_at_offset(args, content, offset) { + return Some(markdown_hover(content, doc, Some(brace_range))); + } + if let Some((pattern, range)) = pattern_at_offset(args, offset) { return Some(markdown_hover( content, @@ -95,6 +99,64 @@ fn markdown_hover(content: &str, value: String, range: Option>) } } +fn reduce_action_brace_at_offset( + args: &GrammarArgs, + content: &str, + offset: usize, +) -> Option<(ByteRange, String)> { + for rule in &args.rules { + for line in &rule.rule_lines { + if let Some(reduce_action) = &line.reduce_action { + if let Some(proc_macro2::TokenTree::Group(group)) = 
reduce_action.clone().into_iter().next() { + if group.delimiter() == proc_macro2::Delimiter::Brace { + let action_range = group.span().byte_range(); + + // Check start brace(s) + if action_range.start < content.len() && content.as_bytes()[action_range.start] == b'{' { + let start_brace_end = if action_range.start + 1 < action_range.end + && content.as_bytes()[action_range.start + 1] == b'{' + { + action_range.start + 2 + } else { + action_range.start + 1 + }; + let start_brace_range = action_range.start .. start_brace_end; + if start_brace_range.contains(&offset) { + return Some((start_brace_range, reduce_action_documentation())); + } + } + + // Check end brace(s) + if action_range.end > action_range.start && action_range.end <= content.len() { + if content.as_bytes()[action_range.end - 1] == b'}' { + let end_brace_start = if action_range.end - 2 >= action_range.start + && content.as_bytes()[action_range.end - 2] == b'}' + { + action_range.end - 2 + } else { + action_range.end - 1 + }; + let end_brace_range = end_brace_start .. 
action_range.end; + if end_brace_range.contains(&offset) { + return Some((end_brace_range, reduce_action_documentation())); + } + } + } + } + } + } + } + } + None +} + +fn reduce_action_documentation() -> String { + format!( + "### Reduce Action\n\nA block of Rust code executed when this production rule is reduced.\n\n[Reduce Actions]({}#reduceaction-optional)", + SYNTAX_URL + ) +} + fn hover_word(content: &str, offset: usize) -> Option { let mut offset = offset.min(content.len()); if offset < content.len() { @@ -816,4 +878,101 @@ Expr : Expr plus Expr assert!(markup.value.contains("Precedence Symbol `minus`")); assert!(markup.value.contains("```rustylr\n%left plus minus;\n```")); } + + #[test] + fn hovers_reduce_action_braces() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32), Plus } +%% +%tokentype Token; +%start Expr; +%token num Token::Num(_); +%token plus Token::Plus; +Expr : num { 0 } + | num plus num {{ 0 }} + ; +"#; + + // 1. Single brace start hover + let start_brace_offset = grammar.find("{ 0 }").unwrap(); + let hover_start = hover( + grammar, + crate::position::offset_to_position(grammar, start_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_start) = hover_start.contents else { + panic!("expected markup hover"); + }; + assert!(markup_start.value.contains("### Reduce Action")); + assert!(markup_start.value.contains("A block of Rust code executed when this production rule is reduced")); + assert!(markup_start.value.contains("#reduceaction-optional")); + assert_eq!( + hover_start.range.unwrap(), + crate::position::range_to_lsp_range(grammar, start_brace_offset .. start_brace_offset + 1) + ); + + // 2. 
Single brace end hover + let end_brace_offset = start_brace_offset + 4; // points to '}' of '{ 0 }' + let hover_end = hover( + grammar, + crate::position::offset_to_position(grammar, end_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_end) = hover_end.contents else { + panic!("expected markup hover"); + }; + assert!(markup_end.value.contains("### Reduce Action")); + assert_eq!( + hover_end.range.unwrap(), + crate::position::range_to_lsp_range(grammar, end_brace_offset .. end_brace_offset + 1) + ); + + // 3. Double brace start hover (first brace) + let dstart_brace_offset = grammar.find("{{ 0 }}").unwrap(); + let hover_dstart1 = hover( + grammar, + crate::position::offset_to_position(grammar, dstart_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_dstart1) = hover_dstart1.contents else { + panic!("expected markup hover"); + }; + assert!(markup_dstart1.value.contains("### Reduce Action")); + assert_eq!( + hover_dstart1.range.unwrap(), + crate::position::range_to_lsp_range(grammar, dstart_brace_offset .. dstart_brace_offset + 2) + ); + + // 4. Double brace start hover (second brace) + let hover_dstart2 = hover( + grammar, + crate::position::offset_to_position(grammar, dstart_brace_offset + 1), + ) + .unwrap(); + let HoverContents::Markup(markup_dstart2) = hover_dstart2.contents else { + panic!("expected markup hover"); + }; + assert!(markup_dstart2.value.contains("### Reduce Action")); + assert_eq!( + hover_dstart2.range.unwrap(), + crate::position::range_to_lsp_range(grammar, dstart_brace_offset .. dstart_brace_offset + 2) + ); + + // 5. 
Double brace end hover (first of closing braces) + let dend_brace_offset = grammar.find("}}").unwrap(); + let hover_dend1 = hover( + grammar, + crate::position::offset_to_position(grammar, dend_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_dend1) = hover_dend1.contents else { + panic!("expected markup hover"); + }; + assert!(markup_dend1.value.contains("### Reduce Action")); + assert_eq!( + hover_dend1.range.unwrap(), + crate::position::range_to_lsp_range(grammar, dend_brace_offset .. dend_brace_offset + 2) + ); + } } diff --git a/rusty_lr_lsp/src/inlay_hint.rs b/rusty_lr_lsp/src/inlay_hint.rs index 75a2bb7c..649eebe8 100644 --- a/rusty_lr_lsp/src/inlay_hint.rs +++ b/rusty_lr_lsp/src/inlay_hint.rs @@ -36,6 +36,37 @@ pub fn inlay_hints(content: &str, range: Range) -> Vec { hints.push(pattern_inlay_hint(&args, &grammar, content, pattern)); } + + if let Some(reduce_action) = &line.reduce_action { + if let Some(proc_macro2::TokenTree::Group(group)) = reduce_action.clone().into_iter().next() { + if group.delimiter() == proc_macro2::Delimiter::Brace { + let action_range = group.span().byte_range(); + if ranges_overlap( + action_range.start, + action_range.end, + range_start, + range_end, + ) { + hints.push(InlayHint { + position: offset_to_position(content, action_range.start), + label: InlayHintLabel::String("ReduceAction".to_string()), + kind: None, + text_edits: None, + tooltip: Some(lsp_types::InlayHintTooltip::MarkupContent(lsp_types::MarkupContent { + kind: lsp_types::MarkupKind::Markdown, + value: format!( + "A block of Rust code executed when this production rule is reduced.\n\n[Reduce Actions]({}#reduceaction-optional)", + completion::SYNTAX_URL + ), + })), + padding_left: Some(true), + padding_right: Some(true), + data: None, + }); + } + } + } + } } } @@ -135,6 +166,34 @@ List(Vec) : $sep(E, comma, +) { E }; }) .collect::>(); - assert_eq!(labels, vec![": Vec"]); + assert_eq!(labels, vec![": Vec", "ReduceAction"]); + } + + #[test] + fn 
hints_reduce_actions_with_custom_tooltip() { + let hints = inlay_hints( + MOCK_GRAMMAR, + Range::new(Position::new(0, 0), Position::new(100, 0)), + ); + + let reduce_action_hints = hints + .iter() + .filter(|hint| match &hint.label { + InlayHintLabel::String(label) => label == "ReduceAction", + _ => false, + }) + .collect::>(); + + assert!(!reduce_action_hints.is_empty()); + for hint in reduce_action_hints { + let tooltip = hint.tooltip.as_ref().unwrap(); + match tooltip { + lsp_types::InlayHintTooltip::MarkupContent(markup) => { + assert!(markup.value.contains("A block of Rust code executed when this production rule is reduced")); + assert!(markup.value.contains("#reduceaction-optional")); + } + _ => panic!("expected MarkupContent tooltip"), + } + } } } From 7669b094172ad02c8f1c967dd00deaeb32066275 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:23:03 +0900 Subject: [PATCH 17/20] fixed wording ruletype to production type --- SYNTAX.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYNTAX.md b/SYNTAX.md index 8ed658f2..1de9bd28 100644 --- a/SYNTAX.md +++ b/SYNTAX.md @@ -511,7 +511,7 @@ You can use variables prefixed with `$` inside any RustCode block in the grammar - `$location` -> Evaluates to the type defined by `%location` (defaults to `::rusty_lr::DefaultLocation`). - `$userdata` -> Evaluates to the type defined by `%userdata` (defaults to `()`). - `$error` or `$errortype` -> Evaluates to the type defined by `%errortype` / `%error` (defaults to `::rusty_lr::DefaultReduceActionError`). -- `$NonTerminalName` -> Evaluates to the `ruletype` defined for `NonTerminalName`. +- `$NonTerminalName` -> Evaluates to the `ProductionType` defined for `NonTerminalName`. - `$terminal_name` -> Evaluates to the match pattern/definition of ``. 
### Substitution Errors From 0532e7adb9d315ec958d9e0d09f29a9a491e8af2 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:31:46 +0900 Subject: [PATCH 18/20] support for Find References/Goto Definition --- rusty_lr_lsp/README.md | 33 ++-- rusty_lr_lsp/src/goto_definition.rs | 73 ++++++- rusty_lr_lsp/src/main.rs | 35 ++++ rusty_lr_lsp/src/references.rs | 288 ++++++++++++++++++++++++++++ rusty_lr_parser/src/lib.rs | 4 +- 5 files changed, 418 insertions(+), 15 deletions(-) create mode 100644 rusty_lr_lsp/src/references.rs diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md index 14ef2aa5..79b4f74f 100644 --- a/rusty_lr_lsp/README.md +++ b/rusty_lr_lsp/README.md @@ -13,17 +13,28 @@ Other Rust files are intentionally not matched by default. ## Features -- **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. -- **Code Actions:** Offers quick fixes for suppressible diagnostics by inserting the appropriate `%allow ...;` directive. -- **Formatting:** Normalizes directive declarations into one-space, single-line forms, one-space pattern separators, and indentation for production rules and reduce-action bodies. -- **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions. -- **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. -- **Inlay Hints:** Shows `Pattern: Type` hints for top-level patterns in non-terminal definitions. -- **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. Completion details include the resolved Rust type for terminals and non-terminals, including inferred placeholders and a note when the value is boxed for parser storage. 
-- **Completion for directives and keywords:** Suggests directives such as `%token`, `%start`, `%tokentype`, `%left`, `%right`, `%precedence`, `%prec`, `%dprec`, `%glr`, `%lalr`, `%nooptim`, `%allow`, and common identifiers such as `error`, `$sep`, `data`, `lookahead`, and `shift`. -- **Completion for `$...` variables:** Suggests built-in substitutions (`$tokentype`, `$location`, `$userdata`, `$error`, `$errortype`), terminal and non-terminal substitutions (`$terminal_name`, `$NonTerminalName`), current reduce-action bindings (`$left`, `$value`, etc.), and positional semantic variables (`$1`, `$2`, ...). -- **Completion for locations:** Suggests `@$`, `@0`, positional locations (`@1`, `@2`, ...), and named binding locations (`@left`, `@value`, etc.). -- **Completion for `%allow`:** Suggests valid diagnostic names such as `nonterm_unreachable`, `unused_terminals`, and conflict-resolution diagnostic identifiers. +### Supported Features + +- [x] **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. +- [x] **Code Actions:** Offers quick fixes for suppressible diagnostics by inserting the appropriate `%allow ...;` directive. +- [x] **Formatting:** Normalizes directive declarations into one-space, single-line forms, one-space pattern separators, and indentation for production rules and reduce-action bodies. +- [x] **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions, including `%prec` and precedence symbols. +- [x] **Find References:** Finds all references to terminal and non-terminal symbols throughout the grammar definitions, `%start` rules, `%token` definitions, precedence symbols, and the `error` keyword. +- [x] **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. 
+- [x] **Inlay Hints:** Shows `Pattern: Type` hints for top-level patterns in non-terminal definitions, and `ReduceAction` labels before action blocks. +- [x] **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. Completion details include the resolved Rust type for terminals and non-terminals, including inferred placeholders and a note when the value is boxed for parser storage. +- [x] **Completion for directives and keywords:** Suggests directives such as `%token`, `%start`, `%tokentype`, `%left`, `%right`, `%precedence`, `%prec`, `%dprec`, `%glr`, `%lalr`, `%nooptim`, `%allow`, and common identifiers such as `error`, `$sep`, `data`, `lookahead`, and `shift`. +- [x] **Completion for `$...` variables:** Suggests built-in substitutions (`$tokentype`, `$location`, `$userdata`, `$error`, `$errortype`), terminal and non-terminal substitutions (`$terminal_name`, `$NonTerminalName`), current reduce-action bindings (`$left`, `$value`, etc.), and positional semantic variables (`$1`, `$2`, ...). +- [x] **Completion for locations:** Suggests `@$`, `@0`, positional locations (`@1`, `@2`, ...), and named binding locations (`@left`, `@value`, etc.). +- [x] **Completion for `%allow`:** Suggests valid diagnostic names such as `nonterm_unreachable`, `unused_terminals`, and conflict-resolution diagnostic identifiers. + +### Unsupported / Planned Features + +- [ ] **Go to Definition / Find References inside Reduce Actions:** Navigating to definitions or finding references of symbols inside `ReduceAction` Rust code blocks is currently not supported. +- [ ] **Document Symbols / Outline:** Showing all rules and tokens in the file outline window is not supported. +- [ ] **Rename Symbol:** Rename refactoring of terminal and non-terminal symbols throughout the grammar file is not supported. +- [ ] **Signature Help:** Parameter information for helper patterns like `$sep` is not supported. 
+- [ ] **Multi-file Project Support:** Referencing definitions across multiple files is not supported (the grammar file is treated as a self-contained unit). ## Running the Server diff --git a/rusty_lr_lsp/src/goto_definition.rs b/rusty_lr_lsp/src/goto_definition.rs index 236822ae..b3fb89c1 100644 --- a/rusty_lr_lsp/src/goto_definition.rs +++ b/rusty_lr_lsp/src/goto_definition.rs @@ -1,7 +1,9 @@ use lsp_types::{Position, Range}; use proc_macro2::TokenStream; use rusty_lr_parser::grammar::Grammar; -use rusty_lr_parser::{GrammarArgs, Located, PatternArgs, TerminalSetItem}; +use rusty_lr_parser::{ + GrammarArgs, IdentOrLiteral, Located, PatternArgs, PrecDPrecArgs, TerminalSetItem, +}; use std::str::FromStr; use crate::diagnostics::split_stream; @@ -21,12 +23,21 @@ fn collect_located(args: &GrammarArgs) -> Vec> { collected.push(t_name.clone()); } - // 3. %allow diagnostics names + // 3. Precedence definitions + for (_, _, items) in &args.precedences { + for item in items { + if let IdentOrLiteral::Ident(ident) = item { + collected.push(ident.clone()); + } + } + } + + // 4. %allow diagnostics names for (allow_name, _) in &args.allowed_diagnostics { collected.push(allow_name.clone()); } - // 4. Rule definitions + // 5. Rule definitions for rule in &args.rules { collected.push(rule.name.clone()); for line in &rule.rule_lines { @@ -36,6 +47,12 @@ fn collect_located(args: &GrammarArgs) -> Vec> { } collect_pattern_located(pattern, &mut collected); } + // %prec identifiers + for prec in &line.precs { + if let PrecDPrecArgs::Prec(IdentOrLiteral::Ident(ident)) = prec { + collected.push(ident.clone()); + } + } } } @@ -130,6 +147,18 @@ pub fn find_definition(content: &str, target_pos: Position) -> Option { return Some(range_to_lsp_range(content, def_range)); } + // 3. 
Check precedence definitions + for (_, _, items) in &grammar_args.precedences { + for item in items { + if let IdentOrLiteral::Ident(ident) = item { + if ident.value() == name { + let def_range = span_manager.get_byterange(&ident.location())?; + return Some(range_to_lsp_range(content, def_range)); + } + } + } + } + None } @@ -213,4 +242,42 @@ E : num plus error ; crate::position::offset_to_position(MOCK_GRAMMAR, token_def_index + 7); // start of 'plus' assert_eq!(def_range.start, expected_start_pos); } + + #[test] + fn test_goto_definition_prec() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), +} + +%% + +%tokentype Token; +%start E; + +%precedence empty_action; +%token num Token::Num(_); + +E(_) : num + | %prec empty_action { 0 } + ; +"#; + + // Click on 'empty_action' after '%prec' + let index = grammar.find("%prec empty_action").unwrap() + 6; // start of 'empty_action' + let pos = crate::position::offset_to_position(grammar, index); + + let def_range = find_definition(grammar, pos).unwrap(); + + // The definition should point to '%precedence empty_action;' + let def_offset = crate::position::position_to_offset(grammar, def_range.start); + let def_substring = &grammar[def_offset..]; + assert!(def_substring.starts_with("empty_action")); + + let prec_def_index = grammar.find("%precedence empty_action").unwrap(); + let expected_start_pos = + crate::position::offset_to_position(grammar, prec_def_index + 12); // start of 'empty_action' + assert_eq!(def_range.start, expected_start_pos); + } } diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index c5f7fc01..88783134 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -28,6 +28,7 @@ mod goto_definition; mod hover; mod inlay_hint; mod position; +mod references; mod semantic_tokens; fn main() -> Result<(), Box> { @@ -40,6 +41,7 @@ fn main() -> Result<(), Box> { let server_capabilities = serde_json::to_value(&ServerCapabilities { text_document_sync: 
Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), definition_provider: Some(OneOf::Left(true)), + references_provider: Some(OneOf::Left(true)), document_formatting_provider: Some(OneOf::Left(true)), code_action_provider: Some(lsp_types::CodeActionProviderCapability::Options( CodeActionOptions { @@ -129,6 +131,39 @@ fn main() -> Result<(), Box> { } } connection.sender.send(Message::Response(response))?; + } else if req.method == lsp_types::request::References::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting references request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position.text_document.uri; + let position = params.text_document_position.position; + + let mut response = Response::new_ok(id.clone(), serde_json::Value::Null); + if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| { + references::find_references(content, position) + }) { + Ok(Some(locations)) => { + let mapped_locations = locations + .into_iter() + .map(|range| Location::new(uri.clone(), range)) + .collect::>(); + response = Response::new_ok(id, mapped_locations); + } + Ok(None) => { + response = Response::new_ok(id, Vec::::new()); + } + Err(message) => { + eprintln!("RustyLR references panicked: {message}"); + } + } + } + connection.sender.send(Message::Response(response))?; } else if req.method == Completion::METHOD { let (id, params) = match cast_request::(req) { Ok(res) => res, diff --git a/rusty_lr_lsp/src/references.rs b/rusty_lr_lsp/src/references.rs new file mode 100644 index 00000000..d88e6d9f --- /dev/null +++ b/rusty_lr_lsp/src/references.rs @@ -0,0 +1,288 @@ +use lsp_types::{Position, Range}; +use proc_macro2::TokenStream; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{ + GrammarArgs, IdentOrLiteral, Located, PatternArgs, PrecDPrecArgs, TerminalSetItem, +}; +use std::str::FromStr; + +use crate::diagnostics::split_stream; +use 
crate::position::{position_to_offset, range_to_lsp_range}; + +/// Traverses the AST of GrammarArgs to collect only terminal, non-terminal, prec, and error references. +fn collect_references(args: &GrammarArgs) -> Vec> { + let mut collected = Vec::new(); + + // 1. %start names + for start_name in &args.start_rule_name { + collected.push(start_name.clone()); + } + + // 2. %token definitions + for (t_name, _) in &args.terminals { + collected.push(t_name.clone()); + } + + // 3. Precedence definitions + for (_, _, items) in &args.precedences { + for item in items { + if let IdentOrLiteral::Ident(ident) = item { + collected.push(ident.clone()); + } + } + } + + // 4. Rule definitions, pattern idents, and %prec + for rule in &args.rules { + collected.push(rule.name.clone()); + for line in &rule.rule_lines { + // Pattern idents + for (_, pattern) in &line.tokens { + collect_pattern_located(pattern, &mut collected); + } + // %prec identifiers + for prec in &line.precs { + if let PrecDPrecArgs::Prec(IdentOrLiteral::Ident(ident)) = prec { + collected.push(ident.clone()); + } + } + } + } + + collected +} + +/// Recursively traverses a PatternArgs structure to collect Located instances. +fn collect_pattern_located(pattern: &PatternArgs, collected: &mut Vec>) { + match pattern { + PatternArgs::Ident(ident) => { + collected.push(ident.clone()); + } + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => { + collect_pattern_located(base, collected); + } + PatternArgs::TerminalSet(ts) => { + for item in &ts.items { + match item { + TerminalSetItem::Terminal(ident) => { + collected.push(ident.clone()); + } + TerminalSetItem::Range(first, last) => { + collected.push(first.clone()); + collected.push(last.clone()); + } + _ => {} + } + } + } + PatternArgs::Group { alternatives, .. 
} => { + for alt in alternatives { + for pat in alt { + collect_pattern_located(pat, collected); + } + } + } + PatternArgs::Minus { base, exclude } => { + collect_pattern_located(base, collected); + collect_pattern_located(exclude, collected); + } + PatternArgs::Sep { + base, delimiter, .. + } => { + collect_pattern_located(base, collected); + collect_pattern_located(delimiter, collected); + } + _ => {} + } +} + +/// Finds all references of the terminal or non-terminal symbol under the cursor. +pub fn find_references(content: &str, target_pos: Position) -> Option> { + let offset = position_to_offset(content, target_pos); + + // Parse the entire document into TokenStream + let token_stream = TokenStream::from_str(content).ok()?; + let (_, macro_stream) = split_stream(token_stream).ok()?; + let grammar_args = Grammar::parse_args(macro_stream).ok()?; + let span_manager = grammar_args.span_manager.clone(); + + // Collect all referenceable locations + let all_references = collect_references(&grammar_args); + + // Find the one that contains the click offset + let clicked = all_references.iter().find(|loc| { + if let Some(range) = span_manager.get_byterange(&loc.location()) { + range.contains(&offset) + } else { + false + } + })?; + + let name = clicked.value(); + + // Ensure the symbol is indeed a valid terminal, non-terminal, precedence symbol, or 'error' + let is_terminal = grammar_args.terminals.iter().any(|(t, _)| t.value == *name); + let is_nonterminal = grammar_args.rules.iter().any(|r| r.name.value == *name); + let is_prec_symbol = grammar_args.precedences.iter().any(|(_, _, items)| { + items.iter().any(|item| match item { + IdentOrLiteral::Ident(ident) => ident.value() == name, + _ => false, + }) + }); + let is_error = name == "error"; + + if !is_terminal && !is_nonterminal && !is_prec_symbol && !is_error { + return None; + } + + // Filter and map all matches of the clicked name to LSP Range + let mut result = Vec::new(); + for loc in &all_references { + if 
loc.value() == name { + if let Some(range) = span_manager.get_byterange(&loc.location()) { + result.push(range_to_lsp_range(content, range)); + } + } + } + + Some(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, +} + +%% + +%tokentype Token; +%start E; + +%token num Token::Num(_); +%token plus Token::Plus; + +E(_) : E plus num { 0 } + | num { 0 } + ; +"#; + + #[test] + fn test_find_references_terminal() { + // Find position of the 'plus' in rule "E plus num" + let index = MOCK_GRAMMAR.find("plus num").unwrap(); + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let refs = find_references(MOCK_GRAMMAR, pos).unwrap(); + + // There should be 2 references: + // 1. "%token plus Token::Plus;" (definition) + // 2. "E plus num" (usage) + assert_eq!(refs.len(), 2); + + // Verify the content at each range + for range in refs { + let start = crate::position::position_to_offset(MOCK_GRAMMAR, range.start); + let end = crate::position::position_to_offset(MOCK_GRAMMAR, range.end); + assert_eq!(&MOCK_GRAMMAR[start..end], "plus"); + } + } + + #[test] + fn test_find_references_nonterminal() { + // Find position of '%start E' + let index = MOCK_GRAMMAR.find("start E").unwrap() + 6; // start of 'E' + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let refs = find_references(MOCK_GRAMMAR, pos).unwrap(); + + // References to E: + // 1. "%start E;" + // 2. "E(_)" (definition) + // 3. 
"E plus num" (usage) + assert_eq!(refs.len(), 3); + + for range in refs { + let start = crate::position::position_to_offset(MOCK_GRAMMAR, range.start); + let end = crate::position::position_to_offset(MOCK_GRAMMAR, range.end); + assert_eq!(&MOCK_GRAMMAR[start..end], "E"); + } + } + + #[test] + fn test_find_references_prec_and_error() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Minus, +} + +%% + +%tokentype Token; +%start E; + +%left plus; +%left minus; +%token num Token::Num(_); +%token plus Token::Plus; +%token minus Token::Minus; + +E(_) : E plus E + | minus E %prec minus + | error + ; +"#; + + // 1. Find references to precedence/terminal 'minus' + // Click on '%prec minus' + let index = grammar.find("%prec minus").unwrap() + 6; // start of 'minus' + let pos = crate::position::offset_to_position(grammar, index); + let refs = find_references(grammar, pos).unwrap(); + + // References to 'minus': + // - "%left minus;" + // - "%token minus Token::Minus;" + // - "minus E" (rule pattern) + // - "%prec minus" (precedence override) + assert_eq!(refs.len(), 4); + for range in refs { + let start = crate::position::position_to_offset(grammar, range.start); + let end = crate::position::position_to_offset(grammar, range.end); + assert_eq!(&grammar[start..end], "minus"); + } + + // 2. Find references to 'error' + let index = grammar.find("error").unwrap(); + let pos = crate::position::offset_to_position(grammar, index); + let refs = find_references(grammar, pos).unwrap(); + + assert_eq!(refs.len(), 1); + let range = refs[0]; + let start = crate::position::position_to_offset(grammar, range.start); + let end = crate::position::position_to_offset(grammar, range.end); + assert_eq!(&grammar[start..end], "error"); + } + + #[test] + fn test_find_references_no_action_leak() { + // The mock grammar has `{ 0 }` inside the reduce action. + // If we search inside the reduce action, it shouldn't match anything. 
+ // We verify that clicking inside `{ 0 }` returns None. + let index = MOCK_GRAMMAR.find("{ 0 }").unwrap() + 2; // points to '0' + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let refs = find_references(MOCK_GRAMMAR, pos); + assert!(refs.is_none()); + } +} diff --git a/rusty_lr_parser/src/lib.rs b/rusty_lr_parser/src/lib.rs index 1181de61..ad6e83a8 100644 --- a/rusty_lr_parser/src/lib.rs +++ b/rusty_lr_parser/src/lib.rs @@ -16,7 +16,9 @@ pub mod terminal_info; pub(crate) mod terminalset; pub mod utils; -pub use parser::args::{GrammarArgs, PatternArgs, RuleDefArgs, RuleLineArgs, TableLayout}; +pub use parser::args::{ + GrammarArgs, IdentOrLiteral, PatternArgs, PrecDPrecArgs, RuleDefArgs, RuleLineArgs, TableLayout, +}; pub use parser::location::{Located, Location}; pub use terminalset::{TerminalSet, TerminalSetItem}; From d2817bc44f5bc42d9eebd3b9b46ffd571ba224fc Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:35:20 +0900 Subject: [PATCH 19/20] support for VSCode extension --- editors/vscode-rustylr/CHANGELOG.md | 17 +++++++ editors/vscode-rustylr/README.md | 71 +++++++++-------------------- editors/vscode-rustylr/package.json | 26 +++++++++-- 3 files changed, 62 insertions(+), 52 deletions(-) create mode 100644 editors/vscode-rustylr/CHANGELOG.md diff --git a/editors/vscode-rustylr/CHANGELOG.md b/editors/vscode-rustylr/CHANGELOG.md new file mode 100644 index 00000000..73116095 --- /dev/null +++ b/editors/vscode-rustylr/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +All notable changes to the "RustyLR" extension will be documented in this file. + +## 0.1.0 + +- First public release of RustyLR language support! +- Fully integrated with the `rusty_lr_lsp` server: + - **Syntax Highlighting (Semantic Tokens):** Distinct syntax coloring for terminals, non-terminals, directives, bindings, location bindings, and variables. + - **Diagnostics:** Inline warning and error reporting directly in the editor. 
+ - **Code Actions:** Quick-fix actions to suppress warnings with `%allow` directives. + - **Formatting:** Code formatting and indentation support for rule definitions and reduce actions. + - **Go to Definition:** Jump directly to token declarations, production definitions, and precedence definitions. + - **Find References:** Find all usages of terminals, non-terminals, and precedence symbols across the grammar document. + - **Hover Tooltips:** Interactive documentation tooltips for keywords, patterns, and variables. + - **Inlay Hints:** Inline type hints for grammar patterns and reduce actions. + - **Auto-Completion:** Intelligent suggestions for symbols, directives, variables, and locations. diff --git a/editors/vscode-rustylr/README.md b/editors/vscode-rustylr/README.md index 467d86b0..d929ed15 100644 --- a/editors/vscode-rustylr/README.md +++ b/editors/vscode-rustylr/README.md @@ -1,61 +1,34 @@ -# RustyLR VSCode Extension +# RustyLR Language Support -Temporary VSCode extension client for the `rusty_lr_lsp` server in this repository. +This extension provides rich language support for the [RustyLR](https://github.com/ehwan/RustyLR) parser generator grammar files (`*.rustylr` and `rustylr.rs`). -## Run From This Repository - -1. Build the language server once: - - ```bash - cargo build -p rusty_lr_lsp - ``` - -2. Install the extension client dependencies: +## Features - ```bash - cd editors/vscode-rustylr - npm install - ``` +- **Diagnostics & Error Reporting:** Real-time diagnostics for grammar syntax errors, unused symbols, conflict resolutions, and more. +- **Go to Definition:** Quickly navigate to rule definitions, terminal declarations, and precedence rules. +- **Find References:** Find all occurrences and usages of terminals, non-terminals, and precedence symbols. +- **Syntax Highlighting (Semantic Tokens):** Distinct, theme-aligned colors for terminal names, non-terminal rules, directives, bindings, location bindings (`@loc`), and variables (`$var`). 
+- **Formatting:** Automatic document formatter that standardizes directives, separates rules, and indents rule lines and reduce-action bodies. +- **Code Actions (Quick Fixes):** Fast diagnostic suppression actions using the `%allow` directive. +- **Hover tooltips:** Documented explanations and types for terminal tokens, non-terminal rules, keywords, and patterns. +- **Inlay Hints:** Inline type annotations and reduce action indicators. +- **Auto-Completion:** Intelligent suggestions for directives, symbols, locations, variables, and diagnostics. -3. Open this extension folder in VSCode: +## Extension Settings - ```bash - code editors/vscode-rustylr - ``` +This extension contributes the following settings to control the language server behavior: -4. Press `F5` and choose `VS Code Extension Development` if prompted. +* `rustylr.server.command`: Path to the `rusty_lr_lsp` server binary. Leave empty to automatically detect or run from Cargo. +* `rustylr.server.args`: Arguments passed to the language server command. +* `rustylr.server.cwd`: Working directory for the language server. +* `rustylr.semanticTokens.enabled`: Toggle semantic token syntax highlighting. -5. In the Extension Development Host window, open the RustyLR repository folder and then open a grammar file such as `example/calculator/src/parser.rustylr`, or `src/rustylr.rs` in a downstream project. +## Requirements -The extension starts the already-built server binary when it exists: +The language features require the `rusty_lr_lsp` server, which is part of the RustyLR cargo workspace. You can build it from the repository root: ```bash -/home/ehwan/workspace/RustyLR/target/debug/rusty_lr_lsp -``` - -If that binary does not exist yet, it falls back to `cargo run --quiet --package rusty_lr_lsp`. - -The extension searches upward for the RustyLR repository root and uses that as the server working directory. 
You can override the command, arguments, and working directory with VSCode settings: - -```json -{ - "rustylr.server.command": "/home/ehwan/workspace/RustyLR/target/debug/rusty_lr_lsp", - "rustylr.server.args": [], - "rustylr.server.cwd": "/home/ehwan/workspace/RustyLR" -} +cargo build -p rusty_lr_lsp ``` -## File Matching - -The extension contributes a `rustylr` language mode for: - -- `*.rustylr` -- `rustylr.rs` - -It also sends those file patterns to the LSP server even when the VSCode language mode is not manually changed. - -## Features - -The extension is intentionally thin: VSCode starts `rusty_lr_lsp` over stdio and the server provides the language features. - -See [`rusty_lr_lsp/README.md`](../../rusty_lr_lsp/README.md) for the current diagnostics, go-to-definition, and completion feature details. +By default, the extension will attempt to auto-detect the built binary in your workspace target folder or run it dynamically using Cargo. diff --git a/editors/vscode-rustylr/package.json b/editors/vscode-rustylr/package.json index b7daa292..4b7c5356 100644 --- a/editors/vscode-rustylr/package.json +++ b/editors/vscode-rustylr/package.json @@ -1,15 +1,35 @@ { "name": "rustylr-vscode", "displayName": "RustyLR", - "description": "Temporary VSCode extension client for the RustyLR language server.", - "version": "0.0.1", + "description": "Rich language support for the RustyLR parser generator, featuring diagnostics, formatting, auto-completion, hover, goto-definition, find-references, and inlay hints.", + "version": "0.1.0", "publisher": "rustylr", "license": "MIT OR Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/ehwan/RustyLR.git" + }, + "bugs": { + "url": "https://github.com/ehwan/RustyLR/issues" + }, + "homepage": "https://github.com/ehwan/RustyLR#readme", + "keywords": [ + "parser", + "grammar", + "lsp", + "rust", + "rustylr", + "bison", + "yacc", + "compiler" + ], "engines": { "vscode": "^1.84.0" }, "categories": [ - "Programming 
Languages" + "Programming Languages", + "Linters", + "Formatters" ], "main": "./extension.js", "activationEvents": [ From dee290a6d712e6b6fbfa0313db6f401e72a1a298 Mon Sep 17 00:00:00 2001 From: Taehwan Kim Date: Tue, 23 Jun 2026 09:42:31 +0900 Subject: [PATCH 20/20] fix for comment inside formattable --- rusty_lr_lsp/src/formatter.rs | 45 ++++++++++++++++++++++++++++- rusty_lr_lsp/src/goto_definition.rs | 3 +- rusty_lr_lsp/src/hover.rs | 45 ++++++++++++++++++++--------- rusty_lr_lsp/src/inlay_hint.rs | 8 +++-- rusty_lr_lsp/src/main.rs | 4 +-- 5 files changed, 83 insertions(+), 22 deletions(-) diff --git a/rusty_lr_lsp/src/formatter.rs b/rusty_lr_lsp/src/formatter.rs index f8556440..d04315f2 100644 --- a/rusty_lr_lsp/src/formatter.rs +++ b/rusty_lr_lsp/src/formatter.rs @@ -128,7 +128,8 @@ fn find_directive_semicolon(content: &str, start: usize) -> Option { let mut brace_depth = 0usize; let remaining = &content[start..]; - for (relative_idx, ch) in remaining.char_indices() { + let mut iter = remaining.char_indices().peekable(); + while let Some((relative_idx, ch)) = iter.next() { if let Some(quote_ch) = quote { if escaped { escaped = false; @@ -143,6 +144,29 @@ fn find_directive_semicolon(content: &str, start: usize) -> Option { match ch { '"' => quote = Some(ch), '\'' if is_single_quote_literal_start(remaining, relative_idx) => quote = Some(ch), + '/' => match iter.peek().copied() { + Some((_, '/')) => { + iter.next(); + while let Some((_, next_ch)) = iter.peek() { + if *next_ch == '\n' || *next_ch == '\r' { + break; + } + iter.next(); + } + } + Some((_, '*')) => { + iter.next(); + while let Some((_, next_ch)) = iter.next() { + if next_ch == '*' { + if let Some((_, '/')) = iter.peek() { + iter.next(); + break; + } + } + } + } + _ => {} + }, '(' => paren_depth += 1, ')' => paren_depth = paren_depth.saturating_sub(1), '[' => bracket_depth += 1, @@ -782,6 +806,25 @@ Rule(i32): a { assert!(formatted.contains("%tokentype\n // token type comment\n Token;")); } + #[test] + 
fn formats_directive_with_comments_containing_semicolons() { + // Single-line comment with semicolon + let content1 = "%%\n%token num Token::Num(_); // comment; here\n"; + let formatted1 = apply_edits(content1, formatting(content1)); + assert_eq!( + formatted1, + "%%\n%token num Token::Num(_); // comment; here\n" + ); + + // Multi-line block comment with semicolon + let content2 = "%%\n%token num Token::Num(_) /* comment; here */ ;\n"; + let formatted2 = apply_edits(content2, formatting(content2)); + assert_eq!( + formatted2, + "%%\n%token num Token::Num(_) /* comment; here */ ;\n" + ); + } + #[test] fn preserves_comments_in_parser_grammar_fixture() { let content = include_str!("../../rusty_lr_parser/src/parser/parser.rustylr"); diff --git a/rusty_lr_lsp/src/goto_definition.rs b/rusty_lr_lsp/src/goto_definition.rs index b3fb89c1..eceab5ae 100644 --- a/rusty_lr_lsp/src/goto_definition.rs +++ b/rusty_lr_lsp/src/goto_definition.rs @@ -276,8 +276,7 @@ E(_) : num assert!(def_substring.starts_with("empty_action")); let prec_def_index = grammar.find("%precedence empty_action").unwrap(); - let expected_start_pos = - crate::position::offset_to_position(grammar, prec_def_index + 12); // start of 'empty_action' + let expected_start_pos = crate::position::offset_to_position(grammar, prec_def_index + 12); // start of 'empty_action' assert_eq!(def_range.start, expected_start_pos); } } diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs index e72123f0..47acf394 100644 --- a/rusty_lr_lsp/src/hover.rs +++ b/rusty_lr_lsp/src/hover.rs @@ -107,27 +107,33 @@ fn reduce_action_brace_at_offset( for rule in &args.rules { for line in &rule.rule_lines { if let Some(reduce_action) = &line.reduce_action { - if let Some(proc_macro2::TokenTree::Group(group)) = reduce_action.clone().into_iter().next() { + if let Some(proc_macro2::TokenTree::Group(group)) = + reduce_action.clone().into_iter().next() + { if group.delimiter() == proc_macro2::Delimiter::Brace { let action_range = 
group.span().byte_range(); - + // Check start brace(s) - if action_range.start < content.len() && content.as_bytes()[action_range.start] == b'{' { - let start_brace_end = if action_range.start + 1 < action_range.end - && content.as_bytes()[action_range.start + 1] == b'{' + if action_range.start < content.len() + && content.as_bytes()[action_range.start] == b'{' + { + let start_brace_end = if action_range.start + 1 < action_range.end + && content.as_bytes()[action_range.start + 1] == b'{' { action_range.start + 2 } else { action_range.start + 1 }; - let start_brace_range = action_range.start .. start_brace_end; + let start_brace_range = action_range.start..start_brace_end; if start_brace_range.contains(&offset) { return Some((start_brace_range, reduce_action_documentation())); } } // Check end brace(s) - if action_range.end > action_range.start && action_range.end <= content.len() { + if action_range.end > action_range.start + && action_range.end <= content.len() + { if content.as_bytes()[action_range.end - 1] == b'}' { let end_brace_start = if action_range.end - 2 >= action_range.start && content.as_bytes()[action_range.end - 2] == b'}' @@ -136,7 +142,7 @@ fn reduce_action_brace_at_offset( } else { action_range.end - 1 }; - let end_brace_range = end_brace_start .. action_range.end; + let end_brace_range = end_brace_start..action_range.end; if end_brace_range.contains(&offset) { return Some((end_brace_range, reduce_action_documentation())); } @@ -905,11 +911,16 @@ Expr : num { 0 } panic!("expected markup hover"); }; assert!(markup_start.value.contains("### Reduce Action")); - assert!(markup_start.value.contains("A block of Rust code executed when this production rule is reduced")); + assert!(markup_start + .value + .contains("A block of Rust code executed when this production rule is reduced")); assert!(markup_start.value.contains("#reduceaction-optional")); assert_eq!( hover_start.range.unwrap(), - crate::position::range_to_lsp_range(grammar, start_brace_offset .. 
start_brace_offset + 1) + crate::position::range_to_lsp_range( + grammar, + start_brace_offset..start_brace_offset + 1 + ) ); // 2. Single brace end hover @@ -925,7 +936,7 @@ Expr : num { 0 } assert!(markup_end.value.contains("### Reduce Action")); assert_eq!( hover_end.range.unwrap(), - crate::position::range_to_lsp_range(grammar, end_brace_offset .. end_brace_offset + 1) + crate::position::range_to_lsp_range(grammar, end_brace_offset..end_brace_offset + 1) ); // 3. Double brace start hover (first brace) @@ -941,7 +952,10 @@ Expr : num { 0 } assert!(markup_dstart1.value.contains("### Reduce Action")); assert_eq!( hover_dstart1.range.unwrap(), - crate::position::range_to_lsp_range(grammar, dstart_brace_offset .. dstart_brace_offset + 2) + crate::position::range_to_lsp_range( + grammar, + dstart_brace_offset..dstart_brace_offset + 2 + ) ); // 4. Double brace start hover (second brace) @@ -956,7 +970,10 @@ Expr : num { 0 } assert!(markup_dstart2.value.contains("### Reduce Action")); assert_eq!( hover_dstart2.range.unwrap(), - crate::position::range_to_lsp_range(grammar, dstart_brace_offset .. dstart_brace_offset + 2) + crate::position::range_to_lsp_range( + grammar, + dstart_brace_offset..dstart_brace_offset + 2 + ) ); // 5. Double brace end hover (first of closing braces) @@ -972,7 +989,7 @@ Expr : num { 0 } assert!(markup_dend1.value.contains("### Reduce Action")); assert_eq!( hover_dend1.range.unwrap(), - crate::position::range_to_lsp_range(grammar, dend_brace_offset .. 
dend_brace_offset + 2) + crate::position::range_to_lsp_range(grammar, dend_brace_offset..dend_brace_offset + 2) ); } } diff --git a/rusty_lr_lsp/src/inlay_hint.rs b/rusty_lr_lsp/src/inlay_hint.rs index 649eebe8..65cdf0bc 100644 --- a/rusty_lr_lsp/src/inlay_hint.rs +++ b/rusty_lr_lsp/src/inlay_hint.rs @@ -38,7 +38,9 @@ pub fn inlay_hints(content: &str, range: Range) -> Vec { } if let Some(reduce_action) = &line.reduce_action { - if let Some(proc_macro2::TokenTree::Group(group)) = reduce_action.clone().into_iter().next() { + if let Some(proc_macro2::TokenTree::Group(group)) = + reduce_action.clone().into_iter().next() + { if group.delimiter() == proc_macro2::Delimiter::Brace { let action_range = group.span().byte_range(); if ranges_overlap( @@ -189,7 +191,9 @@ List(Vec) : $sep(E, comma, +) { E }; let tooltip = hint.tooltip.as_ref().unwrap(); match tooltip { lsp_types::InlayHintTooltip::MarkupContent(markup) => { - assert!(markup.value.contains("A block of Rust code executed when this production rule is reduced")); + assert!(markup.value.contains( + "A block of Rust code executed when this production rule is reduced" + )); assert!(markup.value.contains("#reduceaction-optional")); } _ => panic!("expected MarkupContent tooltip"), diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs index 88783134..82a6bccc 100644 --- a/rusty_lr_lsp/src/main.rs +++ b/rusty_lr_lsp/src/main.rs @@ -145,9 +145,7 @@ fn main() -> Result<(), Box> { let mut response = Response::new_ok(id.clone(), serde_json::Value::Null); if let Some(content) = documents.get(&uri) { - match catch_lsp_panic(|| { - references::find_references(content, position) - }) { + match catch_lsp_panic(|| references::find_references(content, position)) { Ok(Some(locations)) => { let mapped_locations = locations .into_iter()