diff --git a/SYNTAX.md b/SYNTAX.md index 5813f9c9..9eca8934 100644 --- a/SYNTAX.md +++ b/SYNTAX.md @@ -336,6 +336,19 @@ Expr(i32) : '('! Expr ')'! ; Defines the entry point of the grammar. RustyLR automatically creates an augmented rule `Augmented -> NonTerminalName eof`. +### Multiple Start Symbols + +You can define multiple start symbols by writing multiple `%start` directives: + +``` +%start Expr; +%start Stmt; +``` + +When multiple start symbols are defined, RustyLR generates individual wrapper structs for each start symbol (e.g., `ExprContext` and `StmtContext`). +- Initializing the context (via `ExprContext::new(...)` or `StmtContext::new(...)`) automatically transitions the parser to the correct starting state for that symbol. +- Calling `accept()` on a context wrapper returns the exact type of that start symbol. + --- ## Userdata Type (Optional) diff --git a/example/glr/src/main.rs b/example/glr/src/main.rs index c49a9e78..18dc31fe 100644 --- a/example/glr/src/main.rs +++ b/example/glr/src/main.rs @@ -78,6 +78,29 @@ fn test_parser() { assert_eq!(results, [answer]); } +#[test] +fn test_multiple_start_symbols() { + // Test parsing with the EContext entry point + { + let mut context = parser::EContext::with_default_userdata(); + for ch in "12+34".chars() { + context.feed(ch).unwrap(); + } + let (val, _) = context.accept().unwrap(); + assert_eq!(val, 46); + } + + // Test parsing with the NumberContext entry point + { + let mut context = parser::NumberContext::with_default_userdata(); + for ch in " 567 ".chars() { + context.feed(ch).unwrap(); + } + let (val, _) = context.accept().unwrap(); + assert_eq!(val, 567); + } +} + #[cfg(test)] mod userdata_branch_tests { use rusty_lr::lr1; diff --git a/example/glr/src/parser.rs b/example/glr/src/parser.rs index b66835a5..08dca992 100644 --- a/example/glr/src/parser.rs +++ b/example/glr/src/parser.rs @@ -5,6 +5,7 @@ lr1! 
{ %glr; %tokentype char; %start E; + %start Number; WS0: ' '*; diff --git a/rusty_lr_buildscript/src/lib.rs b/rusty_lr_buildscript/src/lib.rs index b117202f..a72af040 100644 --- a/rusty_lr_buildscript/src/lib.rs +++ b/rusty_lr_buildscript/src/lib.rs @@ -581,6 +581,17 @@ impl Builder { .with_labels(labels) .with_notes(vec!["Name is reserved and cannot be used".to_string()]) } + + ArgError::DuplicateStartSymbol { location, name } => { + let range = grammar_args + .span_manager + .get_byterange(&location) + .unwrap_or(0..0); + Diagnostic::error() + .with_message(format!("Duplicate start symbol definition: `{}`", name)) + .with_labels(vec![Label::primary(file_id, range) + .with_message("duplicate start symbol defined here")]) + } }; let writer = self.stream(); diff --git a/rusty_lr_core/src/parser/data_stack.rs b/rusty_lr_core/src/parser/data_stack.rs index 3c400d36..e8c689e4 100644 --- a/rusty_lr_core/src/parser/data_stack.rs +++ b/rusty_lr_core/src/parser/data_stack.rs @@ -23,6 +23,7 @@ pub trait DataStack: Sized + Default { fn pop(&mut self); fn push_terminal(&mut self, term: Self::Term); fn push_empty(&mut self); + fn set_branch_idx(&mut self, _branch_idx: u32) {} fn clear(&mut self); fn reserve(&mut self, additional: usize); diff --git a/rusty_lr_core/src/parser/deterministic/context.rs b/rusty_lr_core/src/parser/deterministic/context.rs index 4e0e4de2..4fe0614d 100644 --- a/rusty_lr_core/src/parser/deterministic/context.rs +++ b/rusty_lr_core/src/parser/deterministic/context.rs @@ -63,6 +63,42 @@ impl< { Self::new(Default::default()) } + /// Create a new context with a virtual start branch. 
+    pub fn new_with_branch(userdata: Data::UserData, branch_idx: u32) -> Self
+    where
+        P::Term: Clone,
+        P::NonTerm: std::fmt::Debug,
+    {
+        let mut ctx = Self::new(userdata);
+        ctx.data_stack.set_branch_idx(branch_idx);
+        let class = P::TermClass::from_virtual_start(branch_idx);
+        let shift_to = ctx.tables.shift_goto_class(0, class).unwrap_or_else(|| {
+            panic!(
+                "Failed to resolve shift for virtual start branch {}",
+                branch_idx
+            )
+        });
+        ctx.state_stack.push(shift_to.state);
+        ctx.data_stack.push_empty();
+        ctx.location_stack
+            .push(Data::Location::new(std::iter::empty(), 0));
+        #[cfg(feature = "tree")]
+        {
+            ctx.tree_stack.push(crate::tree::Tree::new_terminal(
+                TerminalSymbol::VirtualStart(branch_idx),
+            ));
+        }
+        ctx
+    }
+    /// Create a new context with a virtual start branch using `Default::default()` as user data.
+    pub fn with_default_userdata_and_branch(branch_idx: u32) -> Self
+    where
+        Data::UserData: Default,
+        P::Term: Clone,
+        P::NonTerm: std::fmt::Debug,
+    {
+        Self::new_with_branch(Default::default(), branch_idx)
+    }
     /// Create a new context with given capacity of `state_stack` and `data_stack`.
     /// `state_stack` is initialized with [0] (root state).
     pub fn with_capacity(capacity: usize, userdata: Data::UserData) -> Self {
@@ -90,6 +126,43 @@ impl<
     {
         Self::with_capacity(capacity, Default::default())
     }
+    /// Create a new context with capacity and a virtual start branch.
+    ///
+    /// Performs the same setup as `new_with_branch`, on top of `with_capacity`.
+    ///
+    /// # Panics
+    /// Panics if state 0 has no shift for the virtual start class of `branch_idx`.
+    pub fn with_capacity_and_branch(
+        capacity: usize,
+        userdata: Data::UserData,
+        branch_idx: u32,
+    ) -> Self
+    where
+        P::Term: Clone,
+        P::NonTerm: std::fmt::Debug,
+    {
+        let mut ctx = Self::with_capacity(capacity, userdata);
+        // Record the branch on the data stack, exactly as `new_with_branch` does.
+        ctx.data_stack.set_branch_idx(branch_idx);
+        let class = P::TermClass::from_virtual_start(branch_idx);
+        let shift_to = ctx.tables.shift_goto_class(0, class).unwrap_or_else(|| {
+            panic!(
+                "Failed to resolve shift for virtual start branch {}",
+                branch_idx
+            )
+        });
+        ctx.state_stack.push(shift_to.state);
+        ctx.data_stack.push_empty();
+        ctx.location_stack
+            .push(Data::Location::new(std::iter::empty(), 0));
+        #[cfg(feature = "tree")]
+        {
+            ctx.tree_stack.push(crate::tree::Tree::new_terminal(
+                TerminalSymbol::VirtualStart(branch_idx),
+            ));
+        }
+        ctx
+    }
     /// Borrow the user data owned by this context.
     pub fn userdata(&self) -> &Data::UserData {
         &self.userdata
@@ -496,13 +569,16 @@ impl<
             if next_state_id.push {
                 match term {
                     TerminalSymbol::Terminal(t) => self.data_stack.push_terminal(t),
-                    TerminalSymbol::Error | TerminalSymbol::Eof => self.data_stack.push_empty(),
+                    TerminalSymbol::Error
+                    | TerminalSymbol::Eof
+                    | TerminalSymbol::VirtualStart(_) => self.data_stack.push_empty(),
                 }
             } else {
                 match term {
-                    TerminalSymbol::Terminal(_) | TerminalSymbol::Error | TerminalSymbol::Eof => {
-                        self.data_stack.push_empty()
-                    }
+                    TerminalSymbol::Terminal(_)
+                    | TerminalSymbol::Error
+                    | TerminalSymbol::Eof
+                    | TerminalSymbol::VirtualStart(_) => self.data_stack.push_empty(),
                 }
             }
 
diff --git a/rusty_lr_core/src/parser/nondeterministic/context.rs b/rusty_lr_core/src/parser/nondeterministic/context.rs
index 938c3956..bc4d1fad 100644
--- a/rusty_lr_core/src/parser/nondeterministic/context.rs
+++ b/rusty_lr_core/src/parser/nondeterministic/context.rs
@@ -9,6 +9,7 @@ use crate::parser::table::Index;
 use crate::parser::table::ParserTables;
 use crate::parser::terminalclass::TerminalClass;
 use crate::parser::Parser;
+use crate::Location;
 use crate::TerminalSymbol;
 
 /// Iterator for traverse node to
root. @@ -129,6 +130,48 @@ impl< Self::new(Default::default()) } + pub fn new_with_branch(userdata: Data::UserData, branch_idx: u32) -> Self + where + P::Term: Clone, + P::NonTerm: std::fmt::Debug, + { + let mut context = Self::new(userdata); + let class = P::TermClass::from_virtual_start(branch_idx); + let shift_to = context + .tables + .shift_goto_class(0, class) + .unwrap_or_else(|| { + panic!( + "Failed to resolve shift for virtual start branch {}", + branch_idx + ) + }); + let root_node_idx = context.current_nodes[0]; + let root_node = context.node_mut(root_node_idx); + root_node.data_stack.set_branch_idx(branch_idx); + root_node.state_stack.push(shift_to.state); + root_node.data_stack.push_empty(); + root_node + .location_stack + .push(Data::Location::new(std::iter::empty(), 0)); + #[cfg(feature = "tree")] + { + root_node.tree_stack.push(crate::tree::Tree::new_terminal( + TerminalSymbol::VirtualStart(branch_idx), + )); + } + context + } + + pub fn with_default_userdata_and_branch(branch_idx: u32) -> Self + where + Data::UserData: Default, + P::Term: Clone, + P::NonTerm: std::fmt::Debug, + { + Self::new_with_branch(Default::default(), branch_idx) + } + /// Borrow the user data for the first active path. /// /// In GLR mode, each forked branch owns an independently cloned user data value. 
@@ -915,7 +958,9 @@ impl< TerminalSymbol::Terminal(term) => { node_.data_stack.push_terminal(term); } - TerminalSymbol::Error | TerminalSymbol::Eof => { + TerminalSymbol::Error + | TerminalSymbol::Eof + | TerminalSymbol::VirtualStart(_) => { node_.data_stack.push_empty(); } } @@ -923,7 +968,8 @@ impl< match term { TerminalSymbol::Terminal(_) | TerminalSymbol::Error - | TerminalSymbol::Eof => { + | TerminalSymbol::Eof + | TerminalSymbol::VirtualStart(_) => { node_.data_stack.push_empty(); } } @@ -951,13 +997,18 @@ impl< TerminalSymbol::Terminal(term) => { node_.data_stack.push_terminal(term); } - TerminalSymbol::Error | TerminalSymbol::Eof => { + TerminalSymbol::Error + | TerminalSymbol::Eof + | TerminalSymbol::VirtualStart(_) => { node_.data_stack.push_empty(); } } } else { match term { - TerminalSymbol::Terminal(_) | TerminalSymbol::Error | TerminalSymbol::Eof => { + TerminalSymbol::Terminal(_) + | TerminalSymbol::Error + | TerminalSymbol::Eof + | TerminalSymbol::VirtualStart(_) => { node_.data_stack.push_empty(); } } diff --git a/rusty_lr_core/src/parser/terminalclass.rs b/rusty_lr_core/src/parser/terminalclass.rs index 06fe6cb9..e9f420a0 100644 --- a/rusty_lr_core/src/parser/terminalclass.rs +++ b/rusty_lr_core/src/parser/terminalclass.rs @@ -11,4 +11,9 @@ pub trait TerminalClass: Copy { fn to_usize(&self) -> usize; fn from_term(term: &Self::Term) -> Self; + + /// Gets the terminal class for a virtual start branch. + fn from_virtual_start(_branch_idx: u32) -> Self { + panic!("from_virtual_start not supported on this terminal class") + } } diff --git a/rusty_lr_core/src/symbol.rs b/rusty_lr_core/src/symbol.rs index 6eb38fe9..d2f6d0bb 100644 --- a/rusty_lr_core/src/symbol.rs +++ b/rusty_lr_core/src/symbol.rs @@ -7,9 +7,10 @@ use std::hash::Hash; /// and future support for other special tokens. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum TerminalSymbol { - Terminal(Term), // index in the terminals vector - Error, // error token - Eof, // end of file token + Terminal(Term), // index in the terminals vector + Error, // error token + Eof, // end of file token + VirtualStart(u32), // virtual start branch token for multiple start symbols } impl TerminalSymbol { pub fn is_error(&self) -> bool { @@ -21,20 +22,21 @@ impl TerminalSymbol { pub fn is_eof(&self) -> bool { matches!(self, TerminalSymbol::Eof) } + pub fn is_virtual_start(&self) -> bool { + matches!(self, TerminalSymbol::VirtualStart(_)) + } /// converts self to a terminal if it is a `Terminal` variant, otherwise returns `None`. pub fn to_term(&self) -> Option<&Term> { match self { TerminalSymbol::Terminal(term) => Some(term), - TerminalSymbol::Error => None, - TerminalSymbol::Eof => None, + _ => None, } } /// converts self to a terminal if it is a `Terminal` variant, otherwise returns `None`. 
pub fn into_term(self) -> Option { match self { TerminalSymbol::Terminal(term) => Some(term), - TerminalSymbol::Error => None, - TerminalSymbol::Eof => None, + _ => None, } } } @@ -45,6 +47,7 @@ impl std::fmt::Display for TerminalSymbol { TerminalSymbol::Terminal(term) => write!(f, "{}", term), TerminalSymbol::Error => write!(f, "error"), TerminalSymbol::Eof => write!(f, "eof"), + TerminalSymbol::VirtualStart(i) => write!(f, "start_branch_{}", i), } } } diff --git a/rusty_lr_parser/Cargo.toml b/rusty_lr_parser/Cargo.toml index a7c61167..6a3ae1ec 100644 --- a/rusty_lr_parser/Cargo.toml +++ b/rusty_lr_parser/Cargo.toml @@ -15,7 +15,7 @@ quote = "1.0" rusty_lr_core = { version = "4.2.0", path = "../rusty_lr_core", features = [ "builder", ] } -syn = { version = "2.0", features = ["extra-traits"] } +syn = { version = "2.0", features = ["extra-traits", "full"] } [features] default = [] diff --git a/rusty_lr_parser/src/emit.rs b/rusty_lr_parser/src/emit.rs index b51cf598..ef9214fb 100644 --- a/rusty_lr_parser/src/emit.rs +++ b/rusty_lr_parser/src/emit.rs @@ -81,40 +81,304 @@ impl Grammar { rule_index_type.clone() }; - if self.glr { - stream.extend( - quote! 
{ - /// type alias for `Context` - #[allow(non_camel_case_types,dead_code)] - pub type #context_struct_name = #module_prefix::parser::nondeterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename, #max_reduce_rules>; - /// type alias for CFG production rule + let start_type_enum_name = format_ident!("{}StartType", &self.start_rule_name.value()); + + if self.start_rule_names.len() > 1 { + let mut context_structs = TokenStream::new(); + for (branch_idx, start_rule_name) in self.start_rule_names.iter().enumerate() { + let ctx_name = format_ident!("{}Context", start_rule_name.value()); + let start_rule_ident = format_ident!("{}", start_rule_name.value()); + let start_idx = *self + .nonterminals_index + .get(start_rule_name.value()) + .unwrap(); + let s_ruletype = self.nonterminals[start_idx] + .ruletype + .as_ref() + .unwrap_or("e! {()}) + .clone(); + let branch_idx_u32 = branch_idx as u32; + + if self.glr { + context_structs.extend(quote! { + #[allow(non_camel_case_types, dead_code)] + pub struct #ctx_name { + inner: #module_prefix::parser::nondeterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename, #max_reduce_rules>, + } + impl #ctx_name { + pub fn new(userdata: <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData) -> Self { + Self { + inner: #module_prefix::parser::nondeterministic::Context::new_with_branch(userdata, #branch_idx_u32), + } + } + pub fn with_default_userdata() -> Self + where + <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData: Default, + { + Self { + inner: #module_prefix::parser::nondeterministic::Context::with_default_userdata_and_branch(#branch_idx_u32), + } + } + pub fn feed(&mut self, term: #token_typename) -> Result<(), #parse_error_typename> { + self.inner.feed(term) + } + pub fn feed_location(&mut self, term: #token_typename, location: #location_typename) -> Result<(), #parse_error_typename> { + 
+                                self.inner.feed_location(term, location)
+                            }
+                            pub fn can_feed(&self, term: &#token_typename) -> bool {
+                                self.inner.can_feed(term)
+                            }
+                            pub fn accept(self) -> Result<(#s_ruletype, <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData), #parse_error_typename> {
+                                match self.inner.accept()? { // `?` forwards parse errors instead of panicking on them
+                                    (#start_type_enum_name::#start_rule_ident(val), userdata) => Ok((val, userdata)),
+                                    _ => unreachable!(),
+                                }
+                            }
+                            pub fn accept_all(self) -> Result::UserData)>, #parse_error_typename> {
+                                match self.inner.accept_all() {
+                                    Ok(iter) => Ok(iter.map(|(start_val, userdata)| match start_val {
+                                        #start_type_enum_name::#start_rule_ident(val) => (val, userdata),
+                                        _ => unreachable!(),
+                                    })),
+                                    Err(err) => Err(err),
+                                }
+                            }
+                            pub fn len_paths(&self) -> usize {
+                                self.inner.len_paths()
+                            }
+                            pub fn debug_check(&self) {
+                                self.inner.debug_check();
+                            }
+                            pub fn expected_token(&self) -> (::std::collections::BTreeSet<#termclass_typename>, ::std::collections::BTreeSet<#nonterm_typename>) {
+                                self.inner.expected_token()
+                            }
+                            pub fn expected_token_str(&self) -> (impl Iterator, impl Iterator) {
+                                self.inner.expected_token_str()
+                            }
+                            pub fn userdata(&self) -> &<#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData {
+                                self.inner.userdata()
+                            }
+                            pub fn userdata_mut(&mut self) -> &mut <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData {
+                                self.inner.userdata_mut()
+                            }
+                        }
+                        impl ::std::ops::Deref for #ctx_name {
+                            type Target = #module_prefix::parser::nondeterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename, #max_reduce_rules>;
+                            fn deref(&self) -> &Self::Target {
+                                &self.inner
+                            }
+                        }
+                        impl ::std::ops::DerefMut for #ctx_name {
+                            fn deref_mut(&mut self) -> &mut Self::Target {
+                                &mut self.inner
+                            }
+                        }
+                        impl Clone for #ctx_name
+                        where
+                            #module_prefix::parser::nondeterministic::Node<#data_stack_typename, #state_index_typename>: Clone,
+                            #reduce_error_typename: Clone,
<#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData: Clone + Default, + { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } + } + impl Default for #ctx_name + where + <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData: Default, + { + fn default() -> Self { + Self::with_default_userdata() + } + } + impl std::fmt::Display for #ctx_name + where + #module_prefix::parser::nondeterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename, #max_reduce_rules>: std::fmt::Display, + { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.inner, f) + } + } + impl std::fmt::Debug for #ctx_name + where + #module_prefix::parser::nondeterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename, #max_reduce_rules>: std::fmt::Debug, + { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.inner, f) + } + } + }); + } else { + context_structs.extend(quote! 
{
+                        #[allow(non_camel_case_types, dead_code)]
+                        pub struct #ctx_name {
+                            inner: #module_prefix::parser::deterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename>,
+                        }
+                        impl #ctx_name {
+                            pub fn new(userdata: <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData) -> Self {
+                                Self {
+                                    inner: #module_prefix::parser::deterministic::Context::new_with_branch(userdata, #branch_idx_u32),
+                                }
+                            }
+                            pub fn with_default_userdata() -> Self
+                            where
+                                <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData: Default,
+                            {
+                                Self {
+                                    inner: #module_prefix::parser::deterministic::Context::with_default_userdata_and_branch(#branch_idx_u32),
+                                }
+                            }
+                            pub fn with_capacity(capacity: usize, userdata: <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData) -> Self {
+                                Self {
+                                    inner: #module_prefix::parser::deterministic::Context::with_capacity_and_branch(capacity, userdata, #branch_idx_u32),
+                                }
+                            }
+                            pub fn with_capacity_and_default_userdata(capacity: usize) -> Self
+                            where
+                                <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData: Default,
+                            {
+                                Self {
+                                    inner: #module_prefix::parser::deterministic::Context::with_capacity_and_branch(capacity, Default::default(), #branch_idx_u32),
+                                }
+                            }
+                            pub fn feed(&mut self, term: #token_typename) -> Result<(), #parse_error_typename> {
+                                self.inner.feed(term)
+                            }
+                            pub fn feed_location(&mut self, term: #token_typename, location: #location_typename) -> Result<(), #parse_error_typename> {
+                                self.inner.feed_location(term, location)
+                            }
+                            pub fn can_feed(&self, term: &#token_typename) -> bool {
+                                self.inner.can_feed(term)
+                            }
+                            pub fn accept(self) -> Result<(#s_ruletype, <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData), #parse_error_typename> {
+                                match self.inner.accept()? { // `?` forwards parse errors instead of panicking on them
+                                    (#start_type_enum_name::#start_rule_ident(val), userdata) => Ok((val, userdata)),
+                                    _ => unreachable!(),
+ } + } + pub fn expected_token(&self) -> (::std::collections::BTreeSet<#termclass_typename>, ::std::collections::BTreeSet<#nonterm_typename>) { + self.inner.expected_token() + } + pub fn expected_token_str(&self) -> (impl Iterator, impl Iterator) { + self.inner.expected_token_str() + } + pub fn userdata(&self) -> &<#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData { + self.inner.userdata() + } + pub fn userdata_mut(&mut self) -> &mut <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData { + self.inner.userdata_mut() + } + } + impl ::std::ops::Deref for #ctx_name { + type Target = #module_prefix::parser::deterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename>; + fn deref(&self) -> &Self::Target { + &self.inner + } + } + impl ::std::ops::DerefMut for #ctx_name { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } + } + impl Clone for #ctx_name + where + <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData: Clone, + #token_typename: Clone, + #nonterm_typename: Clone, + { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } + } + impl Default for #ctx_name + where + <#data_stack_typename as #module_prefix::parser::data_stack::DataStack>::UserData: Default, + { + fn default() -> Self { + Self::with_default_userdata() + } + } + impl std::fmt::Display for #ctx_name + where + #module_prefix::parser::deterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename>: std::fmt::Display, + { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.inner, f) + } + } + impl std::fmt::Debug for #ctx_name + where + #module_prefix::parser::deterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename>: std::fmt::Debug, + { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.inner, f) 
+ } + } + }); + } + } + stream.extend(context_structs); + + // Also emit other aliases (excluding Context itself) + if self.glr { + stream.extend(quote! { #[allow(non_camel_case_types,dead_code)] pub type #rule_typename = #module_prefix::production::Production<#termclass_typename, #nonterm_typename>; - /// type alias for runtime parser tables #[allow(non_camel_case_types,dead_code)] pub type #tables_typename = #module_prefix::parser::table::#table_structname<#termclass_typename, #nonterm_typename, #rule_container_type, #state_index_typename>; - /// type alias for `InvalidTerminalError` #[allow(non_camel_case_types,dead_code)] pub type #parse_error_typename = #module_prefix::parser::nondeterministic::ParseError<#token_typename, #location_typename, #reduce_error_typename>; - } - ); + }); + } else { + stream.extend(quote! { + #[allow(non_camel_case_types,dead_code)] + pub type #rule_typename = #module_prefix::production::Production<#termclass_typename, #nonterm_typename>; + #[allow(non_camel_case_types,dead_code)] + pub type #tables_typename = #module_prefix::parser::table::#table_structname<#termclass_typename, #nonterm_typename, #rule_container_type, #state_index_typename>; + #[allow(non_camel_case_types,dead_code)] + pub type #parse_error_typename = #module_prefix::parser::deterministic::ParseError<#token_typename, #location_typename, #reduce_error_typename>; + }); + } } else { - stream.extend( - quote! 
{ - /// type alias for `Context` - #[allow(non_camel_case_types,dead_code)] - pub type #context_struct_name = #module_prefix::parser::deterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename>; - /// type alias for CFG production rule - #[allow(non_camel_case_types,dead_code)] - pub type #rule_typename = #module_prefix::production::Production<#termclass_typename, #nonterm_typename>; - /// type alias for runtime parser tables - #[allow(non_camel_case_types,dead_code)] - pub type #tables_typename = #module_prefix::parser::table::#table_structname<#termclass_typename, #nonterm_typename, #rule_container_type, #state_index_typename>; - /// type alias for `ParseError` - #[allow(non_camel_case_types,dead_code)] - pub type #parse_error_typename = #module_prefix::parser::deterministic::ParseError<#token_typename, #location_typename, #reduce_error_typename>; + if self.glr { + stream.extend( + quote! { + /// type alias for `Context` + #[allow(non_camel_case_types,dead_code)] + pub type #context_struct_name = #module_prefix::parser::nondeterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename, #max_reduce_rules>; + /// type alias for CFG production rule + #[allow(non_camel_case_types,dead_code)] + pub type #rule_typename = #module_prefix::production::Production<#termclass_typename, #nonterm_typename>; + /// type alias for runtime parser tables + #[allow(non_camel_case_types,dead_code)] + pub type #tables_typename = #module_prefix::parser::table::#table_structname<#termclass_typename, #nonterm_typename, #rule_container_type, #state_index_typename>; + /// type alias for `InvalidTerminalError` + #[allow(non_camel_case_types,dead_code)] + pub type #parse_error_typename = #module_prefix::parser::nondeterministic::ParseError<#token_typename, #location_typename, #reduce_error_typename>; + } + ); + } else { + stream.extend( + quote! 
{ + /// type alias for `Context` + #[allow(non_camel_case_types,dead_code)] + pub type #context_struct_name = #module_prefix::parser::deterministic::Context<#parser_struct_name, #data_stack_typename, #state_index_typename>; + /// type alias for CFG production rule + #[allow(non_camel_case_types,dead_code)] + pub type #rule_typename = #module_prefix::production::Production<#termclass_typename, #nonterm_typename>; + /// type alias for runtime parser tables + #[allow(non_camel_case_types,dead_code)] + pub type #tables_typename = #module_prefix::parser::table::#table_structname<#termclass_typename, #nonterm_typename, #rule_container_type, #state_index_typename>; + /// type alias for `ParseError` + #[allow(non_camel_case_types,dead_code)] + pub type #parse_error_typename = #module_prefix::parser::deterministic::ParseError<#token_typename, #location_typename, #reduce_error_typename>; + } + ); } - ); } } @@ -264,7 +528,16 @@ impl Grammar { }); } - let max_variants = self.terminal_classes.len() + 2; + let mut virtual_start_variants = Vec::new(); + let mut branch_indices = Vec::new(); + if self.start_rule_names.len() > 1 { + for i in 0..self.start_rule_names.len() { + virtual_start_variants.push(format_ident!("VirtualStart{}", i)); + branch_indices.push(i as u32); + } + } + + let max_variants = self.terminal_classes.len() + 2 + virtual_start_variants.len(); stream.extend(quote! 
{ /// A enum that represents terminal classes @@ -273,9 +546,10 @@ impl Grammar { // repr(usize) is used to ensure a stable memory layout compatible with integer casting and transmutes #[repr(usize)] pub enum #termclass_typename { - #(#class_variants),*, + #(#class_variants,)* #error_name, #eof_name, + #(#virtual_start_variants,)* } impl #termclass_typename { @@ -298,6 +572,7 @@ impl Grammar { #as_str_match_stream #termclass_typename::#error_name => "error", #termclass_typename::#eof_name => "eof", + #( #termclass_typename::#virtual_start_variants => "virtual_start", )* } } fn to_usize(&self) -> usize { @@ -310,6 +585,13 @@ impl Grammar { #from_term_match_stream } } + + fn from_virtual_start(branch_idx: u32) -> Self { + match branch_idx { + #( #branch_indices => Self::#virtual_start_variants, )* + _ => panic!("Invalid virtual start branch index: {}", branch_idx), + } + } } impl std::fmt::Display for #termclass_typename { @@ -535,6 +817,7 @@ impl Grammar { TerminalSymbol::Terminal(t) => t, TerminalSymbol::Error => self.terminal_classes.len(), TerminalSymbol::Eof => self.terminal_classes.len() + 1, + TerminalSymbol::VirtualStart(i) => self.terminal_classes.len() + 2 + i as usize, }; let state_idx = next_state.state; let push = next_state.push; @@ -579,6 +862,9 @@ impl Grammar { TerminalSymbol::Terminal(t) => *t, TerminalSymbol::Error => self.terminal_classes.len(), TerminalSymbol::Eof => self.terminal_classes.len() + 1, + TerminalSymbol::VirtualStart(i) => { + self.terminal_classes.len() + 2 + *i as usize + } }; assert!( term_idx < 32768, @@ -871,7 +1157,9 @@ impl Grammar { &empty_variant_name } } - TerminalSymbol::Error | TerminalSymbol::Eof => &empty_variant_name, + TerminalSymbol::Error | TerminalSymbol::Eof | TerminalSymbol::VirtualStart(_) => { + &empty_variant_name + } }, Symbol::NonTerminal(nonterm_idx) => &variant_names_for_nonterm[nonterm_idx], }; @@ -1388,49 +1676,126 @@ impl Grammar { } } - let start_idx = *self - .nonterminals_index - 
.get(self.start_rule_name.value()) - .unwrap(); - let start_variant_name = &variant_names_for_nonterm[start_idx]; - // Generate the pop_start implementation. - // At the time of acceptance, the stack contains the EOF token (which has no data, i.e., Empty) - // on top of the actual start symbol's value. We pop the EOF token first, and then retrieve the start value. - let (start_typename, pop_start) = if start_variant_name != &empty_variant_name { - let ruletype = self.nonterminals[start_idx] - .ruletype - .as_ref() - .unwrap() - .clone(); + let (start_typename, pop_start) = if self.start_rule_names.len() > 1 { + let start_type_enum_name = format_ident!("{}StartType", &self.start_rule_name.value()); + let mut enum_variants = TokenStream::new(); + let mut match_arms = TokenStream::new(); + + for (branch_idx, start_rule_name) in self.start_rule_names.iter().enumerate() { + let s_idx = *self + .nonterminals_index + .get(start_rule_name.value()) + .unwrap(); + let s_variant_name = &variant_names_for_nonterm[s_idx]; + let s_ruletype = self.nonterminals[s_idx] + .ruletype + .as_ref() + .unwrap_or("e! {()}) + .clone(); + let s_variant_ident = format_ident!("{}", start_rule_name.value()); + let branch_idx_u32 = branch_idx as u32; - let is_start_boxed = self.nonterminals[start_idx].ruletype_boxed; - let val_expr = if is_start_boxed { - quote! { *val } - } else { - quote! { val } - }; + enum_variants.extend(quote! { + #s_variant_ident(#s_ruletype), + }); - ( - ruletype, - quote! { - self.__stack.pop(); - match self.__stack.pop() { - Some(#data_enum_typename::#start_variant_name(val)) => Some(#val_expr), + let s_val_expr = if self.nonterminals[s_idx].ruletype_boxed { + quote! { *val } + } else { + quote! { val } + }; + + let arm_body = if s_variant_name != &empty_variant_name { + quote! 
{ + Some(#data_enum_typename::#s_variant_name(val)) => { + let res = Some(#start_type_enum_name::#s_variant_ident(#s_val_expr)); + self.__stack.pop(); // pop VirtualStart(i) + res + } _ => None, } - }, - ) - } else { + } else { + quote! { + Some(#data_enum_typename::Empty) => { + let res = Some(#start_type_enum_name::#s_variant_ident(())); + self.__stack.pop(); // pop VirtualStart(i) + res + } + _ => None, + } + }; + + match_arms.extend(quote! { + #branch_idx_u32 => { + match start_val { + #arm_body + } + } + }); + } + + stream.extend(quote! { + /// Sum-type for all start symbols returned by the parser + #[derive(Debug)] + #[allow(non_camel_case_types)] + pub enum #start_type_enum_name { + #enum_variants + } + }); + ( - quote! {()}, + quote! { #start_type_enum_name }, quote! { - self.__stack.pop(); - match self.__stack.pop() { - Some(#data_enum_typename::Empty) => Some(()), + self.__stack.pop(); // pop EOF + let start_val = self.__stack.pop(); + match self.branch_idx { + #match_arms _ => None, } }, ) + } else { + let start_idx = *self + .nonterminals_index + .get(self.start_rule_name.value()) + .unwrap(); + let start_variant_name = &variant_names_for_nonterm[start_idx]; + if start_variant_name != &empty_variant_name { + let ruletype = self.nonterminals[start_idx] + .ruletype + .as_ref() + .unwrap() + .clone(); + + let is_start_boxed = self.nonterminals[start_idx].ruletype_boxed; + let val_expr = if is_start_boxed { + quote! { *val } + } else { + quote! { val } + }; + + ( + ruletype, + quote! { + self.__stack.pop(); + match self.__stack.pop() { + Some(#data_enum_typename::#start_variant_name(val)) => Some(#val_expr), + _ => None, + } + }, + ) + } else { + ( + quote! {()}, + quote! 
{ + self.__stack.pop(); + match self.__stack.pop() { + Some(#data_enum_typename::Empty) => Some(()), + _ => None, + } + }, + ) + } }; let derive_clone_for_glr = if self.glr { @@ -1494,12 +1859,14 @@ impl Grammar { #derive_clone_for_glr pub struct #data_stack_typename { pub __stack: Vec<#data_enum_typename>, + pub branch_idx: u32, } impl Default for #data_stack_typename { fn default() -> Self { Self { __stack: Vec::new(), + branch_idx: 0, } } } @@ -1533,6 +1900,9 @@ impl Grammar { fn push_empty(&mut self) { #push_empty_body_stream } + fn set_branch_idx(&mut self, branch_idx: u32) { + self.branch_idx = branch_idx; + } // Trait operations like clear, split_off, truncate, and append are highly simplified // and efficient because they only need to perform a single vector operation on the unified stack. @@ -1546,6 +1916,7 @@ impl Grammar { fn split_off(&mut self, at: usize) -> Self { Self { __stack: self.__stack.split_off(at), + branch_idx: self.branch_idx, } } fn truncate(&mut self, at: usize) { diff --git a/rusty_lr_parser/src/error.rs b/rusty_lr_parser/src/error.rs index 87314149..e8c89173 100644 --- a/rusty_lr_parser/src/error.rs +++ b/rusty_lr_parser/src/error.rs @@ -43,6 +43,12 @@ pub enum ArgError { /// can't use reserved keyword as token name ReservedName(Vec>), + + /// Duplicate start symbol defined + DuplicateStartSymbol { + location: Location, + name: String, + }, } #[derive(Debug)] @@ -163,6 +169,7 @@ impl ArgError { | ArgError::MultipleDPrecDefinition(locs) => locs.clone(), ArgError::MultipleNameDefinition(_, locs) => locs.clone(), ArgError::ReservedName(names) => names.iter().map(|name| name.location()).collect(), + ArgError::DuplicateStartSymbol { location, .. } => vec![*location], _ => vec![Location::default()], } } @@ -187,6 +194,9 @@ impl ArgError { format!("Duplicated name for terminal or non-terminal: {}", name) } ArgError::ReservedName(_) => "This name is reserved and cannot be used".into(), + ArgError::DuplicateStartSymbol { name, .. 
} => { + format!("Duplicate start symbol definition: `{}`", name) + } } } } @@ -616,6 +626,7 @@ impl Info { TerminalSymbol::Terminal(t) => grammar.class_pretty_name_abbr(*t), TerminalSymbol::Error => "error".to_string(), TerminalSymbol::Eof => "$".to_string(), + TerminalSymbol::VirtualStart(i) => format!("start_branch_{}", i), }; format!("%allow {}({});", self.name(), term_name) } @@ -625,6 +636,7 @@ impl Info { TerminalSymbol::Terminal(t) => grammar.class_pretty_name_abbr(*t), TerminalSymbol::Error => "error".to_string(), TerminalSymbol::Eof => "$".to_string(), + TerminalSymbol::VirtualStart(i) => format!("start_branch_{}", i), }; return format!("%allow {}({});", self.name(), term_name); } diff --git a/rusty_lr_parser/src/grammar.rs b/rusty_lr_parser/src/grammar.rs index 46cf5e7c..e2feeb05 100644 --- a/rusty_lr_parser/src/grammar.rs +++ b/rusty_lr_parser/src/grammar.rs @@ -74,6 +74,7 @@ pub struct Grammar { /// %start pub(crate) start_rule_name: Located, + pub(crate) start_rule_names: Vec>, pub terminals: Vec, /// ident -> index map for terminals @@ -177,6 +178,7 @@ impl Grammar { scopes: &[Option], ) -> bool { match term { + TerminalSymbol::VirtualStart(_) => return true, TerminalSymbol::Error => { for opt_target in scopes { match opt_target { @@ -491,14 +493,18 @@ impl Grammar { // %start if grammar_args.start_rule_name.is_empty() { return Err(ArgError::StartNotDefined); - } else if grammar_args.start_rule_name.len() > 1 { - return Err(ArgError::MultipleStartDefinition( - grammar_args - .start_rule_name - .iter() - .map(|start| start.location()) - .collect(), - )); + } + { + let mut seen_starts = HashSet::default(); + for start_rule_name in &grammar_args.start_rule_name { + let name = start_rule_name.value(); + if !seen_starts.insert(name.clone()) { + return Err(ArgError::DuplicateStartSymbol { + location: start_rule_name.location(), + name: name.clone(), + }); + } + } } // %prec and %dprec in each production rules @@ -1097,7 +1103,8 @@ impl Grammar { 
userdata_typename: grammar_args.userdata_typename.into_iter().next().unwrap().1, error_typename, - start_rule_name: grammar_args.start_rule_name.into_iter().next().unwrap(), + start_rule_name: grammar_args.start_rule_name.first().unwrap().clone(), + start_rule_names: grammar_args.start_rule_name.clone(), terminals: Default::default(), terminals_index: Default::default(), @@ -1406,8 +1413,8 @@ impl Grammar { return Err(ParseError::PrecedenceNotDefined(prec.clone())); } } - TerminalSymbol::Eof => { - unreachable!("eof token cannot be used in %prec, nor cannot be used in production rules") + TerminalSymbol::Eof | TerminalSymbol::VirtualStart(_) => { + unreachable!("eof/virtual start token cannot be used in %prec, nor cannot be used in production rules") } } } else { @@ -1443,8 +1450,8 @@ impl Grammar { break; } } - TerminalSymbol::Eof => { - unreachable!("eof token cannot be used in %prec, nor cannot be used in production rules, this case must be filtered out in parsing stage") + TerminalSymbol::Eof | TerminalSymbol::VirtualStart(_) => { + unreachable!("eof/virtual start token cannot be used in %prec, nor cannot be used in production rules, this case must be filtered out in parsing stage") } } } @@ -1886,57 +1893,98 @@ impl Grammar { } } - // check start rule is valid - if !grammar - .nonterminals_index - .contains_key(grammar.start_rule_name.value()) - { - return Err(ParseError::StartNonTerminalNotDefined( - grammar.start_rule_name.location(), - )); + // check start rules are valid + for start_rule_name in &grammar.start_rule_names { + let name = start_rule_name.value(); + if !grammar.nonterminals_index.contains_key(name) { + return Err(ParseError::StartNonTerminalNotDefined( + start_rule_name.location(), + )); + } } // insert augmented rule { let augmented_name = Located::new(utils::AUGMENTED_NAME.to_string(), Location::CallSite); - let start_idx = grammar - .nonterminals_index - .get(grammar.start_rule_name.value()) - .unwrap(); - let augmented_rule = Rule { - 
tokens: vec![ - MappedSymbol { - symbol: Symbol::NonTerminal(*start_idx), - mapto: None, - location: Location::CallSite, - reduce_action_chains: Vec::new(), - }, - MappedSymbol { - symbol: Symbol::Terminal(TerminalSymbol::Eof), - mapto: None, - location: Location::CallSite, - reduce_action_chains: Vec::new(), - }, - ], - reduce_action: None, - separator_location: Location::CallSite, - prec: None, - dprec: None, - is_used: true, - }; + let mut augmented_rules = Vec::new(); + + if grammar.start_rule_names.len() > 1 { + for (i, start_rule_name) in grammar.start_rule_names.iter().enumerate() { + let start_idx = grammar + .nonterminals_index + .get(start_rule_name.value()) + .unwrap(); + let rule = Rule { + tokens: vec![ + MappedSymbol { + symbol: Symbol::Terminal(TerminalSymbol::VirtualStart(i as u32)), + mapto: None, + location: Location::CallSite, + reduce_action_chains: Vec::new(), + }, + MappedSymbol { + symbol: Symbol::NonTerminal(*start_idx), + mapto: None, + location: Location::CallSite, + reduce_action_chains: Vec::new(), + }, + MappedSymbol { + symbol: Symbol::Terminal(TerminalSymbol::Eof), + mapto: None, + location: Location::CallSite, + reduce_action_chains: Vec::new(), + }, + ], + reduce_action: None, + separator_location: Location::CallSite, + prec: None, + dprec: None, + is_used: true, + }; + augmented_rules.push(rule); + grammar.nonterminals[*start_idx].protected = true; + } + } else { + let start_idx = grammar + .nonterminals_index + .get(grammar.start_rule_name.value()) + .unwrap(); + let augmented_rule = Rule { + tokens: vec![ + MappedSymbol { + symbol: Symbol::NonTerminal(*start_idx), + mapto: None, + location: Location::CallSite, + reduce_action_chains: Vec::new(), + }, + MappedSymbol { + symbol: Symbol::Terminal(TerminalSymbol::Eof), + mapto: None, + location: Location::CallSite, + reduce_action_chains: Vec::new(), + }, + ], + reduce_action: None, + separator_location: Location::CallSite, + prec: None, + dprec: None, + is_used: true, + }; + 
augmented_rules.push(augmented_rule); + grammar.nonterminals[*start_idx].protected = true; + } + let nonterminal_info = NonTerminalInfo { name: augmented_name.clone(), pretty_name: utils::AUGMENTED_NAME.to_string(), ruletype: None, ruletype_boxed: false, root_location: None, - rules: vec![augmented_rule], + rules: augmented_rules, protected: true, nonterm_type: Some(rusty_lr_core::parser::nonterminal::NonTerminalType::Augmented), }; - // start rule is protected - grammar.nonterminals[*start_idx].protected = true; let augmented_idx = grammar.nonterminals.len(); grammar.nonterminals.push(nonterminal_info); @@ -2226,14 +2274,16 @@ impl Grammar { self.other_used = other_was_used; } - // Reachability analysis from the start symbol + // Reachability analysis from the start symbols let mut nonterm_used = vec![false; self.nonterminals.len()]; let mut queue = Vec::new(); - let start_idx_opt = self.nonterminals_index.get(self.start_rule_name.value()); - if let Some(&start_idx) = start_idx_opt { - nonterm_used[start_idx] = true; - queue.push(start_idx); + for start_rule_name in &self.start_rule_names { + let start_idx_opt = self.nonterminals_index.get(start_rule_name.value()); + if let Some(&start_idx) = start_idx_opt { + nonterm_used[start_idx] = true; + queue.push(start_idx); + } } // Also queue protected non-terminals @@ -2490,16 +2540,17 @@ impl Grammar { // check if RuleType and ReduceAction can be removed from certain non-terminals let mut add_to_diags = BTreeSet::new(); loop { - let start_rule_idx = *self - .nonterminals_index - .get(self.start_rule_name.value()) - .unwrap(); + let start_rule_indices: std::collections::HashSet = self + .start_rule_names + .iter() + .map(|name| *self.nonterminals_index.get(name.value()).unwrap()) + .collect(); let mut changed = false; let mut can_removes = Vec::new(); for (i, nonterm) in self.nonterminals.iter().enumerate() { - if i == start_rule_idx { - // do not remove ruletype from start rule + if start_rule_indices.contains(&i) { + 
// do not remove ruletype from start rules continue; } @@ -2739,6 +2790,7 @@ impl Grammar { match class { TerminalSymbol::Error => return "error".to_string(), TerminalSymbol::Eof => return "eof".to_string(), + TerminalSymbol::VirtualStart(i) => return format!("VirtualStart({})", i), TerminalSymbol::Terminal(class_idx) => { let class = &self.terminal_classes[class_idx]; let len: usize = class @@ -3061,6 +3113,9 @@ impl Grammar { TerminalSymbol::Terminal(t) => t, TerminalSymbol::Error => self.terminal_classes.len(), TerminalSymbol::Eof => self.terminal_classes.len() + 1, + TerminalSymbol::VirtualStart(i) => { + self.terminal_classes.len() + 2 + i as usize + } } }; @@ -4322,4 +4377,49 @@ mod tests { "Expected range 'b'-'d' to be registered and split in terminals" ); } + + #[test] + fn test_multiple_start_symbols() { + let input = quote! { + %tokentype char; + %start Expr; + %start Stmt; + + Expr : 'a'; + Stmt : 'b'; + }; + + let grammar_args = Grammar::parse_args(input).expect("Failed to parse grammar"); + assert_eq!(grammar_args.start_rule_name.len(), 2); + assert_eq!(grammar_args.start_rule_name[0].value(), "Expr"); + assert_eq!(grammar_args.start_rule_name[1].value(), "Stmt"); + + let grammar = + Grammar::from_grammar_args(grammar_args).expect("Failed to construct grammar"); + assert_eq!(grammar.start_rule_names.len(), 2); + assert_eq!(grammar.start_rule_names[0].value(), "Expr"); + assert_eq!(grammar.start_rule_names[1].value(), "Stmt"); + } + + #[test] + fn test_duplicate_start_symbol() { + let input = quote! { + %tokentype char; + %start Expr; + %start Expr; + + Expr : 'a'; + }; + + let grammar_args = Grammar::parse_args(input).expect("Failed to parse grammar"); + let err = match Grammar::arg_check_error(&grammar_args) { + Ok(_) => panic!("Expected arg check to fail"), + Err(e) => e, + }; + assert!( + matches!(err, ArgError::DuplicateStartSymbol { ref name, .. 
} if name == "Expr"), + "Expected DuplicateStartSymbol error, got {:?}", + err + ); + } } diff --git a/rusty_lr_parser/src/parser/parser_expanded.rs b/rusty_lr_parser/src/parser/parser_expanded.rs index 62644795..16081566 100644 --- a/rusty_lr_parser/src/parser/parser_expanded.rs +++ b/rusty_lr_parser/src/parser/parser_expanded.rs @@ -393,6 +393,11 @@ impl ::rusty_lr_core::parser::terminalclass::TerminalClass for GrammarTerminalCl _ => GrammarTerminalClasses::TermClass1, } } + fn from_virtual_start(branch_idx: u32) -> Self { + match branch_idx { + _ => panic!("Invalid virtual start branch index: {}", branch_idx), + } + } } impl std::fmt::Display for GrammarTerminalClasses { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -613,11 +618,13 @@ pub enum GrammarData { #[allow(unused_braces, unused_parens, non_snake_case, non_camel_case_types)] pub struct GrammarDataStack { pub __stack: Vec, + pub branch_idx: u32, } impl Default for GrammarDataStack { fn default() -> Self { Self { __stack: Vec::new(), + branch_idx: 0, } } } @@ -5874,6 +5881,9 @@ impl ::rusty_lr_core::parser::data_stack::DataStack for GrammarDataStack { fn push_empty(&mut self) { self.__stack.push(GrammarData::Empty); } + fn set_branch_idx(&mut self, branch_idx: u32) { + self.branch_idx = branch_idx; + } fn clear(&mut self) { self.__stack.clear(); } @@ -5883,6 +5893,7 @@ impl ::rusty_lr_core::parser::data_stack::DataStack for GrammarDataStack { fn split_off(&mut self, at: usize) -> Self { Self { __stack: self.__stack.split_off(at), + branch_idx: self.branch_idx, } } fn truncate(&mut self, at: usize) { diff --git a/scripts/diff/calculator.rs b/scripts/diff/calculator.rs index 98141ee8..bc043060 100644 --- a/scripts/diff/calculator.rs +++ b/scripts/diff/calculator.rs @@ -114,6 +114,11 @@ impl ::rusty_lr::parser::terminalclass::TerminalClass for ETerminalClasses { _ => ETerminalClasses::__rustylr_other_terminals, } } + fn from_virtual_start(branch_idx: u32) -> Self { + match branch_idx 
{ + _ => panic!("Invalid virtual start branch index: {}", branch_idx), + } + } } impl std::fmt::Display for ETerminalClasses { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -206,10 +211,14 @@ pub enum EData { #[allow(unused_braces, unused_parens, non_snake_case, non_camel_case_types)] pub struct EDataStack { pub __stack: Vec, + pub branch_idx: u32, } impl Default for EDataStack { fn default() -> Self { - Self { __stack: Vec::new() } + Self { + __stack: Vec::new(), + branch_idx: 0, + } } } #[rustfmt::skip] @@ -508,6 +517,9 @@ impl ::rusty_lr::parser::data_stack::DataStack for EDataStack { fn push_empty(&mut self) { self.__stack.push(EData::Empty); } + fn set_branch_idx(&mut self, branch_idx: u32) { + self.branch_idx = branch_idx; + } fn clear(&mut self) { self.__stack.clear(); } @@ -517,6 +529,7 @@ impl ::rusty_lr::parser::data_stack::DataStack for EDataStack { fn split_off(&mut self, at: usize) -> Self { Self { __stack: self.__stack.split_off(at), + branch_idx: self.branch_idx, } } fn truncate(&mut self, at: usize) { diff --git a/scripts/diff/calculator_u8.rs b/scripts/diff/calculator_u8.rs index a783ca69..067b31f8 100644 --- a/scripts/diff/calculator_u8.rs +++ b/scripts/diff/calculator_u8.rs @@ -123,6 +123,11 @@ impl ::rusty_lr::parser::terminalclass::TerminalClass for ETerminalClasses { _ => ETerminalClasses::TermClass1, } } + fn from_virtual_start(branch_idx: u32) -> Self { + match branch_idx { + _ => panic!("Invalid virtual start branch index: {}", branch_idx), + } + } } impl std::fmt::Display for ETerminalClasses { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -232,10 +237,14 @@ pub enum EData { #[allow(unused_braces, unused_parens, non_snake_case, non_camel_case_types)] pub struct EDataStack { pub __stack: Vec, + pub branch_idx: u32, } impl Default for EDataStack { fn default() -> Self { - Self { __stack: Vec::new() } + Self { + __stack: Vec::new(), + branch_idx: 0, + } } } #[rustfmt::skip] @@ -703,6 +712,9 
@@ impl ::rusty_lr::parser::data_stack::DataStack for EDataStack { fn push_empty(&mut self) { self.__stack.push(EData::Empty); } + fn set_branch_idx(&mut self, branch_idx: u32) { + self.branch_idx = branch_idx; + } fn clear(&mut self) { self.__stack.clear(); } @@ -712,6 +724,7 @@ impl ::rusty_lr::parser::data_stack::DataStack for EDataStack { fn split_off(&mut self, at: usize) -> Self { Self { __stack: self.__stack.split_off(at), + branch_idx: self.branch_idx, } } fn truncate(&mut self, at: usize) { diff --git a/scripts/diff/json.rs b/scripts/diff/json.rs index 71d01c85..0807aa96 100644 --- a/scripts/diff/json.rs +++ b/scripts/diff/json.rs @@ -289,6 +289,11 @@ impl ::rusty_lr::parser::terminalclass::TerminalClass for JsonTerminalClasses { _ => JsonTerminalClasses::TermClass2, } } + fn from_virtual_start(branch_idx: u32) -> Self { + match branch_idx { + _ => panic!("Invalid virtual start branch index: {}", branch_idx), + } + } } impl std::fmt::Display for JsonTerminalClasses { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -490,10 +495,14 @@ pub enum JsonData { #[allow(unused_braces, unused_parens, non_snake_case, non_camel_case_types)] pub struct JsonDataStack { pub __stack: Vec, + pub branch_idx: u32, } impl Default for JsonDataStack { fn default() -> Self { - Self { __stack: Vec::new() } + Self { + __stack: Vec::new(), + branch_idx: 0, + } } } #[rustfmt::skip] @@ -1493,6 +1502,9 @@ impl ::rusty_lr::parser::data_stack::DataStack for JsonDataStack { fn push_empty(&mut self) { self.__stack.push(JsonData::Empty); } + fn set_branch_idx(&mut self, branch_idx: u32) { + self.branch_idx = branch_idx; + } fn clear(&mut self) { self.__stack.clear(); } @@ -1502,6 +1514,7 @@ impl ::rusty_lr::parser::data_stack::DataStack for JsonDataStack { fn split_off(&mut self, at: usize) -> Self { Self { __stack: self.__stack.split_off(at), + branch_idx: self.branch_idx, } } fn truncate(&mut self, at: usize) {