Skip to content

Commit ea3c539

Browse files
committed
feat: fix Grammar2 interpreter to correctly parse extern calls and impl blocks
Key fixes for Grammar2 runtime parsing:
- Add strip_name_bindings() to generator.rs to remove `name:rule` binding syntax from patterns when generating the pest grammar (pest doesn't support named bindings)
- Fix the prepend_list() helper in interpreter.rs to handle missing bindings when repetitions (*) match zero times, treating them as empty lists
- Update build_action_block() in ast.rs to extract return types from type_path nodes instead of expecting direct rust_type children
- Add the regex dependency to zyn_peg for pattern processing

Grammar2 now correctly parses the full ZynML stdlib, including:
- TypeAlias declarations
- All impl blocks (Display, Clone, Drop, Add, Sub, Mul, Div, Neg)
- extern calls as proper Call expressions
- Methods with associated types

All 133 ZynML tests pass.
1 parent 97bd76a commit ea3c539

11 files changed

Lines changed: 2435 additions & 613 deletions

File tree

crates/typed_ast/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ pub use typed_ast::{
148148
TypedExtern, TypedExternClass, TypedExternStruct, TypedExternEnum,
149149
TypedExternEnumVariant, TypedExternTypeDef, TypedExternMethod, TypedExternProperty,
150150
// Trait implementation types
151-
TypedTraitImpl, TypedImplAssociatedType,
151+
TypedTraitImpl, TypedImplAssociatedType, TypedInterface,
152152
};
153153

154154
pub use type_inference::{

crates/zyn_peg/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ log = "0.4"
2626
serde = { version = "1.0", features = ["derive"] }
2727
serde_json = "1.0"
2828

29+
# Regex for pattern processing
30+
regex = "1.10"
31+
2932
# Interned strings (shared with typed_ast)
3033
internment = "0.8"
3134

crates/zyn_peg/generated/ast_builder.rs

Lines changed: 1843 additions & 1 deletion
Large diffs are not rendered by default.

crates/zyn_peg/generated/zig.pest

Lines changed: 34 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,24 @@
33
program = { SOI ~ declarations ~ EOI }
44
declarations = { declaration* }
55
declaration = { struct_decl | enum_decl | extern_fn_decl | fn_decl | const_decl | var_decl }
6-
struct_decl = { "const" ~ identifier ~ "=" ~ "struct" ~ "{" ~ struct_fields? ~ "}" ~ ";" }
7-
struct_fields = { struct_field ~ ("," ~ struct_field)* ~ ","? }
8-
struct_field = { identifier ~ ":" ~ type_expr }
9-
enum_decl = { "const" ~ identifier ~ "=" ~ "enum" ~ "{" ~ enum_variants? ~ "}" ~ ";" }
10-
enum_variants = { enum_variant ~ ("," ~ enum_variant)* ~ ","? }
11-
enum_variant = { identifier }
6+
struct_decl = { "const" ~ identifier ~ "=" ~ "struct" ~ "{" ~ struct_field_list? ~ "}" ~ ";" }
7+
struct_field_list = { struct_field* }
8+
struct_field = { identifier ~ ":" ~ type_expr ~ ","? }
9+
enum_decl = { "const" ~ identifier ~ "=" ~ "enum" ~ "{" ~ enum_variant_list? ~ "}" ~ ";" }
10+
enum_variant_list = { enum_variant* }
11+
enum_variant = { identifier ~ ","? }
1212
extern_fn_decl = { extern_fn_decl_with_params | extern_fn_decl_no_params }
13-
extern_fn_decl_with_params = { "extern" ~ "fn" ~ identifier ~ "(" ~ fn_params ~ ")" ~ type_expr ~ ";" }
13+
extern_fn_decl_with_params = { "extern" ~ "fn" ~ identifier ~ "(" ~ fn_param_list ~ ")" ~ type_expr ~ ";" }
1414
extern_fn_decl_no_params = { "extern" ~ "fn" ~ identifier ~ "(" ~ ")" ~ type_expr ~ ";" }
1515
fn_decl = { async_fn_decl | sync_fn_decl }
1616
sync_fn_decl = { fn_decl_with_params | fn_decl_no_params }
1717
async_fn_decl = { async_fn_decl_with_params | async_fn_decl_no_params }
18-
async_fn_decl_with_params = { "async" ~ "fn" ~ identifier ~ "(" ~ fn_params ~ ")" ~ type_expr ~ block }
18+
async_fn_decl_with_params = { "async" ~ "fn" ~ identifier ~ "(" ~ fn_param_list ~ ")" ~ type_expr ~ block }
1919
async_fn_decl_no_params = { "async" ~ "fn" ~ identifier ~ "(" ~ ")" ~ type_expr ~ block }
20-
fn_decl_with_params = { "fn" ~ identifier ~ "(" ~ fn_params ~ ")" ~ type_expr ~ block }
20+
fn_decl_with_params = { "fn" ~ identifier ~ "(" ~ fn_param_list ~ ")" ~ type_expr ~ block }
2121
fn_decl_no_params = { "fn" ~ identifier ~ "(" ~ ")" ~ type_expr ~ block }
22-
fn_params = { fn_param_any ~ ("," ~ fn_param_any)* }
22+
fn_param_list = { fn_param_item* }
23+
fn_param_item = { fn_param_any ~ ","? }
2324
fn_param_any = { comptime_param | fn_param }
2425
comptime_param = { "comptime" ~ identifier ~ ":" ~ type_expr }
2526
fn_param = { identifier ~ ":" ~ type_expr }
@@ -29,12 +30,13 @@ const_decl_untyped = { "const" ~ identifier ~ "=" ~ expr ~ ";" }
2930
var_decl = { var_decl_typed | var_decl_untyped }
3031
var_decl_typed = { "var" ~ identifier ~ ":" ~ type_expr ~ "=" ~ expr ~ ";" }
3132
var_decl_untyped = { "var" ~ identifier ~ "=" ~ expr ~ ";" }
32-
type_expr = { pointer_type | optional_type | error_union_type | array_type | primitive_type | identifier }
33+
type_expr = { pointer_type | optional_type | error_union_type | array_type | primitive_type | named_type }
3334
pointer_type = { "*" ~ "const"? ~ type_expr }
3435
optional_type = { "?" ~ type_expr }
3536
error_union_type = { "!" ~ type_expr }
3637
array_type = { "[" ~ integer_literal? ~ "]" ~ type_expr }
37-
primitive_type = { "i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "bool" | "void" | "type" }
38+
primitive_type = @{ "i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "bool" | "void" | "type" }
39+
named_type = { identifier }
3840
statement = { if_stmt | while_stmt | for_stmt | return_stmt | break_stmt | continue_stmt | local_const | local_var | assign_stmt | expr_stmt }
3941
break_stmt = { "break" ~ ";" }
4042
continue_stmt = { "continue" ~ ";" }
@@ -53,52 +55,33 @@ local_var_typed = { "var" ~ identifier ~ ":" ~ type_expr ~ "=" ~ expr ~ ";" }
5355
local_var_untyped = { "var" ~ identifier ~ "=" ~ expr ~ ";" }
5456
expr_stmt = { expr ~ ";" }
5557
block = { "{" ~ statement* ~ "}" }
56-
expr = { logical_or }
57-
logical_or = { logical_and ~ (or_op ~ logical_and)* }
58-
logical_and = { comparison ~ (and_op ~ comparison)* }
59-
comparison = { addition ~ ((eq_op | neq_op | lte_op | gte_op | lt_op | gt_op) ~ addition)* }
60-
addition = { multiplication ~ ((add_op | sub_op) ~ multiplication)* }
61-
multiplication = { unary ~ ((mul_op | div_op) ~ unary)* }
62-
unary = { unary_with_op | primary }
58+
expr = { comparison_expr }
59+
comparison_expr = { add_expr ~ (comparison_op ~ add_expr)? }
60+
comparison_op = @{ "==" | "!=" | "<=" | ">=" | "<" | ">" }
61+
add_expr = { mul_expr ~ (add_op ~ mul_expr)? }
62+
add_op = @{ "+" | "-" }
63+
mul_expr = { unary_expr ~ (mul_op ~ unary_expr)? }
64+
mul_op = @{ "*" | "/" }
65+
unary_expr = { unary_with_op | primary }
6366
unary_with_op = { unary_op ~ primary }
67+
unary_op = { "-" | "!" }
6468
primary = { postfix_expr }
6569
postfix_expr = { call_expr | field_expr | index_expr | atom }
66-
call_expr = { atom ~ "(" ~ call_args? ~ ")" }
70+
call_expr = { atom ~ "(" ~ call_arg_list? ~ ")" }
71+
call_arg_list = { call_arg* }
72+
call_arg = { expr ~ ","? }
6773
field_expr = { atom ~ "." ~ identifier }
6874
index_expr = { atom ~ "[" ~ expr ~ "]" }
69-
call_args = { expr ~ ("," ~ expr)* }
70-
atom = { switch_expr | try_expr | await_expr | struct_init | array_literal | bool_literal | string_literal | integer_literal | type_value | identifier_expr | paren_expr }
71-
switch_expr = { "switch" ~ "(" ~ expr ~ ")" ~ "{" ~ switch_cases? ~ "}" }
72-
switch_cases = { switch_case ~ ("," ~ switch_case)* ~ ","? }
73-
switch_case = { switch_case_value | switch_case_else }
74-
switch_case_value = { switch_pattern ~ "=>" ~ expr }
75-
switch_case_else = { "else" ~ "=>" ~ expr }
76-
switch_pattern = { switch_or_pattern }
77-
switch_or_pattern = { switch_primary_pattern ~ ("|" ~ switch_primary_pattern)* }
78-
switch_primary_pattern = { switch_struct_pattern | switch_tagged_union_pattern | switch_error_pattern | switch_pointer_pattern | switch_range_pattern | switch_array_pattern | switch_literal_pattern | switch_wildcard_pattern | switch_identifier_pattern }
79-
switch_range_pattern = { switch_simple_literal ~ ".." ~ switch_simple_literal }
80-
switch_array_pattern = { ".{" ~ switch_array_elements? ~ "}" }
81-
switch_array_elements = { switch_pattern ~ ("," ~ switch_pattern)* ~ ","? }
82-
switch_literal_pattern = { switch_simple_literal }
83-
switch_simple_literal = { integer_literal | string_literal }
84-
switch_wildcard_pattern = { "_" }
85-
switch_identifier_pattern = { identifier }
86-
switch_struct_pattern = { identifier ~ "{" ~ switch_struct_field_patterns? ~ "}" }
87-
switch_struct_field_patterns = { switch_struct_field_pattern ~ ("," ~ switch_struct_field_pattern)* ~ ","? }
88-
switch_struct_field_pattern = { "." ~ identifier ~ ("=" ~ switch_pattern)? }
89-
switch_tagged_union_pattern = { "." ~ identifier }
90-
switch_error_pattern = { "error" ~ "." ~ identifier }
91-
switch_pointer_pattern = { "*" ~ switch_primary_pattern }
92-
type_value = { type_expr_as_value }
93-
type_expr_as_value = { primitive_type }
94-
struct_init = { identifier ~ "{" ~ struct_init_fields? ~ "}" }
95-
struct_init_fields = { struct_init_field ~ ("," ~ struct_init_field)* ~ ","? }
96-
struct_init_field = { "." ~ identifier ~ "=" ~ expr }
75+
atom = { try_expr | await_expr | struct_init | array_literal | bool_literal | string_literal | integer_literal | identifier_expr | paren_expr }
9776
try_expr = { "try" ~ primary }
9877
await_expr = { "await" ~ primary }
78+
struct_init = { identifier ~ "{" ~ struct_init_field_list? ~ "}" }
79+
struct_init_field_list = { struct_init_field* }
80+
struct_init_field = { "." ~ identifier ~ "=" ~ expr ~ ","? }
81+
array_literal = { "[" ~ array_element_list? ~ "]" }
82+
array_element_list = { array_element* }
83+
array_element = { expr ~ ","? }
9984
paren_expr = _{ "(" ~ expr ~ ")" }
100-
array_literal = { "[" ~ array_elements? ~ "]" }
101-
array_elements = { expr ~ ("," ~ expr)* }
10285
identifier_expr = { identifier }
10386
bool_literal = { "true" | "false" }
10487
integer_literal = @{ "-"? ~ ASCII_DIGIT+ }
@@ -107,23 +90,10 @@ string_inner = { (!("\"" | "\\") ~ ANY) | escape_seq }
10790
escape_seq = { "\\" ~ ("n" | "r" | "t" | "\\" | "\"" | "0") }
10891
keyword = @{ ("struct" | "enum" | "fn" | "const" | "var" | "if" | "else" | "while" | "for" |
10992
"return" | "break" | "continue" | "try" | "await" | "async" | "and" | "or" | "true" | "false" |
110-
"comptime" | "type" | "switch" |
93+
"comptime" | "type" | "switch" | "extern" | "error" |
11194
"i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "bool" | "void")
11295
~ !(ASCII_ALPHANUMERIC | "_")
11396
}
11497
identifier = @{ !keyword ~ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
115-
and_op = { "and" }
116-
or_op = { "or" }
117-
eq_op = { "==" }
118-
neq_op = { "!=" }
119-
lte_op = { "<=" }
120-
gte_op = { ">=" }
121-
lt_op = { "<" }
122-
gt_op = { ">" }
123-
add_op = { "+" }
124-
sub_op = { "-" }
125-
mul_op = { "*" }
126-
div_op = { "/" }
127-
unary_op = { "-" | "!" }
12898
WHITESPACE = _{ " " | "\t" | "\n" | "\r" }
12999
COMMENT = _{ "//" ~ (!"\n" ~ ANY)* ~ "\n"? }

crates/zyn_peg/src/ast.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,9 +294,21 @@ fn build_action_block(pair: Pair<Rule>) -> Result<ActionBlock, String> {
294294

295295
for inner in pair.into_inner() {
296296
match inner.as_rule() {
297+
Rule::type_path => {
298+
// type_path = { rust_type ~ ("::" ~ identifier)? }
299+
// Extract the full type path including variant
300+
return_type = inner.as_str().trim().to_string();
301+
}
297302
Rule::rust_type => {
303+
// Legacy support: direct rust_type
298304
return_type = inner.as_str().trim().to_string();
299305
}
306+
Rule::identifier => {
307+
// Helper function call or passthrough: -> intern(binding) or -> binding
308+
if return_type.is_empty() {
309+
return_type = inner.as_str().trim().to_string();
310+
}
311+
}
300312
Rule::action_body => {
301313
for field_pair in inner.into_inner() {
302314
match field_pair.as_rule() {

crates/zyn_peg/src/generator.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1038,6 +1038,20 @@ pub fn generate_pest_grammar_string(grammar: &ZynGrammar) -> Result<String> {
10381038
generate_pest_grammar(grammar)
10391039
}
10401040

1041+
/// Strip name bindings from a pattern for pest compatibility.
1042+
/// Converts `name:rule` to just `rule` since pest doesn't support named bindings.
1043+
///
1044+
/// Examples:
1045+
/// - `items:top_level_items` -> `top_level_items`
1046+
/// - `name:identifier ~ ":" ~ value:expr` -> `identifier ~ ":" ~ expr`
1047+
fn strip_name_bindings(pattern: &str) -> String {
1048+
use regex::Regex;
1049+
// Match identifier followed by colon followed by identifier (the binding syntax)
1050+
// We need to handle cases like `name:identifier` but not break strings like `":"`
1051+
let re = Regex::new(r"\b([a-zA-Z_][a-zA-Z0-9_]*):([a-zA-Z_][a-zA-Z0-9_]*)").unwrap();
1052+
re.replace_all(pattern, "$2").to_string()
1053+
}
1054+
10411055
/// Generate a pest-compatible grammar from ZynGrammar rules
10421056
fn generate_pest_grammar(grammar: &ZynGrammar) -> Result<String> {
10431057
let mut lines = Vec::new();
@@ -1059,9 +1073,12 @@ fn generate_pest_grammar(grammar: &ZynGrammar) -> Result<String> {
10591073
None => "",
10601074
};
10611075

1076+
// Strip name bindings from the pattern for pest compatibility
1077+
let pest_pattern = strip_name_bindings(&rule.pattern);
1078+
10621079
lines.push(format!(
10631080
"{} = {}{{ {} }}",
1064-
rule.name, modifier, rule.pattern
1081+
rule.name, modifier, pest_pattern
10651082
));
10661083
}
10671084

crates/zyn_peg/src/grammar/parser.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,28 @@ impl<'a> GrammarParser<'a> {
524524
let first_path = self.parse_type_path()?;
525525
self.skip_ws();
526526

527+
// Check for function call: -> intern(name)
528+
if self.peek_char() == Some('(') {
529+
self.advance(); // consume '('
530+
self.skip_ws();
531+
532+
// Parse arguments
533+
let mut args = Vec::new();
534+
while self.peek_char() != Some(')') {
535+
let arg = self.parse_expr()?;
536+
args.push(arg);
537+
self.skip_ws();
538+
if self.peek_char() == Some(',') {
539+
self.advance();
540+
self.skip_ws();
541+
}
542+
}
543+
self.expect_char(')')?;
544+
545+
// Function call action
546+
return Ok((ActionIR::HelperCall { function: first_path.clone(), args }, "Any".to_string()));
547+
}
548+
527549
// Check for simple pass-through: -> binding (no '{' follows)
528550
// If the first_path is a simple identifier (no ::) and no '{' follows,
529551
// it's a pass-through action

0 commit comments

Comments
 (0)