diff --git a/AGENTS.md b/AGENTS.md index 32523fcd..f671877c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -55,3 +55,9 @@ If an implementation plan artifact is created, also print the full plan directly When modifying code, comments, or documentation, use formal terminology based on Programming Language Theory, Theory of Computation, and Type Theory for internal logic. Prefer terms such as `Symbol` and `Production` internally. For user-facing Bison-inspired syntax, keep familiar Bison terminology such as `%token` and `%tokentype`. + +## 9. Keep LSP Synchronized with Grammar Changes + +Whenever changes are made to the grammar syntax, directives, patterns, or variables: +- Update the LSP implementation in `rusty_lr_lsp` to fully support and recognize the updated grammar. +- Ensure that semantic tokens, hover information, completions, inlay hints, and diagnostic handling are kept aligned with the new grammar specifications. diff --git a/Cargo.toml b/Cargo.toml index cac7219c..c3f7f5a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,8 +6,10 @@ members = [ "rusty_lr_parser", "rusty_lr_buildscript", "rusty_lr_executable", + "rusty_lr_lsp", "example/calculator", "example/calculator_u8", "example/glr", "example/json", ] + diff --git a/README.md b/README.md index 0347d119..31df3746 100644 --- a/README.md +++ b/README.md @@ -258,6 +258,13 @@ println!("{}", context); // Formats the state tree (requires 'tree' feature) --- +## Editor Support + +An experimental RustyLR language server is under development in [`rusty_lr_lsp`](rusty_lr_lsp), with a temporary VSCode client in [`editors/vscode-rustylr`](editors/vscode-rustylr). +It currently targets `*.rustylr` files and files named `rustylr.rs`. + +--- + ## Examples - [Calculator (enum tokens)](https://github.com/ehwan/RustyLR/blob/main/example/calculator/src/parser.rustylr): A numeric expression parser using custom token enums. 
diff --git a/SYNTAX.md b/SYNTAX.md index 9e15b661..1de9bd28 100644 --- a/SYNTAX.md +++ b/SYNTAX.md @@ -10,7 +10,7 @@ This document provides a comprehensive guide to the grammar definition syntax us - [Token Definition (`%token`)](#token-definition-must-defined) - [Production Rules](#production-rules) - [Patterns](#patterns) -- [ProductionType (Non-Terminal Types)](#ruletype-optional) +- [ProductionType (Non-Terminal Types)](#productiontype-optional) - [Reduce Actions](#reduceaction-optional) - [Accessing Data in Reduce Actions](#accessing-token-data-in-reduceaction) - [Exclamation Mark (`!`) Value Discard](#exclamation-mark-) @@ -511,7 +511,7 @@ You can use variables prefixed with `$` inside any RustCode block in the grammar - `$location` -> Evaluates to the type defined by `%location` (defaults to `::rusty_lr::DefaultLocation`). - `$userdata` -> Evaluates to the type defined by `%userdata` (defaults to `()`). - `$error` or `$errortype` -> Evaluates to the type defined by `%errortype` / `%error` (defaults to `::rusty_lr::DefaultReduceActionError`). -- `$NonTerminalName` -> Evaluates to the `ruletype` defined for `NonTerminalName`. +- `$NonTerminalName` -> Evaluates to the `ProductionType` defined for `NonTerminalName`. - `$terminal_name` -> Evaluates to the match pattern/definition of `<terminal_name>`. ### Substitution Errors diff --git a/editors/vscode-rustylr/.gitignore b/editors/vscode-rustylr/.gitignore new file mode 100644 index 00000000..28a78a7c --- /dev/null +++ b/editors/vscode-rustylr/.gitignore @@ -0,0 +1,2 @@ +node_modules/ +*.vsix diff --git a/editors/vscode-rustylr/CHANGELOG.md b/editors/vscode-rustylr/CHANGELOG.md new file mode 100644 index 00000000..73116095 --- /dev/null +++ b/editors/vscode-rustylr/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +All notable changes to the "RustyLR" extension will be documented in this file. + +## 0.1.0 + +- First public release of RustyLR language support! 
+- Fully integrated with the `rusty_lr_lsp` server: + - **Syntax Highlighting (Semantic Tokens):** Distinct syntax coloring for terminals, non-terminals, directives, bindings, location bindings, and variables. + - **Diagnostics:** Inline warning and error reporting directly in the editor. + - **Code Actions:** Quick-fix actions to suppress warnings with `%allow` directives. + - **Formatting:** Code formatting and indentation support for rule definitions and reduce actions. + - **Go to Definition:** Jump directly to token declarations, production definitions, and precedence definitions. + - **Find References:** Find all usages of terminals, non-terminals, and precedence symbols across the grammar document. + - **Hover Tooltips:** Interactive documentation tooltips for keywords, patterns, and variables. + - **Inlay Hints:** Inline type hints for grammar patterns and reduce actions. + - **Auto-Completion:** Intelligent suggestions for symbols, directives, variables, and locations. diff --git a/editors/vscode-rustylr/README.md b/editors/vscode-rustylr/README.md new file mode 100644 index 00000000..d929ed15 --- /dev/null +++ b/editors/vscode-rustylr/README.md @@ -0,0 +1,34 @@ +# RustyLR Language Support + +This extension provides rich language support for the [RustyLR](https://github.com/ehwan/RustyLR) parser generator grammar files (`*.rustylr` and `rustylr.rs`). + +## Features + +- **Diagnostics & Error Reporting:** Real-time diagnostics for grammar syntax errors, unused symbols, conflict resolutions, and more. +- **Go to Definition:** Quickly navigate to rule definitions, terminal declarations, and precedence rules. +- **Find References:** Find all occurrences and usages of terminals, non-terminals, and precedence symbols. +- **Syntax Highlighting (Semantic Tokens):** Distinct, theme-aligned colors for terminal names, non-terminal rules, directives, bindings, location bindings (`@loc`), and variables (`$var`). 
+- **Formatting:** Automatic document formatter that standardizes directives, separates rules, and indents rule lines and reduce-action bodies. +- **Code Actions (Quick Fixes):** Fast diagnostic suppression actions using the `%allow` directive. +- **Hover Tooltips:** Documented explanations and types for terminal tokens, non-terminal rules, keywords, and patterns. +- **Inlay Hints:** Inline type annotations and reduce action indicators. +- **Auto-Completion:** Intelligent suggestions for directives, symbols, locations, variables, and `%allow` diagnostic names. + +## Extension Settings + +This extension contributes the following settings to control the language server behavior: + +* `rustylr.server.command`: Path to the `rusty_lr_lsp` server binary. Leave empty to auto-detect a built binary in `target/debug` or `target/release`, falling back to `cargo run`. +* `rustylr.server.args`: Arguments passed to the language server command. +* `rustylr.server.cwd`: Working directory for the language server. +* `rustylr.semanticTokens.enabled`: Toggle semantic token syntax highlighting. + +## Requirements + +The language features require the `rusty_lr_lsp` server, which is part of the RustyLR Cargo workspace. You can build it from the repository root: + +```bash +cargo build -p rusty_lr_lsp +``` + +By default, the extension looks for a built binary under the RustyLR workspace's `target/debug` or `target/release` directory and otherwise launches the server with `cargo run`. 
diff --git a/editors/vscode-rustylr/extension.js b/editors/vscode-rustylr/extension.js new file mode 100644 index 00000000..de14b63f --- /dev/null +++ b/editors/vscode-rustylr/extension.js @@ -0,0 +1,201 @@ +const fs = require("fs"); +const path = require("path"); +const vscode = require("vscode"); +const { LanguageClient, TransportKind } = require("vscode-languageclient/node"); + +let client; +let outputChannel; +let startingClient; + +async function activate(context) { + outputChannel = vscode.window.createOutputChannel("RustyLR LSP"); + context.subscriptions.push(outputChannel); + + context.subscriptions.push( + vscode.commands.registerCommand("rustylr.restartServer", async () => { + await stopClient(); + try { + await startClient(context); + vscode.window.showInformationMessage("RustyLR language server restarted."); + } catch (error) { + reportStartError(error); + } + }) + ); + + startClient(context).catch(reportStartError); +} + +async function deactivate() { + await stopClient(); +} + +async function startClient(context) { + if (startingClient) { + return startingClient; + } + if (client) { + return; + } + + startingClient = doStartClient(context); + try { + await startingClient; + } finally { + startingClient = undefined; + } +} + +async function doStartClient(context) { + const config = vscode.workspace.getConfiguration("rustylr.server"); + const workspaceFolder = + vscode.workspace.workspaceFolders && vscode.workspace.workspaceFolders.length > 0 + ? vscode.workspace.workspaceFolders[0].uri.fsPath + : undefined; + const repoRoot = findRustyLrRoot(workspaceFolder) || findRustyLrRoot(context.extensionPath); + + const configuredCwd = config.get("cwd", ""); + const cwd = configuredCwd + ? 
expandPath(configuredCwd, { workspaceFolder, extensionPath: context.extensionPath, repoRoot }) + : repoRoot || workspaceFolder || context.extensionPath; + + const configuredCommand = config.get("command", ""); + const configuredArgs = config.get("args", []); + const server = resolveServerCommand(configuredCommand, configuredArgs, { + workspaceFolder, + extensionPath: context.extensionPath, + repoRoot, + cwd, + }); + + const patterns = config.get("documentPatterns", [ + "**/*.rustylr", + "**/rustylr.rs", + ]); + + const documentSelector = [ + { scheme: "file", language: "rustylr" }, + ...patterns.map((pattern) => ({ scheme: "file", pattern })), + ]; + + outputChannel.appendLine(`Starting RustyLR LSP: ${server.command} ${server.args.join(" ")}`); + outputChannel.appendLine(`RustyLR LSP cwd: ${cwd}`); + + client = new LanguageClient( + "rustylr", + "RustyLR Language Server", + { + command: server.command, + args: server.args, + options: { cwd }, + transport: TransportKind.stdio, + }, + { + documentSelector, + outputChannel, + synchronize: { + configurationSection: "rustylr", + }, + } + ); + + await client.start(); +} + +async function stopClient() { + if (startingClient) { + try { + await startingClient; + } catch (_error) { + // The start failure will already be reported by the original caller. + } + } + + if (!client) { + return; + } + + const activeClient = client; + client = undefined; + try { + await activeClient.stop(); + } catch (error) { + const message = error && error.message ? 
error.message : String(error); + if (outputChannel) { + outputChannel.appendLine(`Ignoring RustyLR LSP stop error: ${message}`); + } + } +} + +function expandPath(value, vars) { + return value + .split("${workspaceFolder}") + .join(vars.workspaceFolder || "") + .split("${extensionPath}") + .join(vars.extensionPath || "") + .split("${repoRoot}") + .join(vars.repoRoot || ""); +} + +function resolveServerCommand(configuredCommand, configuredArgs, vars) { + if (configuredCommand) { + return { + command: expandPath(configuredCommand, vars), + args: configuredArgs.map((arg) => expandPath(arg, vars)), + }; + } + + const binaryName = process.platform === "win32" ? "rusty_lr_lsp.exe" : "rusty_lr_lsp"; + const candidates = [ + vars.repoRoot && path.join(vars.repoRoot, "target", "debug", binaryName), + vars.repoRoot && path.join(vars.repoRoot, "target", "release", binaryName), + ].filter(Boolean); + + for (const candidate of candidates) { + if (fs.existsSync(candidate)) { + return { command: candidate, args: [] }; + } + } + + return { + command: "cargo", + args: ["run", "--quiet", "--package", "rusty_lr_lsp"], + }; +} + +function findRustyLrRoot(startPath) { + if (!startPath) { + return undefined; + } + + let current = fs.statSync(startPath).isDirectory() ? startPath : path.dirname(startPath); + while (true) { + if ( + fs.existsSync(path.join(current, "Cargo.toml")) && + fs.existsSync(path.join(current, "rusty_lr_lsp", "Cargo.toml")) + ) { + return current; + } + + const parent = path.dirname(current); + if (parent === current) { + return undefined; + } + current = parent; + } +} + +function reportStartError(error) { + const message = error && error.stack ? error.stack : String(error); + if (outputChannel) { + outputChannel.appendLine("Failed to start RustyLR LSP."); + outputChannel.appendLine(message); + outputChannel.show(true); + } + vscode.window.showErrorMessage("Failed to start RustyLR language server. 
See Output: RustyLR LSP."); +} + +module.exports = { + activate, + deactivate, +}; diff --git a/editors/vscode-rustylr/language-configuration.json b/editors/vscode-rustylr/language-configuration.json new file mode 100644 index 00000000..0893eba1 --- /dev/null +++ b/editors/vscode-rustylr/language-configuration.json @@ -0,0 +1,74 @@ +{ + "comments": { + "lineComment": "//", + "blockComment": [ + "/*", + "*/" + ] + }, + "brackets": [ + [ + "{", + "}" + ], + [ + "[", + "]" + ], + [ + "(", + ")" + ] + ], + "autoClosingPairs": [ + { + "open": "{", + "close": "}" + }, + { + "open": "[", + "close": "]" + }, + { + "open": "(", + "close": ")" + }, + { + "open": "\"", + "close": "\"", + "notIn": [ + "string" + ] + }, + { + "open": "'", + "close": "'", + "notIn": [ + "string", + "comment" + ] + } + ], + "surroundingPairs": [ + [ + "{", + "}" + ], + [ + "[", + "]" + ], + [ + "(", + ")" + ], + [ + "\"", + "\"" + ], + [ + "'", + "'" + ] + ] +} diff --git a/editors/vscode-rustylr/package-lock.json b/editors/vscode-rustylr/package-lock.json new file mode 100644 index 00000000..243f6479 --- /dev/null +++ b/editors/vscode-rustylr/package-lock.json @@ -0,0 +1,161 @@ +{ + "name": "rustylr-vscode", + "version": "0.0.1", + "lockfileVersion": 2, + "requires": true, + "packages": { + "": { + "name": "rustylr-vscode", + "version": "0.0.1", + "license": "MIT OR Apache-2.0", + "dependencies": { + "vscode-languageclient": "^9.0.1" + }, + "devDependencies": { + "@types/vscode": "1.84.0" + }, + "engines": { + "vscode": "^1.84.0" + } + }, + "node_modules/@types/vscode": { + "version": "1.84.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.84.0.tgz", + "integrity": "sha512-lCGOSrhT3cL+foUEqc8G1PVZxoDbiMmxgnUZZTEnHF4mC47eKAUtBGAuMLY6o6Ua8PAuNCoKXbqPmJd1JYnQfg==", + "dev": true + }, + "node_modules/balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": 
"sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "node_modules/brace-expansion": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.1.tgz", + "integrity": "sha512-WR1cURNjuvBLMZBMbqM0UoE+WAfdUcEV1ccD8PVBVOI+Z3ND4+SZbN8RsfT2bMuG1qwz5RFvPukSZm5fF2D5eA==", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/minimatch": { + "version": "5.1.9", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz", + "integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/semver": { + "version": "7.8.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.5.tgz", + "integrity": "sha512-Y7/KDsb8LjooZpwaqGyulO6DQlksgCncchHGk+sZIY4SBvUocMBEFH5Ur1fI4dV+Jvl0w6cjvucaIi40puRioA==", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/vscode-jsonrpc": { + "version": "8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==", + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": "sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "dependencies": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + }, + "engines": { + "vscode": "^1.82.0" + } + }, + "node_modules/vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": 
"https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "dependencies": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "node_modules/vscode-languageserver-types": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==" + } + }, + "dependencies": { + "@types/vscode": { + "version": "1.84.0", + "resolved": "https://registry.npmjs.org/@types/vscode/-/vscode-1.84.0.tgz", + "integrity": "sha512-lCGOSrhT3cL+foUEqc8G1PVZxoDbiMmxgnUZZTEnHF4mC47eKAUtBGAuMLY6o6Ua8PAuNCoKXbqPmJd1JYnQfg==", + "dev": true + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "brace-expansion": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.1.1.tgz", + "integrity": "sha512-WR1cURNjuvBLMZBMbqM0UoE+WAfdUcEV1ccD8PVBVOI+Z3ND4+SZbN8RsfT2bMuG1qwz5RFvPukSZm5fF2D5eA==", + "requires": { + "balanced-match": "^1.0.0" + } + }, + "minimatch": { + "version": "5.1.9", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.9.tgz", + "integrity": "sha512-7o1wEA2RyMP7Iu7GNba9vc0RWWGACJOCZBJX2GJWip0ikV+wcOsgVuY9uE8CPiyQhkGFSlhuSkZPavN7u1c2Fw==", + "requires": { + "brace-expansion": "^2.0.1" + } + }, + "semver": { + "version": "7.8.5", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.5.tgz", + "integrity": "sha512-Y7/KDsb8LjooZpwaqGyulO6DQlksgCncchHGk+sZIY4SBvUocMBEFH5Ur1fI4dV+Jvl0w6cjvucaIi40puRioA==" + }, + "vscode-jsonrpc": { + "version": 
"8.2.0", + "resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz", + "integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==" + }, + "vscode-languageclient": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/vscode-languageclient/-/vscode-languageclient-9.0.1.tgz", + "integrity": "sha512-JZiimVdvimEuHh5olxhxkht09m3JzUGwggb5eRUkzzJhZ2KjCN0nh55VfiED9oez9DyF8/fz1g1iBV3h+0Z2EA==", + "requires": { + "minimatch": "^5.1.0", + "semver": "^7.3.7", + "vscode-languageserver-protocol": "3.17.5" + } + }, + "vscode-languageserver-protocol": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz", + "integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==", + "requires": { + "vscode-jsonrpc": "8.2.0", + "vscode-languageserver-types": "3.17.5" + } + }, + "vscode-languageserver-types": { + "version": "3.17.5", + "resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz", + "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==" + } + } +} diff --git a/editors/vscode-rustylr/package.json b/editors/vscode-rustylr/package.json new file mode 100644 index 00000000..4b7c5356 --- /dev/null +++ b/editors/vscode-rustylr/package.json @@ -0,0 +1,128 @@ +{ + "name": "rustylr-vscode", + "displayName": "RustyLR", + "description": "Rich language support for the RustyLR parser generator, featuring diagnostics, formatting, auto-completion, hover, goto-definition, find-references, and inlay hints.", + "version": "0.1.0", + "publisher": "rustylr", + "license": "MIT OR Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/ehwan/RustyLR.git" + }, + "bugs": { + "url": "https://github.com/ehwan/RustyLR/issues" + }, + "homepage": 
"https://github.com/ehwan/RustyLR#readme", + "keywords": [ + "parser", + "grammar", + "lsp", + "rust", + "rustylr", + "bison", + "yacc", + "compiler" + ], + "engines": { + "vscode": "^1.84.0" + }, + "categories": [ + "Programming Languages", + "Linters", + "Formatters" + ], + "main": "./extension.js", + "activationEvents": [ + "onLanguage:rustylr", + "workspaceContains:**/*.rustylr", + "workspaceContains:**/rustylr.rs", + "onCommand:rustylr.restartServer" + ], + "contributes": { + "commands": [ + { + "command": "rustylr.restartServer", + "title": "RustyLR: Restart Language Server" + } + ], + "configuration": { + "title": "RustyLR", + "properties": { + "rustylr.server.command": { + "type": "string", + "default": "", + "description": "Command used to start the RustyLR LSP server. Empty means auto-detect target/debug/rusty_lr_lsp and fall back to cargo run." + }, + "rustylr.server.args": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "Arguments passed to rustylr.server.command." + }, + "rustylr.server.cwd": { + "type": "string", + "default": "", + "description": "Working directory for the RustyLR LSP server. Empty means the extension will use the RustyLR workspace root when it can find one." + }, + "rustylr.server.documentPatterns": { + "type": "array", + "items": { + "type": "string" + }, + "default": [ + "**/*.rustylr", + "**/rustylr.rs" + ], + "description": "Additional file globs handled by the RustyLR LSP server." + }, + "rustylr.semanticTokens.enabled": { + "type": "boolean", + "default": true, + "description": "Enable or disable semantic token highlighting for RustyLR grammar files." 
+ } + } + }, + "languages": [ + { + "id": "rustylr", + "aliases": [ + "RustyLR", + "rustylr" + ], + "extensions": [ + ".rustylr" + ], + "filenamePatterns": [ + "rustylr.rs" + ], + "configuration": "./language-configuration.json" + } + ], + "grammars": [ + { + "language": "rustylr", + "scopeName": "source.rustylr", + "path": "./syntaxes/rustylr.tmLanguage.json" + } + ], + "configurationDefaults": { + "[rustylr]": { + "editor.quickSuggestions": { + "other": true, + "comments": false, + "strings": false + }, + "editor.suggestOnTriggerCharacters": true, + "editor.semanticHighlighting.enabled": true + } + } + }, + "dependencies": { + "vscode-languageclient": "^9.0.1" + }, + "devDependencies": { + "@types/vscode": "1.84.0" + } +} diff --git a/editors/vscode-rustylr/syntaxes/rustylr.tmLanguage.json b/editors/vscode-rustylr/syntaxes/rustylr.tmLanguage.json new file mode 100644 index 00000000..4682b3b4 --- /dev/null +++ b/editors/vscode-rustylr/syntaxes/rustylr.tmLanguage.json @@ -0,0 +1,80 @@ +{ + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "name": "RustyLR", + "scopeName": "source.rustylr", + "patterns": [ + { + "include": "#comments" + }, + { + "include": "#directives" + }, + { + "include": "#punctuation" + }, + { + "include": "#strings" + } + ], + "repository": { + "comments": { + "patterns": [ + { + "name": "comment.line.double-slash.rustylr", + "match": "//.*$" + }, + { + "name": "comment.block.rustylr", + "begin": "/\\*", + "end": "\\*/" + } + ] + }, + "directives": { + "patterns": [ + { + "name": "keyword.control.directive.rustylr", + "match": "%(?:allow|dprec|eof|error|errortype|fallback|glr|lalr|layout|left|location|moduleprefix|nonassoc|nooptim|prec|precedence|right|start|token|tokentype|userdata)\\b" + }, + { + "name": "keyword.operator.section.rustylr", + "match": "%%" + } + ] + }, + "punctuation": { + "patterns": [ + { + "name": "punctuation.separator.production.rustylr", + "match": "[:;|]" + } + ] + }, + 
"strings": { + "patterns": [ + { + "name": 
"string.quoted.double.rustylr", + "begin": "\"", + "end": "\"", + "patterns": [ + { + "name": "constant.character.escape.rustylr", + "match": "\\\\." + } + ] + }, + { + "name": "string.quoted.single.rustylr", + "begin": "'", + "end": "'", + "patterns": [ + { + "name": "constant.character.escape.rustylr", + "match": "\\\\." + } + ] + } + ] + } + } +} diff --git a/rusty_lr_lsp/Cargo.toml b/rusty_lr_lsp/Cargo.toml new file mode 100644 index 00000000..4affcff8 --- /dev/null +++ b/rusty_lr_lsp/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "rusty_lr_lsp" +version = "0.1.0" +edition = "2021" +description = "LSP server for rusty_lr grammar files" +license = "MIT OR Apache-2.0" + +[dependencies] +lsp-server = "0.7.6" +lsp-types = "0.95.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +proc-macro2 = { version = "1.0.86", features = ["span-locations"] } +quote = "1.0" +syn = { version = "2.0", features = ["full", "extra-traits"] } +rusty_lr_core = { version = "4.2.0", path = "../rusty_lr_core", features = ["builder"] } +rusty_lr_parser = { version = "4.2.1", path = "../rusty_lr_parser" } diff --git a/rusty_lr_lsp/README.md b/rusty_lr_lsp/README.md new file mode 100644 index 00000000..79b4f74f --- /dev/null +++ b/rusty_lr_lsp/README.md @@ -0,0 +1,78 @@ +# RustyLR LSP + +`rusty_lr_lsp` is an experimental language server for RustyLR grammar files. It communicates over stdio and is intended to be used by editor clients such as the temporary VSCode extension in `editors/vscode-rustylr`. + +## Supported Files + +The current VSCode client targets: + +- `*.rustylr` +- `rustylr.rs` + +Other Rust files are intentionally not matched by default. + +## Features + +### Supported Features + +- [x] **Diagnostics:** Parses open RustyLR grammar files and publishes grammar errors, recovered parser errors, warnings, and conflict diagnostics. +- [x] **Code Actions:** Offers quick fixes for suppressible diagnostics by inserting the appropriate `%allow ...;` directive. 
+- [x] **Formatting:** Normalizes directive declarations into one-space, single-line forms, one-space pattern separators, and indentation for production rules and reduce-action bodies. +- [x] **Go to Definition:** Resolves terminal and non-terminal references to their `%token` declarations or production definitions, including `%prec` and precedence symbols. +- [x] **Find References:** Finds all references to terminal and non-terminal symbols throughout the grammar definitions, `%start` rules, `%token` definitions, precedence symbols, and the `error` keyword. +- [x] **Hover:** Shows directive and keyword documentation. Hovering over grammar patterns also shows the pattern syntax, explanation, and final Rust type. +- [x] **Inlay Hints:** Shows `Pattern: Type` hints for top-level patterns in non-terminal definitions, and `ReduceAction` labels before action blocks. +- [x] **Completion for symbols:** Suggests declared terminal names and non-terminal names in grammar positions. Completion details include the resolved Rust type for terminals and non-terminals, including inferred placeholders and a note when the value is boxed for parser storage. +- [x] **Completion for directives and keywords:** Suggests directives such as `%token`, `%start`, `%tokentype`, `%left`, `%right`, `%precedence`, `%prec`, `%dprec`, `%glr`, `%lalr`, `%nooptim`, `%allow`, and common identifiers such as `error`, `$sep`, `data`, `lookahead`, and `shift`. +- [x] **Completion for `$...` variables:** Suggests built-in substitutions (`$tokentype`, `$location`, `$userdata`, `$error`, `$errortype`), terminal and non-terminal substitutions (`$terminal_name`, `$NonTerminalName`), current reduce-action bindings (`$left`, `$value`, etc.), and positional semantic variables (`$1`, `$2`, ...). +- [x] **Completion for locations:** Suggests `@$`, `@0`, positional locations (`@1`, `@2`, ...), and named binding locations (`@left`, `@value`, etc.). 
+- [x] **Completion for `%allow`:** Suggests valid diagnostic names such as `nonterm_unreachable`, `unused_terminals`, and conflict-resolution diagnostic identifiers. + +### Unsupported / Planned Features + +- [ ] **Go to Definition / Find References inside Reduce Actions:** Navigating to definitions or finding references of symbols inside `ReduceAction` Rust code blocks is currently not supported. +- [ ] **Document Symbols / Outline:** Showing all rules and tokens in the file outline window is not supported. +- [ ] **Rename Symbol:** Rename refactoring of terminal and non-terminal symbols throughout the grammar file is not supported. +- [ ] **Signature Help:** Parameter information for helper patterns like `$sep` is not supported. +- [ ] **Multi-file Project Support:** Referencing definitions across multiple files is not supported (the grammar file is treated as a self-contained unit). + +## Running the Server + +Build the server from the workspace root: + +```bash +cargo build -p rusty_lr_lsp +``` + +The debug binary is then available at: + +```bash +target/debug/rusty_lr_lsp +``` + +The server expects to be launched by an LSP client over stdio. For quick VSCode testing, use the extension client in `editors/vscode-rustylr`. + +## VSCode Test Client + +From the repository root: + +```bash +cargo build -p rusty_lr_lsp +cd editors/vscode-rustylr +npm install +code . +``` + +Press `F5` in VSCode to open an Extension Development Host, then open the RustyLR repository or another workspace containing `*.rustylr` or `rustylr.rs` grammar files. + +The extension auto-detects `target/debug/rusty_lr_lsp` (then `target/release/rusty_lr_lsp`) under the RustyLR workspace root and otherwise falls back to `cargo run --quiet --package rusty_lr_lsp`. 
You can override the server command with VSCode settings; the placeholders `${workspaceFolder}`, `${extensionPath}`, and `${repoRoot}` (the detected RustyLR workspace root) are expanded in the command, its arguments, and the working directory: + +```json +{ + "rustylr.server.command": "${repoRoot}/target/debug/rusty_lr_lsp", + "rustylr.server.args": [], + "rustylr.server.cwd": "${repoRoot}" +} +``` + +Use `RustyLR: Restart Language Server` from the command palette after changing server settings. 
diff --git a/rusty_lr_lsp/src/code_action.rs b/rusty_lr_lsp/src/code_action.rs new file mode 100644 index 00000000..ff1d74c2 --- /dev/null +++ b/rusty_lr_lsp/src/code_action.rs @@ -0,0 +1,123 @@ +use lsp_types::{ + CodeAction, CodeActionKind, CodeActionOrCommand, Diagnostic, Position, Range, TextEdit, Url, + WorkspaceEdit, +}; +use std::collections::{HashMap, HashSet}; + +pub fn code_actions( + content: &str, + uri: Url, + diagnostics: Vec, +) -> Vec { + let insert_position = allow_insert_position(content); + let mut seen = HashSet::new(); + let mut actions = Vec::new(); + + for diagnostic in diagnostics { + let Some(allow) = allow_suggestion(&diagnostic) else { + continue; + }; + if !seen.insert(allow.clone()) { + continue; + } + + let mut changes = HashMap::new(); + changes.insert( + uri.clone(), + vec![TextEdit { + range: Range::new(insert_position, insert_position), + new_text: format!("{allow}\n"), + }], + ); + + actions.push(CodeActionOrCommand::CodeAction(CodeAction { + title: format!("Insert `{allow}`"), + kind: Some(CodeActionKind::QUICKFIX), + diagnostics: Some(vec![diagnostic]), + edit: Some(WorkspaceEdit { + changes: Some(changes), + document_changes: None, + change_annotations: None, + }), + command: None, + is_preferred: Some(true), + disabled: None, + data: None, + })); + } + + actions +} + +fn allow_suggestion(diagnostic: &Diagnostic) -> Option { + diagnostic + .data + .as_ref()? + .get("rustylr_allow")? 
+ .as_str() + .filter(|suggestion| suggestion.starts_with("%allow ")) + .map(str::to_string) +} + +fn allow_insert_position(content: &str) -> Position { + let line = content + .lines() + .position(|line| line.trim() == "%%") + .map_or(0, |line| line + 1); + Position::new(line as u32, 0) +} + +#[cfg(test)] +mod tests { + use super::*; + use lsp_types::DiagnosticSeverity; + use serde_json::json; + + #[test] + fn creates_allow_quick_fix_from_diagnostic_data() { + let uri = Url::parse("file:///test.rustylr").unwrap(); + let diagnostic = Diagnostic { + range: Range::default(), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: "unused".to_string(), + related_information: None, + tags: None, + data: Some(json!({ "rustylr_allow": "%allow unused_terminals(num);" })), + }; + + let actions = code_actions("mod x {}\n%%\n%start E;\n", uri, vec![diagnostic]); + assert_eq!(actions.len(), 1); + + let CodeActionOrCommand::CodeAction(action) = &actions[0] else { + panic!("expected code action"); + }; + assert_eq!(action.title, "Insert `%allow unused_terminals(num);`"); + let edit = action.edit.as_ref().unwrap(); + let changes = edit.changes.as_ref().unwrap(); + let text_edit = changes.values().next().unwrap().first().unwrap(); + assert_eq!(text_edit.range.start, Position::new(2, 0)); + assert_eq!(text_edit.new_text, "%allow unused_terminals(num);\n"); + } + + #[test] + fn deduplicates_same_allow_suggestion() { + let uri = Url::parse("file:///test.rustylr").unwrap(); + let diagnostic = Diagnostic { + range: Range::default(), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: "unused".to_string(), + related_information: None, + tags: None, + data: Some(json!({ "rustylr_allow": "%allow nonterm_unreachable(E);" })), + }; + + let actions = code_actions("%%\n%start E;\n", uri, vec![diagnostic.clone(), diagnostic]); + 
assert_eq!(actions.len(), 1); + } +} diff --git a/rusty_lr_lsp/src/completion.rs b/rusty_lr_lsp/src/completion.rs new file mode 100644 index 00000000..9106afc7 --- /dev/null +++ b/rusty_lr_lsp/src/completion.rs @@ -0,0 +1,1018 @@ +use lsp_types::{ + CompletionItem, CompletionItemKind, CompletionResponse, CompletionTextEdit, Documentation, + MarkupContent, MarkupKind, Position, Range, TextEdit, +}; +use proc_macro2::{TokenStream, TokenTree}; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{GrammarArgs, Location, PatternArgs}; +use std::collections::{BTreeMap, BTreeSet}; +use std::str::FromStr; + +use crate::diagnostics::split_stream; +use crate::position::{offset_to_position, position_to_offset}; + +pub(crate) const DIRECTIVES: &[&str] = &[ + "%token", + "%start", + "%tokentype", + "%userdata", + "%error", + "%errortype", + "%location", + "%left", + "%right", + "%precedence", + "%prec", + "%dprec", + "%glr", + "%lalr", + "%nooptim", + "%allow", + "%moduleprefix", +]; + +pub(crate) const SUBSTITUTION_VARIABLES: &[&str] = &[ + "$tokentype", + "$location", + "$userdata", + "$error", + "$errortype", +]; + +pub(crate) const ALLOW_DIAGNOSTICS: &[&str] = &[ + "nonterm_unreachable", + "nonterm_unproductive", + "unused_nonterm_data", + "unused_terminals", + "terminals_merged", + "redundant_rule_removed", + "unit_production_eliminated", + "reduce_reduce_conflict_resolved", + "shift_reduce_conflict_resolved", + "shift_reduce_conflict_glr", + "reduce_reduce_conflict_glr", +]; + +pub(crate) const KEYWORDS: &[&str] = &[ + "error", + "auto", + "dense", + "sparse", + "$sep", + "data", + "lookahead", + "shift", +]; + +pub(crate) const SYNTAX_URL: &str = "https://github.com/ehwan/RustyLR/blob/main/SYNTAX.md"; + +#[derive(Clone, Copy, PartialEq, Eq)] +enum CompletionMode { + Directive, + Dollar, + Location, + AllowDiagnostic, + Symbol, +} + +pub fn completions(content: &str, position: Position) -> CompletionResponse { + let offset = position_to_offset(content, position); 
+ let mode = completion_mode(content, offset); + let replace_range = replacement_range(content, offset, mode); + + let parsed = parse_args(content).ok(); + let names = parsed + .as_ref() + .map(|args| CompletionNames::from_args(args, content)) + .unwrap_or_else(|| CompletionNames::from_text(content)); + let line_variables = parsed + .as_ref() + .map(|args| variables_for_offset(args, content, offset)) + .unwrap_or_default(); + + let mut builder = CompletionBuilder::new(replace_range); + + match mode { + CompletionMode::Directive => { + for directive in DIRECTIVES { + builder.keyword( + directive, + "RustyLR directive", + keyword_documentation(directive), + ); + } + } + CompletionMode::Dollar => { + for variable in SUBSTITUTION_VARIABLES { + builder.variable( + variable, + "built-in RustCode substitution", + substitution_documentation(variable), + ); + } + for (name, documentation) in &names.nonterminals { + builder.variable( + &format!("${name}"), + "non-terminal production type", + Some(format!( + "Substitutes to the production type of non-terminal `{name}`.\n\n{documentation}\n\n[Variable substitution]({SYNTAX_URL}#variable-substitution)" + )), + ); + } + for (name, documentation) in &names.terminals { + builder.variable( + &format!("${name}"), + "terminal definition substitution", + Some(format!( + "Substitutes to the `%token` definition for terminal `{name}`.\n\n{documentation}\n\n[Variable substitution]({SYNTAX_URL}#variable-substitution)" + )), + ); + } + for variable in &line_variables.value_names { + builder.variable( + &format!("${variable}"), + "current production binding", + Some(format!( + "Semantic value bound by the current production line.\n\nExample:\n\n```rustylr\nExpr : left=Expr plus right=Term {{ left + right }};\n```\n\nHere `$left` and `$right` can be used in RustCode substitution contexts.\n\n[Named variables]({SYNTAX_URL}#named-variables)" + )), + ); + } + for index in 1..=line_variables.value_count { + builder.variable( + 
&format!("${index}"), + "positional semantic value", + Some(format!( + "Semantic value of RHS symbol #{index} in the current production line.\n\nExample:\n\n```rustylr\nExpr : Expr plus Term {{ $1 }};\n```\n\n[Bison-style positional variables]({SYNTAX_URL}#3-bison-style-positional-variables)" + )), + ); + } + } + CompletionMode::Location => { + builder.variable( + "@$", + "current production location", + location_documentation("@$"), + ); + builder.variable( + "@0", + "current production location", + location_documentation("@0"), + ); + for variable in &line_variables.value_names { + builder.variable( + &format!("@{variable}"), + "current production binding location", + location_documentation(&format!("@{variable}")), + ); + } + for index in 1..=line_variables.value_count { + builder.variable( + &format!("@{index}"), + "positional location", + location_documentation(&format!("@{index}")), + ); + } + } + CompletionMode::AllowDiagnostic => { + for diagnostic in ALLOW_DIAGNOSTICS { + builder.keyword( + diagnostic, + "diagnostic suppression name", + allow_diagnostic_documentation(diagnostic), + ); + } + add_symbol_items(&mut builder, &names); + } + CompletionMode::Symbol => { + add_symbol_items(&mut builder, &names); + for keyword in KEYWORDS { + builder.keyword(keyword, "RustyLR keyword", keyword_documentation(keyword)); + } + for directive in DIRECTIVES { + builder.keyword( + directive, + "RustyLR directive", + keyword_documentation(directive), + ); + } + } + } + + CompletionResponse::Array(builder.finish()) +} + +fn add_symbol_items(builder: &mut CompletionBuilder, names: &CompletionNames) { + for (name, documentation) in &names.nonterminals { + builder.nonterminal(name, documentation.clone()); + } + for (name, documentation) in &names.terminals { + builder.terminal(name, documentation.clone()); + } +} + +pub(crate) fn parse_args(content: &str) -> Result { + let token_stream = TokenStream::from_str(content).map_err(|_| ())?; + let (_, macro_stream) = 
split_stream(token_stream).map_err(|_| ())?; + Grammar::parse_args(macro_stream).map_err(|_| ()) +} + +fn completion_mode(content: &str, offset: usize) -> CompletionMode { + let prefix_start = current_prefix_start(content, offset, true); + if prefix_start < offset { + match content.as_bytes()[prefix_start] { + b'%' => return CompletionMode::Directive, + b'$' => return CompletionMode::Dollar, + b'@' => return CompletionMode::Location, + _ => {} + } + } + + let line_prefix = line_prefix(content, offset); + let trimmed = line_prefix.trim_start(); + if trimmed.starts_with("%allow") { + return CompletionMode::AllowDiagnostic; + } + if trimmed.ends_with('%') { + return CompletionMode::Directive; + } + if trimmed.ends_with('$') { + return CompletionMode::Dollar; + } + if trimmed.ends_with('@') { + return CompletionMode::Location; + } + + CompletionMode::Symbol +} + +fn replacement_range(content: &str, offset: usize, mode: CompletionMode) -> Range { + let include_sigils = matches!( + mode, + CompletionMode::Directive | CompletionMode::Dollar | CompletionMode::Location + ); + let start = current_prefix_start(content, offset, include_sigils); + Range::new( + offset_to_position(content, start), + offset_to_position(content, offset), + ) +} + +pub(crate) fn current_prefix_start(content: &str, offset: usize, include_sigils: bool) -> usize { + let mut start = offset.min(content.len()); + while start > 0 { + let Some(ch) = content[..start].chars().next_back() else { + break; + }; + if is_ident_continue(ch) + || (include_sigils && matches!(ch, '$' | '@' | '%')) + || (include_sigils && ch.is_ascii_digit()) + { + start -= ch.len_utf8(); + } else { + break; + } + } + start +} + +fn line_prefix(content: &str, offset: usize) -> &str { + let offset = offset.min(content.len()); + let line_start = content[..offset].rfind('\n').map_or(0, |idx| idx + 1); + &content[line_start..offset] +} + +pub(crate) fn is_ident_continue(ch: char) -> bool { + ch == '_' || ch.is_ascii_alphanumeric() +} + 
+#[derive(Default)] +struct CompletionNames { + terminals: BTreeMap, + nonterminals: BTreeMap, +} + +impl CompletionNames { + fn from_args(args: &GrammarArgs, content: &str) -> Self { + let mut names = CompletionNames::default(); + let types = ResolvedTypes::from_args(args); + for (terminal, _) in &args.terminals { + let line = line_text_for_location(args, content, &terminal.location()); + names.terminals.insert( + terminal.value().clone(), + terminal_documentation(terminal.value(), &line, types.token_type.as_ref()), + ); + } + for rule in &args.rules { + let snippet = rule_definition_text(args, content, rule); + let documentation = nonterminal_documentation( + rule.name.value(), + &snippet, + types.nonterminals.get(rule.name.value()), + ); + names + .nonterminals + .entry(rule.name.value().clone()) + .and_modify(|existing| { + existing.push_str("\n\n---\n\n"); + existing.push_str(&documentation); + }) + .or_insert(documentation); + } + names + } + + fn from_text(content: &str) -> Self { + let mut names = CompletionNames::default(); + let grammar = content + .split_once("%%") + .map_or(content, |(_, grammar)| grammar); + + for raw_line in grammar.lines() { + let line = raw_line.trim_start(); + if let Some(rest) = line.strip_prefix("%token") { + if let Some(name) = first_ident(rest) { + names + .terminals + .insert(name.to_string(), terminal_documentation(name, line, None)); + } + continue; + } + + if line.starts_with('%') { + continue; + } + + if let Some(colon_idx) = line.find(':') { + let head = &line[..colon_idx]; + if let Some(name) = first_ident(head) { + names.nonterminals.insert( + name.to_string(), + nonterminal_documentation(name, line.trim(), None), + ); + } + } + } + + names + } +} + +#[derive(Default)] +struct ResolvedTypes { + token_type: Option, + nonterminals: BTreeMap, +} + +struct ResolvedRustType { + name: String, + boxed: bool, +} + +impl ResolvedTypes { + fn from_args(args: &GrammarArgs) -> Self { + let Ok(grammar) = 
Grammar::from_grammar_args(args.clone()) else { + return ResolvedTypes::default(); + }; + + let mut types = ResolvedTypes { + token_type: Some(resolved_rust_type( + Some(grammar.token_type()), + grammar.token_type_boxed(), + )), + nonterminals: BTreeMap::new(), + }; + for rule in &args.rules { + if let Some((rule_type, boxed)) = grammar.nonterminal_type(rule.name.value()) { + types.nonterminals.insert( + rule.name.value().clone(), + resolved_rust_type(rule_type, boxed), + ); + } + } + + types + } +} + +fn resolved_rust_type(ty: Option<&TokenStream>, boxed: bool) -> ResolvedRustType { + let name = ty + .map(TokenStream::to_string) + .filter(|ty| !ty.is_empty()) + .unwrap_or_else(|| "()".to_string()); + ResolvedRustType { name, boxed } +} + +pub(crate) fn line_text_for_location( + args: &GrammarArgs, + content: &str, + location: &Location, +) -> String { + let offset = args + .span_manager + .get_byterange(location) + .map_or(0, |range| range.start); + let start = content[..offset.min(content.len())] + .rfind('\n') + .map_or(0, |idx| idx + 1); + let end = content[offset.min(content.len())..] 
+ .find('\n') + .map_or(content.len(), |idx| offset + idx); + content[start..end].trim().to_string() +} + +pub(crate) fn rule_definition_text( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, +) -> String { + let rule_start = args + .span_manager + .get_byterange(&rule.name.location()) + .map_or(0, |range| range.start); + let start = content[..rule_start.min(content.len())] + .rfind('\n') + .map_or(0, |idx| idx + 1); + let first_separator = rule.rule_lines.first().and_then(|line| { + args.span_manager + .get_byterange(&line.separator_location) + .map(|range| range.start) + }); + let header_end = first_separator.unwrap_or(rule_start).min(content.len()); + let header = content[start..header_end].trim(); + let mut definition = String::new(); + definition.push_str(header); + for (line_idx, line) in rule.rule_lines.iter().enumerate() { + let tokens = rule_line_tokens_text(args, content, line); + definition.push('\n'); + definition.push(' '); + definition.push(if line_idx == 0 { ':' } else { '|' }); + if !tokens.is_empty() { + definition.push(' '); + definition.push_str(&tokens); + } + } + definition.push_str("\n ;"); + definition +} + +fn first_ident(text: &str) -> Option<&str> { + let start = text.find(|ch: char| ch == '_' || ch.is_ascii_alphabetic())?; + let rest = &text[start..]; + let end = rest + .find(|ch: char| !(ch == '_' || ch.is_ascii_alphanumeric())) + .unwrap_or(rest.len()); + Some(&rest[..end]) +} + +fn rule_line_tokens_text( + args: &GrammarArgs, + content: &str, + line: &rusty_lr_parser::RuleLineArgs, +) -> String { + line.tokens + .iter() + .map(|(_, pattern)| { + let start = pattern_start(args, pattern); + let end = pattern_end(args, pattern); + content[start.min(content.len())..end.min(content.len())] + .trim() + .to_string() + }) + .collect::>() + .join(" ") +} + +#[derive(Default)] +struct LineVariables { + value_names: BTreeSet, + value_count: usize, +} + +fn variables_for_offset(args: &GrammarArgs, content: &str, 
offset: usize) -> LineVariables { + for rule in &args.rules { + for (line_idx, line) in rule.rule_lines.iter().enumerate() { + let start = args + .span_manager + .get_byterange(&line.separator_location) + .map_or(0, |range| range.start); + let end = rule_line_end(args, content, rule, line_idx); + if start <= offset && offset <= end { + let mut variables = LineVariables::default(); + for (mapped_name, pattern) in &line.tokens { + variables.value_count += 1; + if let Some(name) = mapped_name { + variables.value_names.insert(name.value().clone()); + } else { + collect_default_bindings(pattern, &mut variables.value_names); + } + } + return variables; + } + } + } + + LineVariables::default() +} + +fn rule_line_end( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, + line_idx: usize, +) -> usize { + if let Some(next_line) = rule.rule_lines.get(line_idx + 1) { + return args + .span_manager + .get_byterange(&next_line.separator_location) + .map_or(content.len(), |range| range.start); + } + + let mut end = args + .span_manager + .get_byterange(&rule.name.location()) + .map_or(0, |range| range.end); + for (_, pattern) in &rule.rule_lines[line_idx].tokens { + end = end.max(pattern_end(args, pattern)); + } + if let Some(action) = &rule.rule_lines[line_idx].reduce_action { + end = end.max(token_stream_end(action)); + } + + content[end.min(content.len())..] 
+ .find(';') + .map_or(content.len(), |semi| end + semi) +} + +fn pattern_end(args: &GrammarArgs, pattern: &PatternArgs) -> usize { + match pattern { + PatternArgs::Ident(ident) => args + .span_manager + .get_byterange(&ident.location()) + .map_or(0, |range| range.end), + PatternArgs::Plus { base, op_location } + | PatternArgs::Star { base, op_location } + | PatternArgs::Question { base, op_location } + | PatternArgs::Exclamation { base, op_location } => pattern_end(args, base).max( + args.span_manager + .get_byterange(op_location) + .map_or(0, |range| range.end), + ), + PatternArgs::TerminalSet(set) => args + .span_manager + .get_byterange(&set.location()) + .map_or(0, |range| range.end), + PatternArgs::Group { + alternatives, + close_location, + .. + } => alternatives + .iter() + .flatten() + .map(|pattern| pattern_end(args, pattern)) + .max() + .unwrap_or(0) + .max( + args.span_manager + .get_byterange(close_location) + .map_or(0, |range| range.end), + ), + PatternArgs::Byte(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::ByteString(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::Char(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::String(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.end), + PatternArgs::Minus { base, exclude } => { + pattern_end(args, base).max(pattern_end(args, exclude)) + } + PatternArgs::Sep { + base, + delimiter, + location, + .. 
+ } => pattern_end(args, base) + .max(pattern_end(args, delimiter)) + .max( + args.span_manager + .get_byterange(location) + .map_or(0, |range| range.end), + ), + } +} + +fn pattern_start(args: &GrammarArgs, pattern: &PatternArgs) -> usize { + match pattern { + PatternArgs::Ident(ident) => args + .span_manager + .get_byterange(&ident.location()) + .map_or(0, |range| range.start), + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => pattern_start(args, base), + PatternArgs::TerminalSet(set) => args + .span_manager + .get_byterange(&set.location()) + .map_or(0, |range| range.start), + PatternArgs::Group { open_location, .. } => args + .span_manager + .get_byterange(open_location) + .map_or(0, |range| range.start), + PatternArgs::Byte(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::ByteString(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::Char(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::String(lit) => args + .span_manager + .get_byterange(&lit.location()) + .map_or(0, |range| range.start), + PatternArgs::Minus { base, .. } => pattern_start(args, base), + PatternArgs::Sep { location, .. 
} => args + .span_manager + .get_byterange(location) + .map_or(0, |range| range.start), + } +} + +fn token_stream_end(stream: &TokenStream) -> usize { + stream + .clone() + .into_iter() + .map(token_tree_end) + .max() + .unwrap_or(0) +} + +fn token_tree_end(token: TokenTree) -> usize { + match token { + TokenTree::Group(group) => token_stream_end(&group.stream()) + .max(group.span_close().byte_range().end) + .max(group.span_open().byte_range().end), + TokenTree::Ident(ident) => ident.span().byte_range().end, + TokenTree::Punct(punct) => punct.span().byte_range().end, + TokenTree::Literal(lit) => lit.span().byte_range().end, + } +} + +fn collect_default_bindings(pattern: &PatternArgs, names: &mut BTreeSet) { + match pattern { + PatternArgs::Ident(ident) => { + names.insert(ident.value().clone()); + } + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => { + collect_default_bindings(base, names); + } + PatternArgs::Minus { base, exclude } => { + collect_default_bindings(base, names); + collect_default_bindings(exclude, names); + } + PatternArgs::Sep { base, .. } => { + collect_default_bindings(base, names); + } + PatternArgs::Group { .. 
} + | PatternArgs::TerminalSet(_) + | PatternArgs::Byte(_) + | PatternArgs::ByteString(_) + | PatternArgs::Char(_) + | PatternArgs::String(_) => {} + } +} + +fn terminal_documentation( + name: &str, + definition: &str, + rust_type: Option<&ResolvedRustType>, +) -> String { + let type_line = type_line(rust_type); + format!( + "Terminal symbol `{name}`.\n\n{type_line}\n\nDefinition:\n\n```rustylr\n{definition}\n```\n\n[Token definition]({SYNTAX_URL}#token-definition-must-defined)" + ) +} + +fn nonterminal_documentation( + name: &str, + definition: &str, + rust_type: Option<&ResolvedRustType>, +) -> String { + let type_line = type_line(rust_type); + format!( + "Non-terminal symbol `{name}`.\n\n{type_line}\n\nDefinition:\n\n```rustylr\n{definition}\n```\n\n[Production rules]({SYNTAX_URL}#production-rules)" + ) +} + +fn type_line(rust_type: Option<&ResolvedRustType>) -> String { + match rust_type { + Some(rust_type) if rust_type.boxed => format!("Rust type: `{}` (boxed)", rust_type.name), + Some(rust_type) => format!("Rust type: `{}`", rust_type.name), + None => "Rust type: unavailable until the grammar parses successfully.".to_string(), + } +} + +pub(crate) fn keyword_documentation(label: &str) -> Option { + let documentation = match label { + "%token" => format!( + "Defines a terminal symbol and the Rust pattern that recognizes it.\n\nExample:\n\n```rustylr\n%token num Token::Num(_);\n```\n\n[Token definition]({SYNTAX_URL}#token-definition-must-defined)" + ), + "%start" => format!( + "Declares a start non-terminal for parser generation.\n\nExample:\n\n```rustylr\n%start Expr;\n```\n\n[Start symbol]({SYNTAX_URL}#start-symbol-must-defined)" + ), + "%tokentype" => format!( + "Sets the Rust type used as the parser's input terminal token type.\n\nExample:\n\n```rustylr\n%tokentype Token;\n```\n\n[Token type]({SYNTAX_URL}#token-type-must-defined)" + ), + "%userdata" => format!( + "Sets the mutable user-data type threaded through parser contexts and reduce 
actions.\n\nExample:\n\n```rustylr\n%userdata ParserState;\n```\n\n[Userdata type]({SYNTAX_URL}#userdata-type-optional)" + ), + "%error" | "%errortype" => format!( + "Sets the custom error type returned by reduce actions.\n\nExample:\n\n```rustylr\n%error String;\n```\n\n[Error type]({SYNTAX_URL}#error-type-optional)" + ), + "%location" => format!( + "Sets the source-location type used by `@...` location bindings.\n\nExample:\n\n```rustylr\n%location Span;\n```\n\n[Location tracking]({SYNTAX_URL}#location-tracking)" + ), + "%left" => format!( + "Declares left-associative operator precedence for one or more terminals.\n\nExample:\n\n```rustylr\n%left plus minus;\n```\n\n[Operator precedence]({SYNTAX_URL}#operator-precedence)" + ), + "%right" => format!( + "Declares right-associative operator precedence for one or more terminals.\n\nExample:\n\n```rustylr\n%right caret;\n```\n\n[Operator precedence]({SYNTAX_URL}#operator-precedence)" + ), + "%precedence" => format!( + "Declares precedence without associativity.\n\nExample:\n\n```rustylr\n%precedence unary_minus;\n```\n\n[Operator precedence]({SYNTAX_URL}#operator-precedence)" + ), + "%prec" => format!( + "Overrides the precedence of a specific production line.\n\nExample:\n\n```rustylr\nExpr : minus Expr %prec unary_minus {{ Expr }};\n```\n\n[Explicit precedence]({SYNTAX_URL}#explicit-precedence-prec)" + ), + "%dprec" => format!( + "Assigns a dynamic precedence priority to a production, mainly for GLR reduce/reduce control.\n\nExample:\n\n```rustylr\nExpr : Expr star Expr %dprec 2 {{ ... 
}};\n```\n\n[Rule priority]({SYNTAX_URL}#rule-priority)" + ), + "%glr" => format!( + "Enables Generalized LR parser generation for ambiguous grammars.\n\nExample:\n\n```rustylr\n%glr;\n```\n\n[GLR parser generation]({SYNTAX_URL}#glr-parser-generation)" + ), + "%lalr" => format!( + "Generates LALR(1) parsing tables instead of the default LR construction.\n\nExample:\n\n```rustylr\n%lalr;\n```\n\n[LALR parser generation]({SYNTAX_URL}#lalr-parser-generation)" + ), + "%nooptim" => format!( + "Disables parser table optimization.\n\nExample:\n\n```rustylr\n%nooptim;\n```\n\n[No optimization]({SYNTAX_URL}#no-optimization)" + ), + "%allow" => format!( + "Suppresses a RustyLR diagnostic globally or for a specific target.\n\nExample:\n\n```rustylr\n%allow unused_terminals(plus);\n```\n\n[Diagnostic suppression]({SYNTAX_URL}#diagnostic-suppression)" + ), + "%moduleprefix" => { + "Internal directive used by RustyLR's own generated parser code. Most grammars should not use this directly.".to_string() + } + "error" => format!( + "Reserved terminal used for panic-mode error recovery.\n\nExample:\n\n```rustylr\nBlock : lbrace error rbrace {{ recover() }};\n```\n\n[Panic-mode error recovery]({SYNTAX_URL}#panic-mode-error-recovery)" + ), + "$sep" => format!( + "Pattern helper for separated repetition.\n\nExample:\n\n```rustylr\nList : $sep(Item, comma, +) {{ Item }};\n```\n\n[Patterns]({SYNTAX_URL}#patterns)" + ), + "data" => format!( + "Mutable user-data binding available inside reduce actions.\n\nExample:\n\n```rustylr\nExpr : num {{ data.count += 1; num }};\n```\n\n[User data]({SYNTAX_URL}#4-user-data-data)" + ), + "lookahead" => format!( + "GLR reduce-action control binding for inspecting the next terminal.\n\nExample:\n\n```rustylr\nif let Some(term) = lookahead.to_term() {{ /* ... 
*/ }}\n```\n\n[Advanced GLR reduce controls]({SYNTAX_URL}#advanced-glr-reduce-controls)" + ), + "shift" => format!( + "GLR reduce-action control binding used to allow or prune a shift branch.\n\nExample:\n\n```rustylr\n*shift = false;\n```\n\n[Advanced GLR reduce controls]({SYNTAX_URL}#advanced-glr-reduce-controls)" + ), + "auto" => "Table layout mode selected automatically by RustyLR.".to_string(), + "dense" => "Dense table layout mode.".to_string(), + "sparse" => "Sparse table layout mode.".to_string(), + _ => return None, + }; + Some(documentation) +} + +pub(crate) fn substitution_documentation(label: &str) -> Option { + let documentation = match label { + "$tokentype" => "`$tokentype` substitutes to the type defined by `%tokentype`.", + "$location" => "`$location` substitutes to the type defined by `%location`.", + "$userdata" => "`$userdata` substitutes to the type defined by `%userdata`.", + "$error" => "`$error` substitutes to the configured reduce-action error type.", + "$errortype" => "`$errortype` is an alias for the configured reduce-action error type.", + _ => return None, + }; + Some(format!( + "{documentation}\n\nExample:\n\n```rustylr\nRule($tokentype) : token {{ $tokentype }};\n```\n\n[Variable substitution]({SYNTAX_URL}#variable-substitution)" + )) +} + +pub(crate) fn location_documentation(label: &str) -> Option { + Some(format!( + "`{label}` refers to a source-location value in the current reduce action.\n\nExamples:\n\n```rustylr\nExpr : left=Expr plus right=Term {{ println!(\"{{:?}}\", @left); }};\nExpr : Expr plus Term {{ println!(\"{{:?}}\", @1); }};\nExpr : Term {{ println!(\"{{:?}}\", @$); }};\n```\n\n[Location tracking]({SYNTAX_URL}#location-tracking)" + )) +} + +pub(crate) fn allow_diagnostic_documentation(name: &str) -> Option { + Some(format!( + "Diagnostic suppression name `{name}`.\n\nExample:\n\n```rustylr\n%allow {name};\n%allow {name}(SomeTarget);\n```\n\n[Diagnostic suppression]({SYNTAX_URL}#diagnostic-suppression)" + )) +} + 
+pub(crate) fn markdown_documentation(value: String) -> Documentation { + Documentation::MarkupContent(MarkupContent { + kind: MarkupKind::Markdown, + value, + }) +} + +struct CompletionBuilder { + range: Range, + seen: BTreeSet, + items: Vec, +} + +impl CompletionBuilder { + fn new(range: Range) -> Self { + CompletionBuilder { + range, + seen: BTreeSet::new(), + items: Vec::new(), + } + } + + fn terminal(&mut self, label: &str, documentation: String) { + self.push( + label, + CompletionItemKind::ENUM_MEMBER, + "terminal symbol", + Some(documentation), + ); + } + + fn nonterminal(&mut self, label: &str, documentation: String) { + self.push( + label, + CompletionItemKind::CLASS, + "non-terminal symbol", + Some(documentation), + ); + } + + fn keyword(&mut self, label: &str, detail: &str, documentation: Option) { + self.push(label, CompletionItemKind::KEYWORD, detail, documentation); + } + + fn variable(&mut self, label: &str, detail: &str, documentation: Option) { + self.push(label, CompletionItemKind::VARIABLE, detail, documentation); + } + + fn push( + &mut self, + label: &str, + kind: CompletionItemKind, + detail: &str, + documentation: Option, + ) { + if !self.seen.insert(label.to_string()) { + return; + } + + self.items.push(CompletionItem { + label: label.to_string(), + kind: Some(kind), + detail: Some(detail.to_string()), + text_edit: Some(CompletionTextEdit::Edit(TextEdit { + range: self.range, + new_text: label.to_string(), + })), + documentation: documentation.map(markdown_documentation), + ..Default::default() + }); + } + + fn finish(self) -> Vec { + self.items + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, +} + +%% + +%tokentype Token; +%start E; + +%token num Token::Num(_); +%token plus Token::Plus; + +E(i32) : left=E plus num { $ } + | num { num } + ; + +Boxed(box $tokentype) : num { num }; +"#; + + fn labels(response: CompletionResponse) -> BTreeSet { 
+ match response { + CompletionResponse::Array(items) => items + .into_iter() + .map(|item| item.label) + .collect::>(), + _ => BTreeSet::new(), + } + } + + fn items(response: CompletionResponse) -> Vec { + match response { + CompletionResponse::Array(items) => items, + _ => Vec::new(), + } + } + + #[test] + fn completes_symbols() { + let pos = offset_to_position(MOCK_GRAMMAR, MOCK_GRAMMAR.find("plus num").unwrap()); + let labels = labels(completions(MOCK_GRAMMAR, pos)); + assert!(labels.contains("E")); + assert!(labels.contains("num")); + assert!(labels.contains("plus")); + assert!(labels.contains("error")); + } + + #[test] + fn completes_dollar_variables() { + let offset = MOCK_GRAMMAR.find("$ }").unwrap() + 1; + let labels = labels(completions( + MOCK_GRAMMAR, + offset_to_position(MOCK_GRAMMAR, offset), + )); + assert!(labels.contains("$tokentype")); + assert!(labels.contains("$E")); + assert!(labels.contains("$num")); + assert!(labels.contains("$left")); + assert!(labels.contains("$1")); + } + + #[test] + fn completes_directives() { + let content = "%%\n%"; + let labels = labels(completions(content, Position::new(1, 1))); + assert!(labels.contains("%token")); + assert!(labels.contains("%start")); + } + + #[test] + fn completion_items_include_markdown_documentation() { + let pos = offset_to_position(MOCK_GRAMMAR, MOCK_GRAMMAR.find("plus num").unwrap()); + let items = items(completions(MOCK_GRAMMAR, pos)); + + let terminal = items.iter().find(|item| item.label == "plus").unwrap(); + let markup = markdown_value(terminal); + assert!(markup.contains("Rust type: `Token`")); + assert!(markup.contains("%token plus Token::Plus;")); + + let nonterminal = items.iter().find(|item| item.label == "E").unwrap(); + let markup = markdown_value(nonterminal); + assert!(markup.contains("Rust type: `i32`")); + assert!(markup.contains("E(i32)")); + assert!(markup.contains("E(i32)\n : E plus num")); + assert!(markup.contains("E plus num")); + assert!(!markup.contains("left=E")); + 
assert!(markup.contains("\n | num")); + assert!(!markup.contains("{ $ }")); + assert!(!markup.contains("{ num }")); + + let boxed = items.iter().find(|item| item.label == "Boxed").unwrap(); + let markup = markdown_value(boxed); + assert!(markup.contains("Rust type: `Token` (boxed)")); + assert!(markup.contains("Boxed(box $tokentype)")); + assert!(!markup.contains("{ num }")); + } + + fn markdown_value(item: &CompletionItem) -> &str { + let documentation = item.documentation.as_ref().unwrap(); + let Documentation::MarkupContent(markup) = documentation else { + panic!("expected markdown documentation"); + }; + &markup.value + } +} diff --git a/rusty_lr_lsp/src/diagnostics.rs b/rusty_lr_lsp/src/diagnostics.rs new file mode 100644 index 00000000..9a52bd17 --- /dev/null +++ b/rusty_lr_lsp/src/diagnostics.rs @@ -0,0 +1,285 @@ +use lsp_types::{Diagnostic, DiagnosticSeverity, Range}; +use proc_macro2::{Spacing, TokenStream, TokenTree}; +use rusty_lr_parser::grammar::Grammar; +use serde_json::json; +use std::str::FromStr; + +use crate::position::range_to_lsp_range; + +/// Splits a TokenStream by the `%%` separator. +pub fn split_stream(token_stream: TokenStream) -> Result<(TokenStream, TokenStream), ()> { + let mut token_stream = token_stream.into_iter().peekable(); + let mut output_stream = TokenStream::new(); + + while let Some(token) = token_stream.next() { + if let TokenTree::Punct(token) = &token { + if token.as_char() == '%' && token.spacing() == Spacing::Joint { + if let Some(TokenTree::Punct(next)) = token_stream.peek() { + if next.as_char() == '%' && next.spacing() == Spacing::Alone { + token_stream.next(); + let macro_stream: TokenStream = token_stream.collect(); + return Ok((output_stream, macro_stream)); + } + } + } + } + output_stream.extend(std::iter::once(token)); + } + Err(()) +} + +/// Runs the compiler's parser/builder pipeline on the given file content and gathers all diagnostics. +pub fn compile_and_get_diagnostics(content: &str) -> Vec { + // 1. 
Parse TokenStream from content + let token_stream = match TokenStream::from_str(content) { + Ok(ts) => ts, + Err(e) => { + let range = e.span().byte_range(); + return vec![Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("Lexing/parsing error: {}", e), + related_information: None, + tags: None, + data: None, + }]; + } + }; + + // 2. Split into Rust code and grammar sections + let (_, macro_stream) = match split_stream(token_stream) { + Ok(res) => res, + Err(_) => { + return vec![Diagnostic { + range: Range::default(), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: "Cannot find `%%` to separate the Rust code and the grammar parts" + .to_string(), + related_information: None, + tags: None, + data: None, + }]; + } + }; + + // 3. Parse grammar arguments + let grammar_args = match Grammar::parse_args(macro_stream) { + Ok(args) => args, + Err((e, sm)) => { + let location = e.location(); + let range = sm.get_byterange(&location).unwrap_or(0..0); + return vec![Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: e.short_message(), + related_information: None, + tags: None, + data: None, + }]; + } + }; + + // 4. 
Collect recovered parser errors + let mut diagnostics = Vec::new(); + for error in &grammar_args.error_recovered { + let range = grammar_args + .span_manager + .get_byterange(&error.location) + .unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("{} (refer to: {})", error.message, error.link), + related_information: None, + tags: None, + data: None, + }); + } + + if !grammar_args.error_recovered.is_empty() { + return diagnostics; + } + + let span_manager = grammar_args.span_manager.clone(); + + // 5. Run arg validation + if let Err(e) = Grammar::arg_check_error(&grammar_args) { + let msg = e.short_message(); + for loc in e.locations() { + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg.clone(), + related_information: None, + tags: None, + data: None, + }); + } + return diagnostics; + } + + // 6. Build the Grammar structure + let mut grammar = match Grammar::from_grammar_args(grammar_args) { + Ok(g) => g, + Err(e) => { + let msg = e.short_message(); + for loc in e.locations() { + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg.clone(), + related_information: None, + tags: None, + data: None, + }); + } + return diagnostics; + } + }; + + if grammar.optimize { + grammar.optimize(25); + } + grammar.builder = grammar.create_builder(); + + // 7. 
Verify Shift/Reduce and Reduce/Reduce conflicts in non-GLR mode + let diags_collector = grammar.build_grammar(); + if !grammar.glr { + // Shift/Reduce conflicts + for ((term, shift_rules, _), reduce_rules) in diags_collector.shift_reduce_conflicts { + let term_str = grammar.class_pretty_name_list(term, 5); + let message = format!( + "Shift/Reduce conflict detected with terminal(class): {}", + term_str + ); + + for shift_rule in shift_rules { + if let Some((nonterm, local_rule)) = + grammar.get_rule_by_id(shift_rule.production_idx) + { + let loc = nonterm.rules[local_rule].location(); + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("(Shift) {}", message), + related_information: None, + tags: None, + data: None, + }); + } + } + for (reduce_rule, _) in reduce_rules { + if let Some((nonterm, local_rule)) = grammar.get_rule_by_id(reduce_rule) { + let loc = nonterm.rules[local_rule].location(); + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: format!("(Reduce) {}", message), + related_information: None, + tags: None, + data: None, + }); + } + } + } + + // Reduce/Reduce conflicts + for (reduce_rules, reduce_terms) in diags_collector.reduce_reduce_conflicts { + let mut terms = Vec::new(); + for term in reduce_terms { + terms.push(grammar.class_pretty_name_list(term, 5)); + } + let message = format!( + "Reduce/Reduce conflict detected with terminals: {}", + terms.join(", ") + ); + + for (reduce_rule, _) in reduce_rules { + if let Some((nonterm, local_rule)) = grammar.get_rule_by_id(reduce_rule) { + let loc = 
nonterm.rules[local_rule].location(); + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::ERROR), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: message.clone(), + related_information: None, + tags: None, + data: None, + }); + } + } + } + } + + // 8. Collect Warnings + for warning in &grammar.warnings { + if grammar.is_warning_allowed(warning) { + continue; + } + let msg = warning.short_message(&grammar); + let locs = warning.locations(); + if locs.is_empty() { + let sep_idx = content.find("%%").unwrap_or(0); + let range = sep_idx..(sep_idx + 2); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg, + related_information: None, + tags: None, + data: Some(json!({ "rustylr_allow": warning.suggestion(&grammar) })), + }); + } else { + for loc in locs { + let range = span_manager.get_byterange(&loc).unwrap_or(0..0); + diagnostics.push(Diagnostic { + range: range_to_lsp_range(content, range), + severity: Some(DiagnosticSeverity::WARNING), + code: None, + code_description: None, + source: Some("rusty_lr".to_string()), + message: msg.clone(), + related_information: None, + tags: None, + data: Some(json!({ "rustylr_allow": warning.suggestion(&grammar) })), + }); + } + } + } + + diagnostics +} diff --git a/rusty_lr_lsp/src/formatter.rs b/rusty_lr_lsp/src/formatter.rs new file mode 100644 index 00000000..d04315f2 --- /dev/null +++ b/rusty_lr_lsp/src/formatter.rs @@ -0,0 +1,859 @@ +use lsp_types::TextEdit; +use proc_macro2::{TokenStream, TokenTree}; +use rusty_lr_parser::{GrammarArgs, PatternArgs}; +use std::ops::Range; + +use crate::completion; +use crate::position::range_to_lsp_range; + +const RULE_INDENT: &str = " "; +const ACTION_INNER_INDENT: 
&str = " "; + +pub fn formatting(content: &str) -> Vec { + let Ok(args) = completion::parse_args(content) else { + return Vec::new(); + }; + + let mut edits = Vec::new(); + edits.extend(directive_edits(content)); + edits.extend(rule_edits(&args, content)); + edits +} + +fn directive_edits(content: &str) -> Vec { + let Some(grammar_start) = content.find("%%").map(|idx| idx + 2) else { + return Vec::new(); + }; + + let comments = comment_ranges(content); + let mut edits = Vec::new(); + let mut offset = line_start(content, grammar_start); + while offset < content.len() { + let current_line_end = line_end(content, offset); + if offset >= grammar_start { + let line_prefix = &content[offset..current_line_end]; + let leading = line_prefix.len() - line_prefix.trim_start().len(); + let directive_start = offset + leading; + if content[directive_start..current_line_end].starts_with('%') { + if let Some((range_end, new_text)) = + format_directive_block(content, directive_start, &comments) + { + edits.push(TextEdit { + range: range_to_lsp_range(content, offset..range_end), + new_text, + }); + offset = content[range_end..] + .find('\n') + .map_or(content.len(), |idx| range_end + idx + 1); + continue; + } + } + } + offset = content[current_line_end..] 
+ .find('\n') + .map_or(content.len(), |idx| current_line_end + idx + 1); + } + edits +} + +fn format_directive_block( + content: &str, + start: usize, + comments: &[Range], +) -> Option<(usize, String)> { + let semicolon = find_directive_semicolon(content, start)?; + if range_has_comment(comments, start..semicolon) { + return None; + } + + let range_end = line_end(content, semicolon + 1); + let directive = &content[start..semicolon]; + let trailing = content[semicolon + 1..range_end].trim_end(); + let mut formatted = normalize_directive_spacing(directive); + formatted.push(';'); + if !trailing.is_empty() { + formatted.push_str(trailing); + } + + (formatted != content[line_start(content, start)..range_end]).then_some((range_end, formatted)) +} + +fn normalize_directive_spacing(directive: &str) -> String { + let mut result = String::new(); + let mut pending_space = false; + let trimmed = directive.trim(); + let mut chars = trimmed.char_indices(); + let mut quote = None; + let mut escaped = false; + + while let Some((idx, ch)) = chars.next() { + if let Some(quote_ch) = quote { + result.push(ch); + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == quote_ch { + quote = None; + } + continue; + } + + if ch == '"' || (ch == '\'' && is_single_quote_literal_start(trimmed, idx)) { + if pending_space && !result.is_empty() { + result.push(' '); + pending_space = false; + } + result.push(ch); + quote = Some(ch); + } else if ch.is_whitespace() { + pending_space = true; + } else { + if pending_space && !result.is_empty() { + result.push(' '); + pending_space = false; + } + result.push(ch); + } + } + + result +} + +fn find_directive_semicolon(content: &str, start: usize) -> Option { + let mut quote = None; + let mut escaped = false; + let mut paren_depth = 0usize; + let mut bracket_depth = 0usize; + let mut brace_depth = 0usize; + + let remaining = &content[start..]; + let mut iter = remaining.char_indices().peekable(); + while let 
/// Decides whether the `'` at byte `quote_idx` opens a quoted literal rather
/// than, for example, a lifetime.
///
/// The scan looks for an unescaped closing `'` on the same line. The quote is
/// a literal start only if such a closing quote exists and is not directly
/// followed by an identifier character (`_`, ASCII letter or digit).
fn is_single_quote_literal_start(text: &str, quote_idx: usize) -> bool {
    let tail = &text[quote_idx + 1..];
    let mut skip_next = false;

    for (offset, current) in tail.char_indices() {
        match current {
            // A line break before the closing quote rules out a literal.
            '\n' | '\r' => return false,
            _ if skip_next => skip_next = false,
            '\\' => skip_next = true,
            '\'' => {
                let follower = tail[offset + 1..].chars().next();
                return !matches!(
                    follower,
                    Some('_' | 'a'..='z' | 'A'..='Z' | '0'..='9')
                );
            }
            _ => {}
        }
    }

    false
}
+ .start; + let start = line_start(content, rule_start); + let first_separator = rule.rule_lines.first().and_then(|line| { + args.span_manager + .get_byterange(&line.separator_location) + .map(|range| range.start) + })?; + let header = content[start..first_separator.min(content.len())].trim(); + + let mut formatted = String::new(); + formatted.push_str(header); + for (line_idx, line) in rule.rule_lines.iter().enumerate() { + let tokens = line_tokens_text(args, content, line); + let modifiers = line_modifiers_text(args, content, rule, line_idx); + let action = line + .reduce_action + .as_ref() + .and_then(|action| token_stream_text(content, action)); + + formatted.push('\n'); + formatted.push_str(RULE_INDENT); + formatted.push(if line_idx == 0 { ':' } else { '|' }); + if !tokens.is_empty() { + formatted.push(' '); + formatted.push_str(&tokens); + } + if !modifiers.is_empty() { + formatted.push(' '); + formatted.push_str(&modifiers); + } + if let Some(action) = action { + formatted.push(' '); + formatted.push_str(&format_reduce_action(&action)); + } + } + formatted.push('\n'); + formatted.push_str(RULE_INDENT); + formatted.push(';'); + + let end = rule_block_end(args, content, rule)?; + let action_ranges = rule + .rule_lines + .iter() + .filter_map(|line| line.reduce_action.as_ref().and_then(token_stream_range)) + .collect::>(); + if has_comment_outside_ranges(&comments, start..end, &action_ranges) { + return None; + } + + Some(TextEdit { + range: range_to_lsp_range(content, start..end), + new_text: formatted, + }) + }) + .collect() +} + +fn format_reduce_action(action: &str) -> String { + let trimmed = action.trim(); + if !trimmed.contains('\n') { + return trimmed.to_string(); + } + + let lines = trimmed.lines().collect::>(); + let last_non_empty = lines.iter().rposition(|line| !line.trim().is_empty()); + let body_indent = lines + .iter() + .enumerate() + .filter(|(idx, line)| *idx != 0 && Some(*idx) != last_non_empty && !line.trim().is_empty()) + .map(|(_, line)| 
/// Returns the byte length of the leading run of spaces and tabs in `line`.
///
/// A line made up only of spaces and tabs (or an empty line) yields `line.len()`.
fn leading_indent_len(line: &str) -> usize {
    let without_indent = line.trim_start_matches(|ch: char| ch == ' ' || ch == '\t');
    line.len() - without_indent.len()
}

/// Removes `indent` bytes of leading indentation from `line`.
///
/// When the line is indented by fewer than `indent` spaces/tabs, all leading
/// whitespace is trimmed instead.
fn strip_indent(line: &str, indent: usize) -> &str {
    let available = leading_indent_len(line);
    if available < indent {
        return line.trim_start();
    }
    // The first `available` bytes are ASCII spaces/tabs, so `indent` is a char boundary.
    &line[indent..]
}
/// Returns `true` when any comment range overlaps `range`.
fn range_has_comment(comments: &[Range<usize>], range: Range<usize>) -> bool {
    comments
        .iter()
        .any(|comment| ranges_overlap(comment, &range))
}

/// Returns `true` when some comment overlapping `outer` is not fully contained
/// in one of the `allowed` ranges.
fn has_comment_outside_ranges(
    comments: &[Range<usize>],
    outer: Range<usize>,
    allowed: &[Range<usize>],
) -> bool {
    comments
        .iter()
        .filter(|comment| ranges_overlap(comment, &outer))
        .any(|comment| {
            !allowed
                .iter()
                .any(|allowed_range| range_contains(allowed_range, comment))
        })
}

/// Half-open interval overlap test: ranges that merely touch do not overlap.
fn ranges_overlap(left: &Range<usize>, right: &Range<usize>) -> bool {
    left.start < right.end && right.start < left.end
}

/// Returns `true` when `inner` lies entirely within `outer` (bounds inclusive).
fn range_contains(outer: &Range<usize>, inner: &Range<usize>) -> bool {
    outer.start <= inner.start && inner.end <= outer.end
}
+ .unwrap_or_else(|| rule_line_end(args, content, rule, line_idx)); + + content[start.min(content.len())..end.min(content.len())] + .trim() + .to_string() +} + +fn rule_block_end( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, +) -> Option { + let last_line_idx = rule.rule_lines.len().checked_sub(1)?; + let end_hint = rule_line_end(args, content, rule, last_line_idx); + let semicolon = content[end_hint.min(content.len())..].find(';')?; + Some(line_end(content, end_hint + semicolon + 1)) +} + +fn rule_line_end( + args: &GrammarArgs, + content: &str, + rule: &rusty_lr_parser::RuleDefArgs, + line_idx: usize, +) -> usize { + if let Some(next_line) = rule.rule_lines.get(line_idx + 1) { + return args + .span_manager + .get_byterange(&next_line.separator_location) + .map_or(content.len(), |range| range.start); + } + + let mut end = args + .span_manager + .get_byterange(&rule.name.location()) + .map_or(0, |range| range.end); + for (_, pattern) in &rule.rule_lines[line_idx].tokens { + end = end.max(pattern_end(args, pattern)); + } + if let Some(action) = &rule.rule_lines[line_idx].reduce_action { + if let Some(range) = token_stream_range(action) { + end = end.max(range.end); + } + } + + content[end.min(content.len())..] 
/// Returns the exclusive end byte offset of `pattern` in the source text.
///
/// For composite patterns the result is the maximum end offset over the
/// sub-patterns and the pattern's own operator/closing location. Any location
/// the span manager cannot resolve contributes `0`.
fn pattern_end(args: &GrammarArgs, pattern: &PatternArgs) -> usize {
    match pattern {
        PatternArgs::Ident(ident) => args
            .span_manager
            .get_byterange(&ident.location())
            .map_or(0, |range| range.end),
        // Operator patterns: take whichever ends later, the operand or the operator.
        PatternArgs::Plus { base, op_location }
        | PatternArgs::Star { base, op_location }
        | PatternArgs::Question { base, op_location }
        | PatternArgs::Exclamation { base, op_location } => pattern_end(args, base).max(
            args.span_manager
                .get_byterange(op_location)
                .map_or(0, |range| range.end),
        ),
        PatternArgs::TerminalSet(set) => args
            .span_manager
            .get_byterange(&set.location())
            .map_or(0, |range| range.end),
        // Groups: furthest end over every pattern of every alternative, or the
        // closing location if that lies further.
        PatternArgs::Group {
            alternatives,
            close_location,
            ..
        } => alternatives
            .iter()
            .flatten()
            .map(|pattern| pattern_end(args, pattern))
            .max()
            .unwrap_or(0)
            .max(
                args.span_manager
                    .get_byterange(close_location)
                    .map_or(0, |range| range.end),
            ),
        // Literal patterns: end of the literal's own location.
        PatternArgs::Byte(lit) => args
            .span_manager
            .get_byterange(&lit.location())
            .map_or(0, |range| range.end),
        PatternArgs::ByteString(lit) => args
            .span_manager
            .get_byterange(&lit.location())
            .map_or(0, |range| range.end),
        PatternArgs::Char(lit) => args
            .span_manager
            .get_byterange(&lit.location())
            .map_or(0, |range| range.end),
        PatternArgs::String(lit) => args
            .span_manager
            .get_byterange(&lit.location())
            .map_or(0, |range| range.end),
        PatternArgs::Minus { base, exclude } => {
            pattern_end(args, base).max(pattern_end(args, exclude))
        }
        PatternArgs::Sep {
            base,
            delimiter,
            location,
            ..
        } => pattern_end(args, base)
            .max(pattern_end(args, delimiter))
            .max(
                args.span_manager
                    .get_byterange(location)
                    .map_or(0, |range| range.end),
            ),
    }
}
/// Returns the byte offset of the first character of the line containing
/// `offset`. Offsets past the end of `content` are clamped to its length.
fn line_start(content: &str, offset: usize) -> usize {
    let clamped = offset.min(content.len());
    match content[..clamped].rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    }
}

/// Returns the byte offset of the first `'\n'` at or after `offset`, or
/// `content.len()` when there is none.
fn line_end(content: &str, offset: usize) -> usize {
    let clamped = offset.min(content.len());
    match content[clamped..].find('\n') {
        Some(distance) => offset + distance,
        None => content.len(),
    }
}
skips_rule_formatting_when_grammar_comment_would_be_lost() { + let content = r#" +#[derive(Debug, Clone)] +pub enum Token { A } + +%% + +%tokentype Token; +%token a Token::A; + +Rule(i32): a { 1 } +// | a { 2 } +; +"#; + let formatted = apply_edits(content, formatting(content)); + + assert!(formatted.contains("Rule(i32): a { 1 }\n// | a { 2 }\n;")); + } + + #[test] + fn formats_reduce_action_comments_inside_action_range() { + let content = r#" +#[derive(Debug, Clone)] +pub enum Token { A } + +%% + +%tokentype Token; +%token a Token::A; + +Rule(i32): a { + // keep this comment + 1 +} +; +"#; + let formatted = apply_edits(content, formatting(content)); + + assert!(formatted.contains( + "Rule(i32)\n : a {\n // keep this comment\n 1\n }\n ;" + )); + } + + #[test] + fn skips_multiline_directive_with_inline_comment() { + let content = "%%\n%tokentype\n // token type comment\n Token;\n"; + let formatted = apply_edits(content, directive_edits(content)); + + assert!(formatted.contains("%tokentype\n // token type comment\n Token;")); + } + + #[test] + fn formats_directive_with_comments_containing_semicolons() { + // Single-line comment with semicolon + let content1 = "%%\n%token num Token::Num(_); // comment; here\n"; + let formatted1 = apply_edits(content1, formatting(content1)); + assert_eq!( + formatted1, + "%%\n%token num Token::Num(_); // comment; here\n" + ); + + // Multi-line block comment with semicolon + let content2 = "%%\n%token num Token::Num(_) /* comment; here */ ;\n"; + let formatted2 = apply_edits(content2, formatting(content2)); + assert_eq!( + formatted2, + "%%\n%token num Token::Num(_) /* comment; here */ ;\n" + ); + } + + #[test] + fn preserves_comments_in_parser_grammar_fixture() { + let content = include_str!("../../rusty_lr_parser/src/parser/parser.rustylr"); + let formatted = apply_edits(content, formatting(content)); + + assert!(formatted.contains("// | Pattern error {")); + assert!(formatted.contains("// Pattern")); + 
assert!(crate::completion::parse_args(&formatted).is_ok()); + } + + fn apply_edits(content: &str, edits: Vec) -> String { + let mut edits = edits + .into_iter() + .map(|edit| { + let start = position_to_offset(content, edit.range.start); + let end = position_to_offset(content, edit.range.end); + (start, end, edit.new_text) + }) + .collect::>(); + edits.sort_by_key(|(start, _, _)| *start); + + let mut result = String::new(); + let mut cursor = 0; + for (start, end, new_text) in edits { + result.push_str(&content[cursor..start]); + result.push_str(&new_text); + cursor = end; + } + result.push_str(&content[cursor..]); + result + } +} diff --git a/rusty_lr_lsp/src/goto_definition.rs b/rusty_lr_lsp/src/goto_definition.rs new file mode 100644 index 00000000..eceab5ae --- /dev/null +++ b/rusty_lr_lsp/src/goto_definition.rs @@ -0,0 +1,282 @@ +use lsp_types::{Position, Range}; +use proc_macro2::TokenStream; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{ + GrammarArgs, IdentOrLiteral, Located, PatternArgs, PrecDPrecArgs, TerminalSetItem, +}; +use std::str::FromStr; + +use crate::diagnostics::split_stream; +use crate::position::{position_to_offset, range_to_lsp_range}; + +/// Traverses the AST of GrammarArgs to collect all Located instances. +fn collect_located(args: &GrammarArgs) -> Vec> { + let mut collected = Vec::new(); + + // 1. %start names + for start_name in &args.start_rule_name { + collected.push(start_name.clone()); + } + + // 2. %token definitions + for (t_name, _) in &args.terminals { + collected.push(t_name.clone()); + } + + // 3. Precedence definitions + for (_, _, items) in &args.precedences { + for item in items { + if let IdentOrLiteral::Ident(ident) = item { + collected.push(ident.clone()); + } + } + } + + // 4. %allow diagnostics names + for (allow_name, _) in &args.allowed_diagnostics { + collected.push(allow_name.clone()); + } + + // 5. 
/// Recursively traverses a PatternArgs structure to collect Located instances.
///
/// Identifiers are cloned into `collected` in traversal order. Composite
/// patterns are descended into; variants not listed below contribute nothing.
// NOTE(review): the element type of `collected` appears to have lost its
// generic arguments in this copy (`Vec>`); restore it from the original
// source before compiling.
fn collect_pattern_located(pattern: &PatternArgs, collected: &mut Vec>) {
    match pattern {
        PatternArgs::Ident(ident) => {
            collected.push(ident.clone());
        }
        // Operator patterns only wrap a base pattern.
        PatternArgs::Plus { base, .. }
        | PatternArgs::Star { base, .. }
        | PatternArgs::Question { base, .. }
        | PatternArgs::Exclamation { base, .. } => {
            collect_pattern_located(base, collected);
        }
        PatternArgs::TerminalSet(ts) => {
            for item in &ts.items {
                match item {
                    TerminalSetItem::Terminal(ident) => {
                        collected.push(ident.clone());
                    }
                    // Both endpoints of a range are collected.
                    TerminalSetItem::Range(first, last) => {
                        collected.push(first.clone());
                        collected.push(last.clone());
                    }
                    _ => {}
                }
            }
        }
        PatternArgs::Group { alternatives, .. } => {
            for alt in alternatives {
                for pat in alt {
                    collect_pattern_located(pat, collected);
                }
            }
        }
        PatternArgs::Minus { base, exclude } => {
            collect_pattern_located(base, collected);
            collect_pattern_located(exclude, collected);
        }
        PatternArgs::Sep {
            base, delimiter, ..
        } => {
            collect_pattern_located(base, collected);
            collect_pattern_located(delimiter, collected);
        }
        _ => {}
    }
}
+pub fn find_definition(content: &str, target_pos: Position) -> Option { + let offset = position_to_offset(content, target_pos); + + // Parse the entire document into TokenStream + let token_stream = TokenStream::from_str(content).ok()?; + let (_, macro_stream) = split_stream(token_stream).ok()?; + let grammar_args = Grammar::parse_args(macro_stream).ok()?; + let span_manager = grammar_args.span_manager.clone(); + + // Collect all located identifier strings in the AST + let all_located = collect_located(&grammar_args); + + // Find the one that contains the click offset + let clicked = all_located.iter().find(|loc| { + if let Some(range) = span_manager.get_byterange(&loc.location()) { + range.contains(&offset) + } else { + false + } + })?; + + // Look up the definition by name + let name = clicked.value(); + + // 1. Check rule definitions + if let Some(rule) = grammar_args.rules.iter().find(|r| r.name.value == *name) { + let def_range = span_manager.get_byterange(&rule.name.location())?; + return Some(range_to_lsp_range(content, def_range)); + } + + // 2. Check token definitions + if let Some((t_name, _)) = grammar_args + .terminals + .iter() + .find(|(t, _)| t.value == *name) + { + let def_range = span_manager.get_byterange(&t_name.location())?; + return Some(range_to_lsp_range(content, def_range)); + } + + // 3. 
Check precedence definitions + for (_, _, items) in &grammar_args.precedences { + for item in items { + if let IdentOrLiteral::Ident(ident) = item { + if ident.value() == name { + let def_range = span_manager.get_byterange(&ident.location())?; + return Some(range_to_lsp_range(content, def_range)); + } + } + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, +} + +%% + +%tokentype Token; +%start E; + +%token num Token::Num(_); +%token plus Token::Plus; + +E(_) : E plus num { 0 } + | num { 0 } + ; +"#; + + const MOCK_GRAMMAR_WITH_ERROR: &str = r#" +%% +%start E; +E : num plus error ; +"#; + + #[test] + fn test_split_stream() { + let ts = TokenStream::from_str(MOCK_GRAMMAR).unwrap(); + let (output, macro_stream) = split_stream(ts).unwrap(); + + let output_str = output.to_string(); + let macro_str = macro_stream.to_string(); + + assert!(output_str.contains("enum Token")); + assert!(macro_str.contains("tokentype")); + assert!(macro_str.contains("start E")); + } + + #[test] + fn test_diagnostics() { + // Test valid grammar diagnostics (should be empty or only warnings about unused tokens/etc if any) + let diags = crate::diagnostics::compile_and_get_diagnostics(MOCK_GRAMMAR); + // Under normal circumstances, MOCK_GRAMMAR is valid + for diag in &diags { + eprintln!("Diag: {:?}", diag.message); + } + + // Test invalid grammar diagnostics + let diags_err = crate::diagnostics::compile_and_get_diagnostics(MOCK_GRAMMAR_WITH_ERROR); + assert!(!diags_err.is_empty()); + assert!(diags_err + .iter() + .any(|d| d.message.contains("not defined") || d.message.contains("error"))); + } + + #[test] + fn test_goto_definition() { + // Find position of the 'plus' reference in rule "E : E plus num" + // Let's search for "plus num" inside the string + let index = MOCK_GRAMMAR.find("plus num").unwrap(); + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let 
def_range = find_definition(MOCK_GRAMMAR, pos).unwrap(); + + // The definition should point to "%token plus Token::Plus;" + let def_offset = crate::position::position_to_offset(MOCK_GRAMMAR, def_range.start); + let def_substring = &MOCK_GRAMMAR[def_offset..]; + assert!(def_substring.starts_with("plus")); + + // It should be on the line "%token plus Token::Plus;" + let token_def_index = MOCK_GRAMMAR.find("%token plus").unwrap(); + let expected_start_pos = + crate::position::offset_to_position(MOCK_GRAMMAR, token_def_index + 7); // start of 'plus' + assert_eq!(def_range.start, expected_start_pos); + } + + #[test] + fn test_goto_definition_prec() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), +} + +%% + +%tokentype Token; +%start E; + +%precedence empty_action; +%token num Token::Num(_); + +E(_) : num + | %prec empty_action { 0 } + ; +"#; + + // Click on 'empty_action' after '%prec' + let index = grammar.find("%prec empty_action").unwrap() + 6; // start of 'empty_action' + let pos = crate::position::offset_to_position(grammar, index); + + let def_range = find_definition(grammar, pos).unwrap(); + + // The definition should point to '%precedence empty_action;' + let def_offset = crate::position::position_to_offset(grammar, def_range.start); + let def_substring = &grammar[def_offset..]; + assert!(def_substring.starts_with("empty_action")); + + let prec_def_index = grammar.find("%precedence empty_action").unwrap(); + let expected_start_pos = crate::position::offset_to_position(grammar, prec_def_index + 12); // start of 'empty_action' + assert_eq!(def_range.start, expected_start_pos); + } +} diff --git a/rusty_lr_lsp/src/hover.rs b/rusty_lr_lsp/src/hover.rs new file mode 100644 index 00000000..47acf394 --- /dev/null +++ b/rusty_lr_lsp/src/hover.rs @@ -0,0 +1,995 @@ +use lsp_types::{Hover, HoverContents, MarkupContent, MarkupKind, Position}; +use proc_macro2::TokenStream; +use rusty_lr_parser::grammar::Grammar; +use 
rusty_lr_parser::terminal_info::TerminalName; +use rusty_lr_parser::{GrammarArgs, PatternArgs, TerminalSetItem}; +use std::collections::BTreeSet; +use std::ops::Range as ByteRange; + +use crate::completion::{ + self, ALLOW_DIAGNOSTICS, DIRECTIVES, KEYWORDS, SUBSTITUTION_VARIABLES, SYNTAX_URL, +}; +use crate::position::position_to_offset; + +pub fn hover(content: &str, position: Position) -> Option { + let offset = position_to_offset(content, position); + let parsed = completion::parse_args(content).ok(); + + if let Some(args) = &parsed { + if let Some((brace_range, doc)) = reduce_action_brace_at_offset(args, content, offset) { + return Some(markdown_hover(content, doc, Some(brace_range))); + } + + if let Some((pattern, range)) = pattern_at_offset(args, offset) { + return Some(markdown_hover( + content, + pattern_documentation(args, pattern, content), + Some(range), + )); + } + } + + let word = hover_word(content, offset)?; + let mut documentation = None; + + if let Some(args) = &parsed { + let mut assoc_type = ""; + let mut declaration_items = Vec::new(); + let mut found_prec = false; + for (_, assoc, items) in &args.precedences { + if items.iter().any(|item| item.to_string() == word) { + assoc_type = match assoc { + Some(rusty_lr_core::production::Associativity::Left) => "%left", + Some(rusty_lr_core::production::Associativity::Right) => "%right", + None => "%precedence", + }; + declaration_items = items.iter().map(|i| i.to_string()).collect(); + found_prec = true; + break; + } + } + + if found_prec { + documentation = Some(format!( + "### Precedence Symbol `{}`\n\nDeclared via:\n```rustylr\n{} {};\n```", + word, + assoc_type, + declaration_items.join(" ") + )); + } + } + + if documentation.is_none() { + if word == "data" { + let mut userdata_type = "()".to_string(); + let mut definition_info = "".to_string(); + + if let Some(args) = &parsed { + if let Some((_, ts)) = args.userdata_typename.first() { + userdata_type = ts.to_string(); + definition_info = format!( + 
"\n\nDefinition:\n```rustylr\n%userdata {};\n```", + userdata_type + ); + } + } + + documentation = Some(format!( + "### `data: &mut {}`{}\n\nMutable user-data binding available inside reduce actions.\n\nExample:\n\n```rustylr\nExpr : num {{ data.count += 1; num }};\n```\n\n[User data]({}#4-user-data-data)", + userdata_type, + definition_info, + SYNTAX_URL + )); + } else { + documentation = hover_word_documentation(&word); + } + } + + let documentation = documentation?; + Some(markdown_hover(content, documentation, None)) +} + +fn markdown_hover(content: &str, value: String, range: Option>) -> Hover { + Hover { + contents: HoverContents::Markup(MarkupContent { + kind: MarkupKind::Markdown, + value, + }), + range: range.map(|range| crate::position::range_to_lsp_range(content, range)), + } +} + +fn reduce_action_brace_at_offset( + args: &GrammarArgs, + content: &str, + offset: usize, +) -> Option<(ByteRange, String)> { + for rule in &args.rules { + for line in &rule.rule_lines { + if let Some(reduce_action) = &line.reduce_action { + if let Some(proc_macro2::TokenTree::Group(group)) = + reduce_action.clone().into_iter().next() + { + if group.delimiter() == proc_macro2::Delimiter::Brace { + let action_range = group.span().byte_range(); + + // Check start brace(s) + if action_range.start < content.len() + && content.as_bytes()[action_range.start] == b'{' + { + let start_brace_end = if action_range.start + 1 < action_range.end + && content.as_bytes()[action_range.start + 1] == b'{' + { + action_range.start + 2 + } else { + action_range.start + 1 + }; + let start_brace_range = action_range.start..start_brace_end; + if start_brace_range.contains(&offset) { + return Some((start_brace_range, reduce_action_documentation())); + } + } + + // Check end brace(s) + if action_range.end > action_range.start + && action_range.end <= content.len() + { + if content.as_bytes()[action_range.end - 1] == b'}' { + let end_brace_start = if action_range.end - 2 >= action_range.start + && 
content.as_bytes()[action_range.end - 2] == b'}' + { + action_range.end - 2 + } else { + action_range.end - 1 + }; + let end_brace_range = end_brace_start..action_range.end; + if end_brace_range.contains(&offset) { + return Some((end_brace_range, reduce_action_documentation())); + } + } + } + } + } + } + } + } + None +} + +fn reduce_action_documentation() -> String { + format!( + "### Reduce Action\n\nA block of Rust code executed when this production rule is reduced.\n\n[Reduce Actions]({}#reduceaction-optional)", + SYNTAX_URL + ) +} + +fn hover_word(content: &str, offset: usize) -> Option { + let mut offset = offset.min(content.len()); + if offset < content.len() { + let ch = content[offset..].chars().next()?; + if ch == '@' || ch == '$' || ch == '%' { + offset += ch.len_utf8(); + } + } + + let start = completion::current_prefix_start(content, offset, true); + let mut end = offset; + while end < content.len() { + let ch = content[end..].chars().next()?; + if completion::is_ident_continue(ch) { + end += ch.len_utf8(); + } else if ch == '$' && &content[start..end] == "@" { + end += ch.len_utf8(); + break; + } else if ch == '$' && &content[start..end] == "$" { + end += ch.len_utf8(); + break; + } else { + break; + } + } + if start == end { + return None; + } + Some(content[start..end].to_string()) +} + +fn hover_word_documentation(word: &str) -> Option { + if DIRECTIVES.contains(&word) || KEYWORDS.contains(&word) { + return completion::keyword_documentation(word); + } + if SUBSTITUTION_VARIABLES.contains(&word) { + return completion::substitution_documentation(word); + } + if word.starts_with('@') { + return completion::location_documentation(word); + } + if ALLOW_DIAGNOSTICS.contains(&word) { + return completion::allow_diagnostic_documentation(word); + } + None +} + +fn pattern_at_offset( + args: &GrammarArgs, + offset: usize, +) -> Option<(&PatternArgs, ByteRange)> { + for rule in &args.rules { + for line in &rule.rule_lines { + for (_, pattern) in &line.tokens { 
+ if let Some(range) = args.span_manager.get_byterange(&pattern.location()) { + if range.contains(&offset) { + return Some((pattern, range)); + } + } + } + } + } + None +} + +fn pattern_documentation(args: &GrammarArgs, pattern: &PatternArgs, content: &str) -> String { + if let Some(documentation) = identifier_pattern_documentation(args, pattern, content) { + return documentation; + } + + let pattern_text = pattern_text(args, pattern, content); + let grammar = Grammar::from_grammar_args(args.clone()).ok(); + let pattern_type = grammar + .as_ref() + .and_then(|grammar| pattern_type(args, grammar, pattern)); + let type_line = hover_type_line(pattern_type.as_ref()); + let subterms = grammar + .as_ref() + .map(|grammar| subterm_documentation(args, grammar, pattern, content)) + .unwrap_or_default(); + let keyword = pattern_keyword_documentation(pattern); + + let mut documentation = format!("Pattern `{pattern_text}`.\n\n{type_line}"); + if !subterms.is_empty() { + documentation.push_str("\n\n"); + documentation.push_str(&subterms); + } + if let Some(keyword) = keyword { + documentation.push_str("\n\n---\n\n"); + documentation.push_str(&keyword); + } + documentation.push_str(&format!("\n\n[Patterns]({SYNTAX_URL}#patterns)")); + documentation +} + +fn identifier_pattern_documentation( + args: &GrammarArgs, + pattern: &PatternArgs, + content: &str, +) -> Option { + let PatternArgs::Ident(ident) = pattern else { + return None; + }; + + let grammar = Grammar::from_grammar_args(args.clone()).ok()?; + nonterminal_symbol_documentation(args, &grammar, content, ident.value()) + .or_else(|| terminal_symbol_documentation(args, &grammar, content, ident.value())) + .or_else(|| pattern_keyword_documentation(pattern)) +} + +fn pattern_text(args: &GrammarArgs, pattern: &PatternArgs, content: &str) -> String { + args.span_manager + .get_byterange(&pattern.location()) + .and_then(|range| content.get(range)) + .map(str::trim) + .filter(|text| !text.is_empty()) + .map(str::to_string) + 
.unwrap_or_else(|| pattern.to_string()) +} + +fn subterm_documentation( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, + content: &str, +) -> String { + let mut seen_nonterminals = BTreeSet::new(); + let mut seen_terminals = BTreeSet::new(); + let mut symbols = Vec::new(); + collect_symbol_documentation( + args, + grammar, + pattern, + content, + &mut seen_nonterminals, + &mut seen_terminals, + &mut symbols, + ); + + let mut seen_syntax = BTreeSet::new(); + let mut syntax = Vec::new(); + collect_pattern_syntax(pattern, &mut seen_syntax, &mut syntax); + + let mut sections = Vec::new(); + if !symbols.is_empty() { + sections.push(format!("Identifiers:\n\n{}", symbols.join("\n\n"))); + } + if !syntax.is_empty() { + sections.push(format!("Pattern syntax:\n\n{}", syntax.join("\n"))); + } + sections.join("\n\n") +} + +fn collect_symbol_documentation( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, + content: &str, + seen_nonterminals: &mut BTreeSet, + seen_terminals: &mut BTreeSet, + symbols: &mut Vec, +) { + match pattern { + PatternArgs::Ident(ident) => { + let name = ident.value(); + if let Some(symbol) = nonterminal_symbol_documentation(args, grammar, content, name) + .filter(|_| seen_nonterminals.insert(name.clone())) + { + symbols.push(symbol); + } else if let Some(symbol) = terminal_symbol_documentation(args, grammar, content, name) + .filter(|_| seen_terminals.insert(name.clone())) + { + symbols.push(symbol); + } + } + PatternArgs::TerminalSet(terminal_set) => { + for item in &terminal_set.items { + match item { + TerminalSetItem::Terminal(ident) => { + let name = ident.value(); + if let Some(symbol) = + terminal_symbol_documentation(args, grammar, content, name) + .filter(|_| seen_terminals.insert(name.clone())) + { + symbols.push(symbol); + } + } + TerminalSetItem::Range(first, last) => { + for ident in [first, last] { + let name = ident.value(); + if let Some(symbol) = + terminal_symbol_documentation(args, grammar, 
content, name) + .filter(|_| seen_terminals.insert(name.clone())) + { + symbols.push(symbol); + } + } + } + TerminalSetItem::Byte(_) + | TerminalSetItem::ByteRange(_, _) + | TerminalSetItem::Char(_) + | TerminalSetItem::CharRange(_, _) => {} + } + } + } + _ => {} + } + + for child in pattern_children(pattern) { + collect_symbol_documentation( + args, + grammar, + child, + content, + seen_nonterminals, + seen_terminals, + symbols, + ); + } +} + +fn nonterminal_symbol_documentation( + args: &GrammarArgs, + grammar: &Grammar, + content: &str, + name: &str, +) -> Option { + let rule = args.rules.iter().find(|rule| rule.name.value() == name)?; + let (ty, boxed) = grammar.nonterminal_type(name)?; + let type_line = hover_type_line(rust_type(ty, boxed).as_ref()); + let definition = completion::rule_definition_text(args, content, rule); + Some(format!( + "**Non-terminal `{name}`**\n\n{type_line}\n\nDefinition:\n\n{}", + definition_code_block(&definition) + )) +} + +fn terminal_symbol_documentation( + args: &GrammarArgs, + grammar: &Grammar, + content: &str, + name: &str, +) -> Option { + let (terminal, _) = args + .terminals + .iter() + .find(|(terminal, _)| terminal.value() == name)?; + let type_line = hover_type_line(Some(&token_type(grammar))); + let definition = completion::line_text_for_location(args, content, &terminal.location()); + Some(format!( + "**Terminal `{name}`**\n\n{type_line}\n\nDefinition:\n\n{}", + definition_code_block(&definition) + )) +} + +fn definition_code_block(definition: &str) -> String { + format!("```rustylr\n{definition}\n```") +} + +fn collect_pattern_syntax( + pattern: &PatternArgs, + seen: &mut BTreeSet<&'static str>, + syntax: &mut Vec, +) { + if let Some(label) = pattern_syntax_label(pattern) { + if seen.insert(label) { + syntax.push(format!( + "- `{label}`: {}", + pattern_syntax_documentation(pattern) + )); + } + } + + for child in pattern_children(pattern) { + collect_pattern_syntax(child, seen, syntax); + } +} + +fn 
pattern_syntax_label(pattern: &PatternArgs) -> Option<&'static str> { + match pattern { + PatternArgs::Plus { .. } => Some("A+"), + PatternArgs::Star { .. } => Some("A*"), + PatternArgs::Question { .. } => Some("A?"), + PatternArgs::Exclamation { .. } => Some("A!"), + PatternArgs::TerminalSet(_) => Some("[...]"), + PatternArgs::Group { .. } => Some("(...)"), + PatternArgs::Minus { .. } => Some("A - B"), + PatternArgs::Sep { .. } => Some("$sep(A, Sep, ...)"), + PatternArgs::Ident(_) + | PatternArgs::Byte(_) + | PatternArgs::ByteString(_) + | PatternArgs::Char(_) + | PatternArgs::String(_) => None, + } +} + +fn pattern_children(pattern: &PatternArgs) -> Vec<&PatternArgs> { + match pattern { + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => vec![base.as_ref()], + PatternArgs::Group { alternatives, .. } => alternatives.iter().flatten().collect(), + PatternArgs::Minus { base, exclude } => vec![base.as_ref(), exclude.as_ref()], + PatternArgs::Sep { + base, delimiter, .. + } => vec![base.as_ref(), delimiter.as_ref()], + PatternArgs::Ident(_) + | PatternArgs::TerminalSet(_) + | PatternArgs::Byte(_) + | PatternArgs::ByteString(_) + | PatternArgs::Char(_) + | PatternArgs::String(_) => Vec::new(), + } +} + +fn pattern_syntax_documentation(pattern: &PatternArgs) -> String { + match pattern { + PatternArgs::Ident(_) => { + "Identifier pattern. It references a terminal or non-terminal symbol.".to_string() + } + PatternArgs::Plus { .. } => { + "`A+` matches one or more repetitions of `A` and collects valued matches into a `Vec`." + .to_string() + } + PatternArgs::Star { .. } => { + "`A*` matches zero or more repetitions of `A` and collects valued matches into a `Vec`." + .to_string() + } + PatternArgs::Question { .. } => { + "`A?` matches zero or one `A` and maps valued matches to `Option`.".to_string() + } + PatternArgs::Exclamation { .. 
} => { + "`A!` matches `A` but discards its semantic value from the production.".to_string() + } + PatternArgs::TerminalSet(_) => { + "Terminal set pattern. It matches one terminal from the set.".to_string() + } + PatternArgs::Group { .. } => { + "Grouped pattern. Alternatives are matched as a nested pattern; valued children are returned as a single value or tuple." + .to_string() + } + PatternArgs::Byte(_) => { + "Byte literal pattern. It is available when `%tokentype` is `u8`.".to_string() + } + PatternArgs::ByteString(_) => { + "Byte string literal pattern. It expands to a sequence of byte terminals.".to_string() + } + PatternArgs::Char(_) => { + "Character literal pattern. It is available when `%tokentype` is `char`.".to_string() + } + PatternArgs::String(_) => { + "String literal pattern. It expands to a sequence of character terminals.".to_string() + } + PatternArgs::Minus { .. } => { + "`A - B` matches terminals in `A` excluding terminals in `B`.".to_string() + } + PatternArgs::Sep { at_least_one, .. } => { + let quantifier = if *at_least_one { + "one or more" + } else { + "zero or more" + }; + format!( + "`$sep(A, Sep, ...)` matches {quantifier} `A` patterns separated by `Sep` and collects valued `A` matches into a `Vec`." + ) + } + } +} + +fn pattern_keyword_documentation(pattern: &PatternArgs) -> Option { + match pattern { + PatternArgs::Ident(ident) if ident.value() == "error" => { + completion::keyword_documentation("error") + } + PatternArgs::Sep { .. 
} => completion::keyword_documentation("$sep"), + _ => None, + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub(crate) struct HoverRustType { + name: String, + boxed: bool, +} + +pub(crate) fn pattern_final_type( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, +) -> String { + hover_type_name(pattern_type(args, grammar, pattern).as_ref()) +} + +fn hover_type_line(ty: Option<&HoverRustType>) -> String { + match ty { + Some(ty) if ty.boxed => format!("Final type: `{}` (boxed)", ty.name), + Some(ty) => format!("Final type: `{}`", ty.name), + None => "Final type: `()`".to_string(), + } +} + +fn hover_type_name(ty: Option<&HoverRustType>) -> String { + match ty { + Some(ty) if ty.boxed => format!("{} (boxed)", ty.name), + Some(ty) => ty.name.clone(), + None => "()".to_string(), + } +} + +pub(crate) fn pattern_type( + args: &GrammarArgs, + grammar: &Grammar, + pattern: &PatternArgs, +) -> Option { + match pattern { + PatternArgs::Ident(ident) => { + if ident.value() == "error" { + return None; + } + if grammar + .terminals_index + .contains_key(&TerminalName::Ident(ident.value().clone())) + { + return Some(token_type(grammar)); + } + let (ty, boxed) = grammar.nonterminal_type(ident.value())?; + rust_type(ty, boxed) + } + PatternArgs::Plus { base, .. } | PatternArgs::Star { base, .. } => { + let base_type = pattern_type(args, grammar, base)?; + Some(HoverRustType { + name: format!("Vec<{}>", base_type.name), + boxed: false, + }) + } + PatternArgs::Question { base, .. } => { + let base_type = pattern_type(args, grammar, base)?; + Some(HoverRustType { + name: format!("Option<{}>", base_type.name), + boxed: false, + }) + } + PatternArgs::Exclamation { .. 
} => None, + PatternArgs::TerminalSet(_) | PatternArgs::Byte(_) | PatternArgs::Char(_) => { + Some(token_type(grammar)) + } + PatternArgs::ByteString(_) => Some(HoverRustType { + name: "&'static [u8]".to_string(), + boxed: false, + }), + PatternArgs::String(_) => Some(HoverRustType { + name: "&'static str".to_string(), + boxed: false, + }), + PatternArgs::Group { alternatives, .. } => group_type(args, grammar, alternatives), + PatternArgs::Minus { .. } => Some(token_type(grammar)), + PatternArgs::Sep { base, .. } => { + let base_type = pattern_type(args, grammar, base)?; + Some(HoverRustType { + name: format!("Vec<{}>", base_type.name), + boxed: false, + }) + } + } +} + +fn group_type( + args: &GrammarArgs, + grammar: &Grammar, + alternatives: &[Vec], +) -> Option { + let mut alternatives = alternatives + .iter() + .map(|alternative| alternative_type(args, grammar, alternative)); + let first = alternatives.next()?; + if alternatives.all(|ty| ty == first) { + first + } else { + None + } +} + +fn alternative_type( + args: &GrammarArgs, + grammar: &Grammar, + alternative: &[PatternArgs], +) -> Option { + let child_types = alternative + .iter() + .filter_map(|pattern| pattern_type(args, grammar, pattern)) + .collect::>(); + match child_types.len() { + 0 => None, + 1 => child_types.into_iter().next(), + _ => Some(HoverRustType { + name: format!( + "({})", + child_types + .iter() + .map(|ty| format!("{},", ty.name)) + .collect::>() + .join(" ") + ), + boxed: false, + }), + } +} + +fn token_type(grammar: &Grammar) -> HoverRustType { + rust_type(Some(grammar.token_type()), grammar.token_type_boxed()).unwrap() +} + +fn rust_type(ty: Option<&TokenStream>, boxed: bool) -> Option { + let name = ty.map(TokenStream::to_string).filter(|ty| !ty.is_empty())?; + Some(HoverRustType { name, boxed }) +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Comma, +} + +%% + +%tokentype Token; 
+%start List; + +%token num Token::Num(_); +%token plus Token::Plus; +%token comma Token::Comma; + +E(i32) : value=num { 0 }; +List(Vec) : $sep(E, comma, +) { E }; +"#; + + #[test] + fn hovers_keyword() { + let offset = MOCK_GRAMMAR.find("%token num").unwrap() + 1; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Defines a terminal symbol")); + } + + #[test] + fn hovers_identifier_pattern_with_type() { + let offset = MOCK_GRAMMAR.find("E(i32) : value=num").unwrap(); + let offset = MOCK_GRAMMAR[offset..].find("num").unwrap() + offset; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Final type: `Token`")); + assert!(markup.value.contains("**Terminal `num`**")); + assert!(markup + .value + .contains("```rustylr\n%token num Token::Num(_);\n```")); + assert!(!markup.value.contains("Pattern `num`")); + assert!(!markup.value.contains("Identifiers:")); + assert!(!markup.value.contains("Identifier pattern")); + } + + #[test] + fn hovers_sep_pattern_with_vec_type_and_keyword_details() { + let offset = MOCK_GRAMMAR.find("$sep").unwrap() + 1; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Pattern `$sep(E, comma, +)`")); + assert!(markup.value.contains("Final type: `Vec`")); + assert!(markup.value.contains("Identifiers:")); + assert!(markup.value.contains("**Non-terminal `E`**")); + assert!(markup.value.contains("```rustylr\nE(i32)\n : num\n ;\n```")); + 
assert!(!markup.value.contains("value=num")); + assert!(markup.value.contains("**Terminal `comma`**")); + assert!(markup + .value + .contains("```rustylr\n%token comma Token::Comma;\n```")); + assert!(markup.value.contains("Pattern syntax:")); + assert!(markup.value.contains("- `$sep(A, Sep, ...)`:")); + assert_eq!(markup.value.matches("**Non-terminal `E`**").count(), 1); + assert_eq!(markup.value.matches("**Terminal `comma`**").count(), 1); + assert!(markup + .value + .contains("Pattern helper for separated repetition")); + } + + #[test] + fn hovers_whole_pattern_when_cursor_is_on_inner_symbol() { + let sep_offset = MOCK_GRAMMAR.find("$sep").unwrap(); + let offset = MOCK_GRAMMAR[sep_offset..].find("comma").unwrap() + sep_offset + 1; + let hover = hover( + MOCK_GRAMMAR, + crate::position::offset_to_position(MOCK_GRAMMAR, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Pattern `$sep(E, comma, +)`")); + assert!(markup.value.contains("Final type: `Vec`")); + assert!(markup.value.contains("Identifiers:")); + assert!(markup.value.contains("**Non-terminal `E`**")); + assert!(markup.value.contains("**Terminal `comma`**")); + assert!(markup.value.contains("Pattern syntax:")); + assert!(markup + .value + .contains("Pattern helper for separated repetition")); + } + + #[test] + fn hovers_data_with_userdata_type() { + let grammar_with_userdata = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32) } +%% +%userdata MyCoolData; +%tokentype Token; +%start Expr; +%token num Token::Num(_); +Expr : num { *data += 1; 0 }; +"#; + let offset = grammar_with_userdata.find("*data").unwrap() + 1; // points to 'd' in 'data' + let hover = hover( + grammar_with_userdata, + crate::position::offset_to_position(grammar_with_userdata, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover.contents else { + panic!("expected markup hover"); + }; + 
assert!(markup.value.contains("data: &mut MyCoolData")); + assert!(markup.value.contains("%userdata MyCoolData;")); + } + + #[test] + fn hovers_sigils() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32) } +%% +%userdata MyCoolData; +%tokentype Token; +%start Expr; +%token num Token::Num(_); +Expr : num { println!("{:?}, {:?}", @1, @$); 0 }; +"#; + // Hover on '@' of '@1' + let offset = grammar.find("@1").unwrap(); + let hover1 = hover( + grammar, + crate::position::offset_to_position(grammar, offset), + ) + .unwrap(); + let HoverContents::Markup(markup1) = hover1.contents else { + panic!("expected markup hover"); + }; + assert!(markup1.value.contains("`@1` refers to a source-location")); + + // Hover on '@' of '@$' + let offset = grammar.find("@$").unwrap(); + let hover2 = hover( + grammar, + crate::position::offset_to_position(grammar, offset), + ) + .unwrap(); + let HoverContents::Markup(markup2) = hover2.contents else { + panic!("expected markup hover"); + }; + assert!(markup2.value.contains("`@$` refers to a source-location")); + } + + #[test] + fn hovers_precedence_symbol() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32) } +%% +%userdata MyCoolData; +%tokentype Token; +%start Expr; +%left plus minus; +%token num Token::Num(_); +Expr : Expr plus Expr + | num %prec minus + ; +"#; + let offset = grammar.find("minus").unwrap(); + let hover_res = hover( + grammar, + crate::position::offset_to_position(grammar, offset), + ) + .unwrap(); + let HoverContents::Markup(markup) = hover_res.contents else { + panic!("expected markup hover"); + }; + assert!(markup.value.contains("Precedence Symbol `minus`")); + assert!(markup.value.contains("```rustylr\n%left plus minus;\n```")); + } + + #[test] + fn hovers_reduce_action_braces() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { Num(i32), Plus } +%% +%tokentype Token; +%start Expr; +%token num Token::Num(_); +%token plus Token::Plus; +Expr : num { 0 } + | num 
plus num {{ 0 }} + ; +"#; + + // 1. Single brace start hover + let start_brace_offset = grammar.find("{ 0 }").unwrap(); + let hover_start = hover( + grammar, + crate::position::offset_to_position(grammar, start_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_start) = hover_start.contents else { + panic!("expected markup hover"); + }; + assert!(markup_start.value.contains("### Reduce Action")); + assert!(markup_start + .value + .contains("A block of Rust code executed when this production rule is reduced")); + assert!(markup_start.value.contains("#reduceaction-optional")); + assert_eq!( + hover_start.range.unwrap(), + crate::position::range_to_lsp_range( + grammar, + start_brace_offset..start_brace_offset + 1 + ) + ); + + // 2. Single brace end hover + let end_brace_offset = start_brace_offset + 4; // points to '}' of '{ 0 }' + let hover_end = hover( + grammar, + crate::position::offset_to_position(grammar, end_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_end) = hover_end.contents else { + panic!("expected markup hover"); + }; + assert!(markup_end.value.contains("### Reduce Action")); + assert_eq!( + hover_end.range.unwrap(), + crate::position::range_to_lsp_range(grammar, end_brace_offset..end_brace_offset + 1) + ); + + // 3. Double brace start hover (first brace) + let dstart_brace_offset = grammar.find("{{ 0 }}").unwrap(); + let hover_dstart1 = hover( + grammar, + crate::position::offset_to_position(grammar, dstart_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_dstart1) = hover_dstart1.contents else { + panic!("expected markup hover"); + }; + assert!(markup_dstart1.value.contains("### Reduce Action")); + assert_eq!( + hover_dstart1.range.unwrap(), + crate::position::range_to_lsp_range( + grammar, + dstart_brace_offset..dstart_brace_offset + 2 + ) + ); + + // 4. 
Double brace start hover (second brace) + let hover_dstart2 = hover( + grammar, + crate::position::offset_to_position(grammar, dstart_brace_offset + 1), + ) + .unwrap(); + let HoverContents::Markup(markup_dstart2) = hover_dstart2.contents else { + panic!("expected markup hover"); + }; + assert!(markup_dstart2.value.contains("### Reduce Action")); + assert_eq!( + hover_dstart2.range.unwrap(), + crate::position::range_to_lsp_range( + grammar, + dstart_brace_offset..dstart_brace_offset + 2 + ) + ); + + // 5. Double brace end hover (first of closing braces) + let dend_brace_offset = grammar.find("}}").unwrap(); + let hover_dend1 = hover( + grammar, + crate::position::offset_to_position(grammar, dend_brace_offset), + ) + .unwrap(); + let HoverContents::Markup(markup_dend1) = hover_dend1.contents else { + panic!("expected markup hover"); + }; + assert!(markup_dend1.value.contains("### Reduce Action")); + assert_eq!( + hover_dend1.range.unwrap(), + crate::position::range_to_lsp_range(grammar, dend_brace_offset..dend_brace_offset + 2) + ); + } +} diff --git a/rusty_lr_lsp/src/inlay_hint.rs b/rusty_lr_lsp/src/inlay_hint.rs new file mode 100644 index 00000000..65cdf0bc --- /dev/null +++ b/rusty_lr_lsp/src/inlay_hint.rs @@ -0,0 +1,203 @@ +use lsp_types::{InlayHint, InlayHintKind, InlayHintLabel, Range}; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{GrammarArgs, PatternArgs}; + +use crate::completion; +use crate::hover; +use crate::position::{offset_to_position, position_to_offset}; + +pub fn inlay_hints(content: &str, range: Range) -> Vec { + let Ok(args) = completion::parse_args(content) else { + return Vec::new(); + }; + let Ok(grammar) = Grammar::from_grammar_args(args.clone()) else { + return Vec::new(); + }; + + let range_start = position_to_offset(content, range.start); + let range_end = position_to_offset(content, range.end); + let mut hints = Vec::new(); + + for rule in &args.rules { + for line in &rule.rule_lines { + for (_, pattern) in &line.tokens 
{ + let Some(pattern_range) = args.span_manager.get_byterange(&pattern.location()) + else { + continue; + }; + if !ranges_overlap( + pattern_range.start, + pattern_range.end, + range_start, + range_end, + ) { + continue; + } + + hints.push(pattern_inlay_hint(&args, &grammar, content, pattern)); + } + + if let Some(reduce_action) = &line.reduce_action { + if let Some(proc_macro2::TokenTree::Group(group)) = + reduce_action.clone().into_iter().next() + { + if group.delimiter() == proc_macro2::Delimiter::Brace { + let action_range = group.span().byte_range(); + if ranges_overlap( + action_range.start, + action_range.end, + range_start, + range_end, + ) { + hints.push(InlayHint { + position: offset_to_position(content, action_range.start), + label: InlayHintLabel::String("ReduceAction".to_string()), + kind: None, + text_edits: None, + tooltip: Some(lsp_types::InlayHintTooltip::MarkupContent(lsp_types::MarkupContent { + kind: lsp_types::MarkupKind::Markdown, + value: format!( + "A block of Rust code executed when this production rule is reduced.\n\n[Reduce Actions]({}#reduceaction-optional)", + completion::SYNTAX_URL + ), + })), + padding_left: Some(true), + padding_right: Some(true), + data: None, + }); + } + } + } + } + } + } + + hints +} + +fn pattern_inlay_hint( + args: &GrammarArgs, + grammar: &Grammar, + content: &str, + pattern: &PatternArgs, +) -> InlayHint { + let end = args + .span_manager + .get_byterange(&pattern.location()) + .map_or(0, |range| range.end); + let final_type = hover::pattern_final_type(args, grammar, pattern); + InlayHint { + position: offset_to_position(content, end), + label: InlayHintLabel::String(format!(": {final_type}")), + kind: Some(InlayHintKind::TYPE), + text_edits: None, + tooltip: None, + padding_left: Some(true), + padding_right: None, + data: None, + } +} + +fn ranges_overlap(start_a: usize, end_a: usize, start_b: usize, end_b: usize) -> bool { + start_a <= end_b && start_b <= end_a +} + +#[cfg(test)] +mod tests { + use super::*; 
+ use lsp_types::Position; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Comma, +} + +%% + +%tokentype Token; +%start List; + +%token num Token::Num(_); +%token plus Token::Plus; +%token comma Token::Comma; + +E(i32) : left=E plus num { 0 } + | num { 0 } + ; +List(Vec) : $sep(E, comma, +) { E }; +"#; + + #[test] + fn hints_top_level_patterns_in_rule_definitions() { + let hints = inlay_hints( + MOCK_GRAMMAR, + Range::new(Position::new(0, 0), Position::new(100, 0)), + ); + let labels = hints + .iter() + .map(|hint| match &hint.label { + InlayHintLabel::String(label) => label.as_str(), + InlayHintLabel::LabelParts(_) => "", + }) + .collect::>(); + + assert!(labels.contains(&": i32")); + assert!(labels.contains(&": Token")); + assert!(labels.contains(&": Vec")); + } + + #[test] + fn does_not_hint_nested_sep_children_separately() { + let sep_line_start = MOCK_GRAMMAR.find("List(Vec)").unwrap(); + let sep_line_end = MOCK_GRAMMAR[sep_line_start..].find(';').unwrap() + sep_line_start; + let hints = inlay_hints( + MOCK_GRAMMAR, + Range::new( + offset_to_position(MOCK_GRAMMAR, sep_line_start), + offset_to_position(MOCK_GRAMMAR, sep_line_end), + ), + ); + let labels = hints + .iter() + .map(|hint| match &hint.label { + InlayHintLabel::String(label) => label.as_str(), + InlayHintLabel::LabelParts(_) => "", + }) + .collect::>(); + + assert_eq!(labels, vec![": Vec", "ReduceAction"]); + } + + #[test] + fn hints_reduce_actions_with_custom_tooltip() { + let hints = inlay_hints( + MOCK_GRAMMAR, + Range::new(Position::new(0, 0), Position::new(100, 0)), + ); + + let reduce_action_hints = hints + .iter() + .filter(|hint| match &hint.label { + InlayHintLabel::String(label) => label == "ReduceAction", + _ => false, + }) + .collect::>(); + + assert!(!reduce_action_hints.is_empty()); + for hint in reduce_action_hints { + let tooltip = hint.tooltip.as_ref().unwrap(); + match tooltip { + lsp_types::InlayHintTooltip::MarkupContent(markup) 
=> { + assert!(markup.value.contains( + "A block of Rust code executed when this production rule is reduced" + )); + assert!(markup.value.contains("#reduceaction-optional")); + } + _ => panic!("expected MarkupContent tooltip"), + } + } + } +} diff --git a/rusty_lr_lsp/src/main.rs b/rusty_lr_lsp/src/main.rs new file mode 100644 index 00000000..82a6bccc --- /dev/null +++ b/rusty_lr_lsp/src/main.rs @@ -0,0 +1,473 @@ +use lsp_server::{Connection, Message, Notification, Request, RequestId, Response}; +use lsp_types::{ + notification::{ + DidChangeTextDocument, DidOpenTextDocument, DidSaveTextDocument, PublishDiagnostics, + }, + request::{ + CodeActionRequest, Completion, Formatting, GotoDefinition, HoverRequest, InlayHintRequest, + SemanticTokensFullRequest, + }, + CodeActionKind, CodeActionOptions, CompletionOptions, Diagnostic, DiagnosticSeverity, + GotoDefinitionResponse, Hover, HoverProviderCapability, InlayHint, InlayHintOptions, + InlayHintServerCapabilities, Location, OneOf, PublishDiagnosticsParams, Range, + ServerCapabilities, TextDocumentSyncCapability, TextDocumentSyncKind, Url, +}; +use std::collections::HashMap; +use std::error::Error; +use std::panic::{catch_unwind, set_hook, take_hook, AssertUnwindSafe}; + +// Import the traits providing `METHOD` constant: +use lsp_types::notification::Notification as LspNotification; +use lsp_types::request::Request as LspRequest; + +mod code_action; +mod completion; +mod diagnostics; +mod formatter; +mod goto_definition; +mod hover; +mod inlay_hint; +mod position; +mod references; +mod semantic_tokens; + +fn main() -> Result<(), Box> { + eprintln!("Starting RustyLR LSP server..."); + + // Create stdio transport connection + let (connection, io_threads) = Connection::stdio(); + + // Advertise full document sync and definition provider capabilities + let server_capabilities = serde_json::to_value(&ServerCapabilities { + text_document_sync: Some(TextDocumentSyncCapability::Kind(TextDocumentSyncKind::FULL)), + 
definition_provider: Some(OneOf::Left(true)), + references_provider: Some(OneOf::Left(true)), + document_formatting_provider: Some(OneOf::Left(true)), + code_action_provider: Some(lsp_types::CodeActionProviderCapability::Options( + CodeActionOptions { + code_action_kinds: Some(vec![CodeActionKind::QUICKFIX]), + resolve_provider: Some(false), + ..Default::default() + }, + )), + hover_provider: Some(HoverProviderCapability::Simple(true)), + inlay_hint_provider: Some(OneOf::Right(InlayHintServerCapabilities::Options( + InlayHintOptions { + resolve_provider: Some(false), + ..Default::default() + }, + ))), + completion_provider: Some(CompletionOptions { + trigger_characters: Some(completion_trigger_characters()), + ..Default::default() + }), + semantic_tokens_provider: Some( + lsp_types::SemanticTokensServerCapabilities::SemanticTokensOptions( + lsp_types::SemanticTokensOptions { + work_done_progress_options: lsp_types::WorkDoneProgressOptions { + work_done_progress: Some(false), + }, + legend: lsp_types::SemanticTokensLegend { + token_types: vec![ + lsp_types::SemanticTokenType::ENUM_MEMBER, // terminal + lsp_types::SemanticTokenType::TYPE, // non-terminal + lsp_types::SemanticTokenType::KEYWORD, // directive + lsp_types::SemanticTokenType::PARAMETER, // binding + lsp_types::SemanticTokenType::VARIABLE, // $var + lsp_types::SemanticTokenType::PROPERTY, // @loc + ], + token_modifiers: vec![], + }, + range: Some(false), + full: Some(lsp_types::SemanticTokensFullOptions::Bool(true)), + }, + ), + ), + ..Default::default() + })?; + + connection.initialize(server_capabilities)?; + + eprintln!("RustyLR LSP server initialized successfully."); + + // Store open document contents + let mut documents: HashMap = HashMap::new(); + let mut semantic_tokens_enabled = true; + + // Main event loop + for msg in &connection.receiver { + match msg { + Message::Request(req) => { + if connection.handle_shutdown(&req)? 
{ + return Ok(()); + } + + if req.method == GotoDefinition::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting goto definition request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position_params.text_document.uri; + let position = params.text_document_position_params.position; + + let mut response = Response::new_ok(id.clone(), serde_json::Value::Null); + if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| { + goto_definition::find_definition(content, position) + }) { + Ok(Some(range)) => { + let loc = Location::new(uri.clone(), range); + response = + Response::new_ok(id, GotoDefinitionResponse::Scalar(loc)); + } + Ok(None) => {} + Err(message) => { + eprintln!("RustyLR goto-definition panicked: {message}"); + } + } + } + connection.sender.send(Message::Response(response))?; + } else if req.method == lsp_types::request::References::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting references request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position.text_document.uri; + let position = params.text_document_position.position; + + let mut response = Response::new_ok(id.clone(), serde_json::Value::Null); + if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| references::find_references(content, position)) { + Ok(Some(locations)) => { + let mapped_locations = locations + .into_iter() + .map(|range| Location::new(uri.clone(), range)) + .collect::>(); + response = Response::new_ok(id, mapped_locations); + } + Ok(None) => { + response = Response::new_ok(id, Vec::::new()); + } + Err(message) => { + eprintln!("RustyLR references panicked: {message}"); + } + } + } + connection.sender.send(Message::Response(response))?; + } else if req.method == Completion::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + 
eprintln!("Error extracting completion request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position.text_document.uri; + let position = params.text_document_position.position; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| completion::completions(content, position)) { + Ok(completions) => Response::new_ok(id, completions), + Err(message) => { + eprintln!("RustyLR completion panicked: {message}"); + Response::new_ok( + id, + lsp_types::CompletionResponse::Array(Vec::new()), + ) + } + } + } else { + Response::new_ok(id, lsp_types::CompletionResponse::Array(Vec::new())) + }; + connection.sender.send(Message::Response(response))?; + } else if req.method == HoverRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting hover request: {:?}", e); + continue; + } + }; + + let uri = params.text_document_position_params.text_document.uri; + let position = params.text_document_position_params.position; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| hover::hover(content, position)) { + Ok(hover) => Response::new_ok(id, hover), + Err(message) => { + eprintln!("RustyLR hover panicked: {message}"); + Response::new_ok(id, Option::::None) + } + } + } else { + Response::new_ok(id, Option::::None) + }; + connection.sender.send(Message::Response(response))?; + } else if req.method == InlayHintRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting inlay hint request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let range = params.range; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| inlay_hint::inlay_hints(content, range)) { + Ok(hints) => Response::new_ok(id, Some(hints)), + Err(message) => { + eprintln!("RustyLR inlay hint panicked: {message}"); + 
Response::new_ok(id, Option::>::None) + } + } + } else { + Response::new_ok(id, Option::>::None) + }; + connection.sender.send(Message::Response(response))?; + } else if req.method == CodeActionRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting code action request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| { + code_action::code_actions(content, uri, params.context.diagnostics) + }) { + Ok(actions) => Response::new_ok(id, Some(actions)), + Err(message) => { + eprintln!("RustyLR code action panicked: {message}"); + Response::new_ok(id, Option::::None) + } + } + } else { + Response::new_ok(id, Option::::None) + }; + connection.sender.send(Message::Response(response))?; + } else if req.method == Formatting::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting formatting request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let response = if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| formatter::formatting(content)) { + Ok(edits) => Response::new_ok(id, Some(edits)), + Err(message) => { + eprintln!("RustyLR formatting panicked: {message}"); + Response::new_ok(id, Option::>::None) + } + } + } else { + Response::new_ok(id, Option::>::None) + }; + connection.sender.send(Message::Response(response))?; + } else if req.method == SemanticTokensFullRequest::METHOD { + let (id, params) = match cast_request::(req) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting semantic tokens request: {:?}", e); + continue; + } + }; + + let uri = params.text_document.uri; + let response = if semantic_tokens_enabled { + if let Some(content) = documents.get(&uri) { + match catch_lsp_panic(|| semantic_tokens::semantic_tokens(content)) { + Ok(Some(tokens)) => 
Response::new_ok( + id, + Some(lsp_types::SemanticTokensResult::Tokens(tokens)), + ), + Ok(None) => Response::new_ok( + id, + Option::::None, + ), + Err(message) => { + eprintln!("RustyLR semantic tokens panicked: {message}"); + Response::new_ok( + id, + Option::::None, + ) + } + } + } else { + Response::new_ok(id, Option::::None) + } + } else { + Response::new_ok(id, Option::::None) + }; + connection.sender.send(Message::Response(response))?; + } + } + Message::Response(_resp) => {} + Message::Notification(not) => { + if not.method == DidOpenTextDocument::METHOD { + let params = match cast_notification::(not) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting didOpen notification: {:?}", e); + continue; + } + }; + let uri = params.text_document.uri; + let text = params.text_document.text; + + documents.insert(uri.clone(), text.clone()); + publish_diagnostics(&connection, uri, &text); + } else if not.method == DidChangeTextDocument::METHOD { + let params = match cast_notification::(not) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting didChange notification: {:?}", e); + continue; + } + }; + let uri = params.text_document.uri; + + if let Some(change) = params.content_changes.into_iter().next() { + documents.insert(uri.clone(), change.text.clone()); + publish_diagnostics(&connection, uri, &change.text); + } + } else if not.method == DidSaveTextDocument::METHOD { + let params = match cast_notification::(not) { + Ok(res) => res, + Err(e) => { + eprintln!("Error extracting didSave notification: {:?}", e); + continue; + } + }; + let uri = params.text_document.uri; + if let Some(text) = documents.get(&uri) { + publish_diagnostics(&connection, uri, text); + } + } else if not.method == lsp_types::notification::DidChangeConfiguration::METHOD { + let params = match cast_notification::< + lsp_types::notification::DidChangeConfiguration, + >(not) + { + Ok(res) => res, + Err(e) => { + eprintln!( + "Error extracting didChangeConfiguration 
/// Returns the characters that make the client request completions as they are typed.
///
/// The set is the three sigils `%`, `@` and `$`, the underscore, and every ASCII letter,
/// each as its own one-character string, in the order they appear in the literal below.
fn completion_trigger_characters() -> Vec<String> {
    "%@$_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        .chars()
        .map(String::from)
        .collect()
}
"unknown panic payload".to_string() + } +} + +fn cast_request( + req: Request, +) -> Result<(RequestId, R::Params), lsp_server::ExtractError> +where + R: lsp_types::request::Request, + R::Params: serde::de::DeserializeOwned, +{ + req.extract(R::METHOD) +} + +fn cast_notification( + not: Notification, +) -> Result> +where + N: lsp_types::notification::Notification, + N::Params: serde::de::DeserializeOwned, +{ + not.extract(N::METHOD) +} diff --git a/rusty_lr_lsp/src/position.rs b/rusty_lr_lsp/src/position.rs new file mode 100644 index 00000000..f07696c2 --- /dev/null +++ b/rusty_lr_lsp/src/position.rs @@ -0,0 +1,101 @@ +use lsp_types::{Position, Range}; + +/// Converts a 0-indexed byte offset in `content` into an LSP `Position` (line, character). +/// The LSP character index represents the UTF-16 code unit offset on that line. +pub fn offset_to_position(content: &str, offset: usize) -> Position { + let mut line = 0; + let mut character = 0; + let mut current_offset = 0; + + for c in content.chars() { + if current_offset >= offset { + break; + } + let char_len = c.len_utf8(); + if current_offset + char_len > offset { + break; + } + current_offset += char_len; + + if c == '\n' { + line += 1; + character = 0; + } else { + character += c.len_utf16() as u32; + } + } + Position::new(line, character) +} + +/// Converts an LSP `Position` (line, character) back into a 0-indexed byte offset in `content`. +pub fn position_to_offset(content: &str, pos: Position) -> usize { + let mut line = 0; + let mut character = 0; + let mut byte_offset = 0; + + for c in content.chars() { + if line == pos.line && character >= pos.character { + break; + } + byte_offset += c.len_utf8(); + + if c == '\n' { + line += 1; + character = 0; + } else { + character += c.len_utf16() as u32; + } + } + byte_offset +} + +/// Converts a `std::ops::Range` byte range into an LSP `Range`. 
+pub fn range_to_lsp_range(content: &str, range: std::ops::Range) -> Range { + Range::new( + offset_to_position(content, range.start), + offset_to_position(content, range.end), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_position_conversion() { + let content = "hello\nworld\n안녕 하세요\n😀 hello"; + + // Test ASCII character + let pos = offset_to_position(content, 0); + assert_eq!(pos.line, 0); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 0); + + let pos = offset_to_position(content, 6); // 'w' in 'world' + assert_eq!(pos.line, 1); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 6); + + // Test multi-byte UTF-8 character (Korean '안' is 3 bytes in UTF-8, 1 code unit in UTF-16) + let pos = offset_to_position(content, 12); // start of '안' + assert_eq!(pos.line, 2); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 12); + + let pos = offset_to_position(content, 15); // after '안', start of '녕' + assert_eq!(pos.line, 2); + assert_eq!(pos.character, 1); + assert_eq!(position_to_offset(content, pos), 15); + + // Test Emoji (😀 is 4 bytes in UTF-8, 2 code units in UTF-16) + let pos = offset_to_position(content, 29); // start of emoji + assert_eq!(pos.line, 3); + assert_eq!(pos.character, 0); + assert_eq!(position_to_offset(content, pos), 29); + + let pos = offset_to_position(content, 33); // after emoji, before space + assert_eq!(pos.line, 3); + assert_eq!(pos.character, 2); + assert_eq!(position_to_offset(content, pos), 33); + } +} diff --git a/rusty_lr_lsp/src/references.rs b/rusty_lr_lsp/src/references.rs new file mode 100644 index 00000000..d88e6d9f --- /dev/null +++ b/rusty_lr_lsp/src/references.rs @@ -0,0 +1,288 @@ +use lsp_types::{Position, Range}; +use proc_macro2::TokenStream; +use rusty_lr_parser::grammar::Grammar; +use rusty_lr_parser::{ + GrammarArgs, IdentOrLiteral, Located, PatternArgs, PrecDPrecArgs, TerminalSetItem, +}; +use 
std::str::FromStr; + +use crate::diagnostics::split_stream; +use crate::position::{position_to_offset, range_to_lsp_range}; + +/// Traverses the AST of GrammarArgs to collect only terminal, non-terminal, prec, and error references. +fn collect_references(args: &GrammarArgs) -> Vec> { + let mut collected = Vec::new(); + + // 1. %start names + for start_name in &args.start_rule_name { + collected.push(start_name.clone()); + } + + // 2. %token definitions + for (t_name, _) in &args.terminals { + collected.push(t_name.clone()); + } + + // 3. Precedence definitions + for (_, _, items) in &args.precedences { + for item in items { + if let IdentOrLiteral::Ident(ident) = item { + collected.push(ident.clone()); + } + } + } + + // 4. Rule definitions, pattern idents, and %prec + for rule in &args.rules { + collected.push(rule.name.clone()); + for line in &rule.rule_lines { + // Pattern idents + for (_, pattern) in &line.tokens { + collect_pattern_located(pattern, &mut collected); + } + // %prec identifiers + for prec in &line.precs { + if let PrecDPrecArgs::Prec(IdentOrLiteral::Ident(ident)) = prec { + collected.push(ident.clone()); + } + } + } + } + + collected +} + +/// Recursively traverses a PatternArgs structure to collect Located instances. +fn collect_pattern_located(pattern: &PatternArgs, collected: &mut Vec>) { + match pattern { + PatternArgs::Ident(ident) => { + collected.push(ident.clone()); + } + PatternArgs::Plus { base, .. } + | PatternArgs::Star { base, .. } + | PatternArgs::Question { base, .. } + | PatternArgs::Exclamation { base, .. } => { + collect_pattern_located(base, collected); + } + PatternArgs::TerminalSet(ts) => { + for item in &ts.items { + match item { + TerminalSetItem::Terminal(ident) => { + collected.push(ident.clone()); + } + TerminalSetItem::Range(first, last) => { + collected.push(first.clone()); + collected.push(last.clone()); + } + _ => {} + } + } + } + PatternArgs::Group { alternatives, .. 
} => { + for alt in alternatives { + for pat in alt { + collect_pattern_located(pat, collected); + } + } + } + PatternArgs::Minus { base, exclude } => { + collect_pattern_located(base, collected); + collect_pattern_located(exclude, collected); + } + PatternArgs::Sep { + base, delimiter, .. + } => { + collect_pattern_located(base, collected); + collect_pattern_located(delimiter, collected); + } + _ => {} + } +} + +/// Finds all references of the terminal or non-terminal symbol under the cursor. +pub fn find_references(content: &str, target_pos: Position) -> Option> { + let offset = position_to_offset(content, target_pos); + + // Parse the entire document into TokenStream + let token_stream = TokenStream::from_str(content).ok()?; + let (_, macro_stream) = split_stream(token_stream).ok()?; + let grammar_args = Grammar::parse_args(macro_stream).ok()?; + let span_manager = grammar_args.span_manager.clone(); + + // Collect all referenceable locations + let all_references = collect_references(&grammar_args); + + // Find the one that contains the click offset + let clicked = all_references.iter().find(|loc| { + if let Some(range) = span_manager.get_byterange(&loc.location()) { + range.contains(&offset) + } else { + false + } + })?; + + let name = clicked.value(); + + // Ensure the symbol is indeed a valid terminal, non-terminal, precedence symbol, or 'error' + let is_terminal = grammar_args.terminals.iter().any(|(t, _)| t.value == *name); + let is_nonterminal = grammar_args.rules.iter().any(|r| r.name.value == *name); + let is_prec_symbol = grammar_args.precedences.iter().any(|(_, _, items)| { + items.iter().any(|item| match item { + IdentOrLiteral::Ident(ident) => ident.value() == name, + _ => false, + }) + }); + let is_error = name == "error"; + + if !is_terminal && !is_nonterminal && !is_prec_symbol && !is_error { + return None; + } + + // Filter and map all matches of the clicked name to LSP Range + let mut result = Vec::new(); + for loc in &all_references { + if 
loc.value() == name { + if let Some(range) = span_manager.get_byterange(&loc.location()) { + result.push(range_to_lsp_range(content, range)); + } + } + } + + Some(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, +} + +%% + +%tokentype Token; +%start E; + +%token num Token::Num(_); +%token plus Token::Plus; + +E(_) : E plus num { 0 } + | num { 0 } + ; +"#; + + #[test] + fn test_find_references_terminal() { + // Find position of the 'plus' in rule "E plus num" + let index = MOCK_GRAMMAR.find("plus num").unwrap(); + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let refs = find_references(MOCK_GRAMMAR, pos).unwrap(); + + // There should be 2 references: + // 1. "%token plus Token::Plus;" (definition) + // 2. "E plus num" (usage) + assert_eq!(refs.len(), 2); + + // Verify the content at each range + for range in refs { + let start = crate::position::position_to_offset(MOCK_GRAMMAR, range.start); + let end = crate::position::position_to_offset(MOCK_GRAMMAR, range.end); + assert_eq!(&MOCK_GRAMMAR[start..end], "plus"); + } + } + + #[test] + fn test_find_references_nonterminal() { + // Find position of '%start E' + let index = MOCK_GRAMMAR.find("start E").unwrap() + 6; // start of 'E' + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let refs = find_references(MOCK_GRAMMAR, pos).unwrap(); + + // References to E: + // 1. "%start E;" + // 2. "E(_)" (definition) + // 3. 
"E plus num" (usage) + assert_eq!(refs.len(), 3); + + for range in refs { + let start = crate::position::position_to_offset(MOCK_GRAMMAR, range.start); + let end = crate::position::position_to_offset(MOCK_GRAMMAR, range.end); + assert_eq!(&MOCK_GRAMMAR[start..end], "E"); + } + } + + #[test] + fn test_find_references_prec_and_error() { + let grammar = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Minus, +} + +%% + +%tokentype Token; +%start E; + +%left plus; +%left minus; +%token num Token::Num(_); +%token plus Token::Plus; +%token minus Token::Minus; + +E(_) : E plus E + | minus E %prec minus + | error + ; +"#; + + // 1. Find references to precedence/terminal 'minus' + // Click on '%prec minus' + let index = grammar.find("%prec minus").unwrap() + 6; // start of 'minus' + let pos = crate::position::offset_to_position(grammar, index); + let refs = find_references(grammar, pos).unwrap(); + + // References to 'minus': + // - "%left minus;" + // - "%token minus Token::Minus;" + // - "minus E" (rule pattern) + // - "%prec minus" (precedence override) + assert_eq!(refs.len(), 4); + for range in refs { + let start = crate::position::position_to_offset(grammar, range.start); + let end = crate::position::position_to_offset(grammar, range.end); + assert_eq!(&grammar[start..end], "minus"); + } + + // 2. Find references to 'error' + let index = grammar.find("error").unwrap(); + let pos = crate::position::offset_to_position(grammar, index); + let refs = find_references(grammar, pos).unwrap(); + + assert_eq!(refs.len(), 1); + let range = refs[0]; + let start = crate::position::position_to_offset(grammar, range.start); + let end = crate::position::position_to_offset(grammar, range.end); + assert_eq!(&grammar[start..end], "error"); + } + + #[test] + fn test_find_references_no_action_leak() { + // The mock grammar has `{ 0 }` inside the reduce action. + // If we search inside the reduce action, it shouldn't match anything. 
+ // We verify that clicking inside `{ 0 }` returns None. + let index = MOCK_GRAMMAR.find("{ 0 }").unwrap() + 2; // points to '0' + let pos = crate::position::offset_to_position(MOCK_GRAMMAR, index); + + let refs = find_references(MOCK_GRAMMAR, pos); + assert!(refs.is_none()); + } +} diff --git a/rusty_lr_lsp/src/semantic_tokens.rs b/rusty_lr_lsp/src/semantic_tokens.rs new file mode 100644 index 00000000..7a159bf4 --- /dev/null +++ b/rusty_lr_lsp/src/semantic_tokens.rs @@ -0,0 +1,507 @@ +use crate::position::offset_to_position; +use lsp_types::{SemanticToken, SemanticTokens}; +use proc_macro2::{TokenStream, TokenTree}; +use std::collections::HashSet; +use std::str::FromStr; + +#[derive(Debug, Clone, PartialEq, Eq)] +struct RawSemanticToken { + line: u32, + start: u32, + length: u32, + token_type: u32, +} + +/// Main entry point for semantic tokens: takes file content and returns encoded SemanticTokens. +pub fn semantic_tokens(content: &str) -> Option { + let grammar_start = find_grammar_start_offset(content).unwrap_or(0); + let grammar_section = &content[grammar_start..]; + + let token_stream = TokenStream::from_str(grammar_section).ok()?; + let tokens: Vec = token_stream.into_iter().collect(); + + // 1. Collect terminal and non-terminal names + let (mut terminals, mut non_terminals) = collect_names(&tokens); + + // Also attempt to get names from completion module's parsed GrammarArgs if possible + if let Ok(args) = crate::completion::parse_args(content) { + for (term, _) in args.terminals { + terminals.insert(term.value().clone()); + } + for rule in args.rules { + non_terminals.insert(rule.name.value().clone()); + } + for (_, _, items) in args.precedences { + for item in items { + terminals.insert(item.to_string()); + } + } + } + + terminals.insert("error".to_string()); + + // 2. 
Traverse tokens to build RawSemanticToken list + let mut raw_tokens = Vec::new(); + traverse_tokens( + tokens, + content, + grammar_start, + false, + &terminals, + &non_terminals, + &mut raw_tokens, + ); + + // 3. Sort tokens: by line, then by start character + raw_tokens.sort_by(|a, b| { + if a.line != b.line { + a.line.cmp(&b.line) + } else { + a.start.cmp(&b.start) + } + }); + + // 4. Delta-encode the sorted tokens + let mut data = Vec::new(); + let mut last_line = 0; + let mut last_start = 0; + + for token in raw_tokens { + let delta_line = token.line - last_line; + let delta_start = if delta_line == 0 { + token.start - last_start + } else { + token.start + }; + + data.push(SemanticToken { + delta_line, + delta_start, + length: token.length, + token_type: token.token_type, + token_modifiers_bitset: 0, + }); + + last_line = token.line; + last_start = token.start; + } + + Some(SemanticTokens { + result_id: None, + data, + }) +} + +/// Find the end of the `%%` separator, which marks the start of the grammar section. +fn find_grammar_start_offset(content: &str) -> Option { + let token_stream = TokenStream::from_str(content).ok()?; + let mut iter = token_stream.into_iter().peekable(); + while let Some(token) = iter.next() { + if let TokenTree::Punct(punct) = &token { + if punct.as_char() == '%' && punct.spacing() == proc_macro2::Spacing::Joint { + if let Some(TokenTree::Punct(next)) = iter.peek() { + if next.as_char() == '%' && next.spacing() == proc_macro2::Spacing::Alone { + return Some(next.span().byte_range().end); + } + } + } + } + } + None +} + +/// Helper to scan top-level tokens in the grammar section to extract terminal/non-terminal declarations. 
+fn collect_names(tokens: &[TokenTree]) -> (HashSet, HashSet) { + let mut terminals = HashSet::new(); + let mut non_terminals = HashSet::new(); + let mut iter = tokens.iter().peekable(); + + while let Some(token) = iter.next() { + match token { + TokenTree::Punct(punct) if punct.as_char() == '%' => { + if let Some(TokenTree::Ident(ident)) = iter.peek() { + let directive = ident.to_string(); + if directive == "token" { + iter.next(); // consume "token" + if let Some(TokenTree::Ident(term_name)) = iter.peek() { + terminals.insert(term_name.to_string()); + } + } else if directive == "left" + || directive == "right" + || directive == "precedence" + { + iter.next(); // consume directive keyword + while let Some(next_token) = iter.peek() { + match next_token { + TokenTree::Punct(p) if p.as_char() == ';' => { + iter.next(); + break; + } + TokenTree::Ident(id) => { + terminals.insert(id.to_string()); + iter.next(); + } + TokenTree::Literal(lit) => { + terminals.insert(lit.to_string()); + iter.next(); + } + TokenTree::Punct(_) => { + iter.next(); + } + _ => { + iter.next(); + } + } + } + } + } + } + TokenTree::Ident(ident) => { + // Rule definition: Ident [type] : ... + let mut temp_iter = iter.clone(); + let mut is_rule = false; + if let Some(next) = temp_iter.peek() { + if let TokenTree::Group(group) = next { + if group.delimiter() == proc_macro2::Delimiter::Parenthesis { + temp_iter.next(); + } + } + } + if let Some(TokenTree::Punct(punct)) = temp_iter.peek() { + if punct.as_char() == ':' { + is_rule = true; + } + } + if is_rule { + non_terminals.insert(ident.to_string()); + } + } + _ => {} + } + } + + (terminals, non_terminals) +} + +/// Recursive traversal of the token stream. 
+fn traverse_tokens( + tokens: Vec, + full_content: &str, + section_offset: usize, + in_action: bool, + terminals: &HashSet, + non_terminals: &HashSet, + raw_tokens: &mut Vec, +) { + let mut iter = tokens.into_iter().peekable(); + + while let Some(token) = iter.next() { + match token { + TokenTree::Group(group) => { + let delimiter = group.delimiter(); + let sub_in_action = in_action || delimiter == proc_macro2::Delimiter::Brace; + let sub_tokens: Vec = group.stream().into_iter().collect(); + traverse_tokens( + sub_tokens, + full_content, + section_offset, + sub_in_action, + terminals, + non_terminals, + raw_tokens, + ); + } + TokenTree::Punct(punct) => { + let ch = punct.as_char(); + let span = punct.span(); + let range = span.byte_range(); + let absolute_start = section_offset + range.start; + + if ch == '%' { + if !in_action { + if let Some(TokenTree::Ident(next_ident)) = iter.peek() { + let next_range = next_ident.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 2, // directive + }); + iter.next(); + continue; + } + } + } else if ch == '$' { + let mut handled = false; + if let Some(next) = iter.peek() { + match next { + TokenTree::Ident(next_ident) => { + let next_range = next_ident.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + iter.next(); + handled = true; + } + TokenTree::Literal(next_lit) => { + let next_str = 
next_lit.to_string(); + if next_str.chars().all(|c| c.is_ascii_digit()) { + let next_range = next_lit.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = + offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + iter.next(); + handled = true; + } + } + TokenTree::Punct(next_punct) if next_punct.as_char() == '$' => { + let next_range = next_punct.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + iter.next(); + handled = true; + } + _ => {} + } + } + if !handled { + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, section_offset + range.end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 4, // $var + }); + } + } else if ch == '@' { + let mut handled = false; + if let Some(next) = iter.peek() { + match next { + TokenTree::Ident(next_ident) => { + let next_range = next_ident.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + iter.next(); + handled = true; + } + 
TokenTree::Literal(next_lit) => { + let next_str = next_lit.to_string(); + if next_str.chars().all(|c| c.is_ascii_digit()) { + let next_range = next_lit.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = + offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + iter.next(); + handled = true; + } + } + TokenTree::Punct(next_punct) if next_punct.as_char() == '$' => { + let next_range = next_punct.span().byte_range(); + let absolute_end = section_offset + next_range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + iter.next(); + handled = true; + } + _ => {} + } + } + if !handled { + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, section_offset + range.end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: 5, // @loc + }); + } + } + } + TokenTree::Ident(ident) => { + let name = ident.to_string(); + if in_action { + if name == "data" || name == "lookahead" || name == "shift" { + let span = ident.span(); + let range = span.byte_range(); + let absolute_start = section_offset + range.start; + let absolute_end = section_offset + range.end; + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: 
end_pos.character - start_pos.character, + token_type: 3, // binding / parameter + }); + } + continue; + } + + let span = ident.span(); + let range = span.byte_range(); + let absolute_start = section_offset + range.start; + let absolute_end = section_offset + range.end; + let name = ident.to_string(); + + let mut is_binding = false; + if let Some(TokenTree::Punct(next_punct)) = iter.peek() { + if next_punct.as_char() == '=' { + is_binding = true; + } + } + + let token_type = if is_binding { + Some(3) // binding + } else if terminals.contains(&name) { + Some(0) // terminal + } else if non_terminals.contains(&name) { + Some(1) // non-terminal + } else { + None + }; + + if let Some(tt) = token_type { + let start_pos = offset_to_position(full_content, absolute_start); + let end_pos = offset_to_position(full_content, absolute_end); + raw_tokens.push(RawSemanticToken { + line: start_pos.line, + start: start_pos.character, + length: end_pos.character - start_pos.character, + token_type: tt, + }); + } + } + _ => {} + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const MOCK_GRAMMAR: &str = r#" +#[derive(Debug, Clone)] +pub enum Token { + Num(i32), + Plus, + Comma, +} + +%% + +%tokentype Token; +%start List; + +%left plus minus; + +%token num Token::Num(_); +%token plus Token::Plus; +%token comma Token::Comma; + +E(i32) : left=E plus num { $1 + $3 } + | error { *data += 1; 0 } + | num %prec minus { 0 } + ; +List(Vec) : $sep(E, comma, +) { E }; +"#; + + #[test] + fn test_semantic_tokens() { + let tokens_res = semantic_tokens(MOCK_GRAMMAR).expect("Failed to parse semantic tokens"); + let data_res = tokens_res.data; + assert!(!data_res.is_empty()); + + // Decode delta-encoded tokens and map back to substrings + let mut decoded = Vec::new(); + let mut current_line = 0; + let mut current_char = 0; + for token in &data_res { + current_line += token.delta_line; + if token.delta_line == 0 { + current_char += token.delta_start; + } else { + current_char = 
token.delta_start; + } + + // Find substring in MOCK_GRAMMAR + let pos = lsp_types::Position::new(current_line, current_char); + let start_offset = crate::position::position_to_offset(MOCK_GRAMMAR, pos); + let end_offset = start_offset + token.length as usize; + let text = &MOCK_GRAMMAR[start_offset..end_offset]; + decoded.push((text.to_string(), token.token_type)); + } + + // Directives (type 2) + assert!(decoded.contains(&("%tokentype".to_string(), 2))); + assert!(decoded.contains(&("%start".to_string(), 2))); + assert!(decoded.contains(&("%token".to_string(), 2))); + assert!(decoded.contains(&("%left".to_string(), 2))); + assert!(decoded.contains(&("%prec".to_string(), 2))); + + // Terminals (type 0) + assert!(decoded.contains(&("num".to_string(), 0))); + assert!(decoded.contains(&("plus".to_string(), 0))); + assert!(decoded.contains(&("comma".to_string(), 0))); + assert!(decoded.contains(&("minus".to_string(), 0))); // precedence-only symbol highlighted as terminal + assert!(decoded.contains(&("error".to_string(), 0))); // reserved terminal + + // Non-terminals (type 1) + assert!(decoded.contains(&("E".to_string(), 1))); + assert!(decoded.contains(&("List".to_string(), 1))); + + // Bindings / parameters (type 3) + assert!(decoded.contains(&("left".to_string(), 3))); + assert!(decoded.contains(&("data".to_string(), 3))); // reserved reduce parameter + + // $vars (type 4) + assert!(decoded.contains(&("$1".to_string(), 4))); + assert!(decoded.contains(&("$3".to_string(), 4))); + assert!(decoded.contains(&("$sep".to_string(), 4))); + } +} diff --git a/rusty_lr_parser/src/grammar.rs b/rusty_lr_parser/src/grammar.rs index e2feeb05..e64b7266 100644 --- a/rusty_lr_parser/src/grammar.rs +++ b/rusty_lr_parser/src/grammar.rs @@ -154,6 +154,26 @@ pub enum ResolvedAllowTarget { } impl Grammar { + /// Resolved Rust type for `%tokentype`, after substitutions such as `$tokentype` + /// and storage modifiers such as `box` have been stripped. 
+ pub fn token_type(&self) -> &TokenStream { + &self.token_typename + } + + /// Whether terminal values are stored as `Box<%tokentype>` in the generated + /// parser's data enum. + pub fn token_type_boxed(&self) -> bool { + self.is_tokentype_boxed + } + + /// Resolved Rust type for a non-terminal by name, plus whether it is boxed + /// in the generated parser's data enum. + pub fn nonterminal_type(&self, name: &str) -> Option<(Option<&TokenStream>, bool)> { + let index = self.nonterminals_index.get(name)?; + let nonterminal = &self.nonterminals[*index]; + Some((nonterminal.ruletype.as_ref(), nonterminal.ruletype_boxed)) + } + fn is_terminal_allowed_by_target(&self, term: Terminal, target: &ResolvedAllowTarget) -> bool { match target { ResolvedAllowTarget::Name(name) => { diff --git a/rusty_lr_parser/src/lib.rs b/rusty_lr_parser/src/lib.rs index 68dcaeff..ad6e83a8 100644 --- a/rusty_lr_parser/src/lib.rs +++ b/rusty_lr_parser/src/lib.rs @@ -16,9 +16,11 @@ pub mod terminal_info; pub(crate) mod terminalset; pub mod utils; -pub use parser::args::TableLayout; -/// Re-export Location for use by external crates (e.g. rusty_lr_buildscript) -pub use parser::location::Location; +pub use parser::args::{ + GrammarArgs, IdentOrLiteral, PatternArgs, PrecDPrecArgs, RuleDefArgs, RuleLineArgs, TableLayout, +}; +pub use parser::location::{Located, Location}; +pub use terminalset::{TerminalSet, TerminalSetItem}; /// This, `rusty_lr_parser` is designed to generate a code, that will be relied on `rusty_lr`. ///