Skip to content

Commit f5ccd9f

Browse files
cscheid and claude committed
Fix llms-txt breadcrumb leaking and link prefix
- Remove breadcrumbs from extracted HTML before Pandoc conversion, since Pandoc strips <nav> wrappers and loses the droppable class
- Strip ./ prefix from converted .llms.md links for cleaner output
- Fix test regex to match code annotation markers with space

Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 312ac69 commit f5ccd9f

3 files changed

Lines changed: 3 additions & 1 deletion

File tree

src/project/types/website/website-llms.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ function extractMainContent(doc: Document): string {
131131
".sidebar",
132132
".quarto-search",
133133
"nav.navbar",
134+
".quarto-page-breadcrumbs",
134135
"script",
135136
"style",
136137
"link[rel='stylesheet']",

src/resources/filters/llms/llms.lua

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ function Link(link)
193193
if link.target and (link.target:match("%.html$") or link.target:match("%.html#")) then
194194
link.target = link.target:gsub("%.html#", ".llms.md#")
195195
link.target = link.target:gsub("%.html$", ".llms.md")
196+
link.target = link.target:gsub("^%./", "")
196197
if link.classes:includes("btn") then
197198
link.attr = pandoc.Attr()
198199
end

tests/docs/smoke-all/website/llms-txt/index.qmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ _quarto:
1313
- []
1414
ensureLlmsMdRegexMatches:
1515
# First array: patterns that MUST match - verify anchor links, code annotations, and conditional content
16-
- ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "#<1>", "#<2>", "Load tidyverse", "Open help for ggplot", "only for LLM consumption"]
16+
- ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "# <1>", "# <2>", "Load tidyverse", "Open help for ggplot", "only for LLM consumption"]
1717
# Second array: patterns that must NOT match (no .html links, no annotation UI, no hidden content)
1818
- ["\\.html\\)", "\\.html#", "code-annotation-anchor", "should not appear in LLM output"]
1919
---

0 commit comments

Comments
 (0)