Skip to content

Commit d45f55d

Browse files
authored
llms-txt - do not process external links (#14183)
1 parent cb7f370 commit d45f55d

3 files changed

Lines changed: 13 additions & 6 deletions

File tree

src/resources/filters/llms/llms.lua

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ function Link(link)
 return link.content
 end
 
-if link.target and (link.target:match("%.html$") or link.target:match("%.html#")) then
+local is_absolute = link.target:match("^%a[%w+%-%.]*:") or link.target:match("^//")
+if link.target and not is_absolute and (link.target:match("%.html$") or link.target:match("%.html#")) then
 link.target = link.target:gsub("%.html#", ".llms.md#")
 link.target = link.target:gsub("%.html$", ".llms.md")
 link.target = link.target:gsub("^%./", "")

tests/docs/smoke-all/website/llms-txt/about.qmd

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ _quarto:
 ensureLlmsMdExists: true
 ensureLlmsMdRegexMatches:
 # First array: patterns that MUST match
-- ["^# About", "> \\*\\*NOTE:\\*\\*", "> \\*\\*WARNING:\\*\\*", "This is a note", "``` python", "def hello", "\\| Feature", "\\|[-]+\\|", "\\[home page\\]\\(.*\\.llms\\.md\\)", "\\[test site intro\\]\\(index\\.llms\\.md#test-content\\)", "## Alpha Tab", "Alpha content here", "## Beta Tab", "Beta content here"]
-# Second array: patterns that must NOT match (no .html links, no breadcrumbs, no empty tab links)
-- ["\\.html\\)", "\\.html#", "\\[Info\\]", "\\[Alpha Tab\\]\\(\\)", "\\[Beta Tab\\]\\(\\)"]
+- ["^# About", "> \\*\\*NOTE:\\*\\*", "> \\*\\*WARNING:\\*\\*", "This is a note", "``` python", "def hello", "\\| Feature", "\\|[-]+\\|", "\\[home page\\]\\(.*\\.llms\\.md\\)", "\\[test site intro\\]\\(index\\.llms\\.md#test-content\\)", "## Alpha Tab", "Alpha content here", "## Beta Tab", "Beta content here", "https://pandoc.org/lua-filters.html", "https://www.lua.org/manual/5.3/manual.html#6.4"]
+# Second array: patterns that must NOT match (no internal .html links, no breadcrumbs, no empty tab links)
+- ["\\(index\\.html\\)", "\\(index\\.html#", "\\[Info\\]", "\\[Alpha Tab\\]\\(\\)", "\\[Beta Tab\\]\\(\\)"]
 ---
 
 About this test site.
@@ -58,3 +58,9 @@ Beta content here.
 Go back to the [home page](index.qmd).
 
 Go to the [test site intro](index.qmd#test-content).
+
+## External Link Example
+
+Check out [Pandoc filters](https://pandoc.org/lua-filters.html) for more info.
+
+See the [Lua manual](https://www.lua.org/manual/5.3/manual.html#6.4) for string functions.

tests/docs/smoke-all/website/llms-txt/index.qmd

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ _quarto:
 ensureLlmsMdRegexMatches:
 # First array: patterns that MUST match - verify anchor links, code annotations, and conditional content
 - ["\\[callout examples\\]\\(about\\.llms\\.md#callout-examples\\)", "# <1>", "# <2>", "Load tidyverse", "Open help for ggplot", "only for LLM consumption"]
-# Second array: patterns that must NOT match (no .html links, no annotation UI, no hidden content)
-- ["\\.html\\)", "\\.html#", "code-annotation-anchor", "should not appear in LLM output"]
+# Second array: patterns that must NOT match (no internal .html links, no annotation UI, no hidden content)
+- ["\\(about\\.html\\)", "\\(about\\.html#", "code-annotation-anchor", "should not appear in LLM output"]
 ---
 
 ## Test Content

0 commit comments

Comments
 (0)