Skip to content
3 changes: 0 additions & 3 deletions src/generators/metadata/constants.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,5 @@ export const DOC_API_HEADING_TYPES = [
},
];

// This regex is used to match basic TypeScript generic types (e.g., Promise<string>)
// Capture group 1 is the base type and group 2 is the text between the brackets.
// The inner part may not contain `>`, so nested generics (e.g., Promise<Map<K, V>>) do not match.
export const TYPE_GENERIC_REGEX = /^([^<]+)<([^>]+)>$/;

// This is the base URL of the Man7 documentation
// (the manual section number, the command name and the `.html` suffix are appended to it)
export const DOC_MAN_BASE_URL = 'http://man7.org/linux/man-pages/man';
23 changes: 23 additions & 0 deletions src/generators/metadata/utils/__tests__/transformers.test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,27 @@ describe('transformTypeToReferenceLink', () => {
'[`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)&gt; & [`<Array>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)&gt;'
);
});

it('should transform a function returning a Generic type', () => {
  // The parameter list is expected to pass through verbatim; only the return type is linked
  const input = '(err: Error) => Promise<boolean>';
  const expected =
    '(err: Error) =&gt; [`<Promise>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)&lt;[`<boolean>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)&gt;';

  strictEqual(transformTypeToReferenceLink(input, {}), expected);
});

it('should respect precedence: Unions (|) are weaker than Intersections (&)', () => {
  // The rendered output is flat: operands keep their original order and separators
  const input = 'string | number & boolean';
  const expected =
    '[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [`<boolean>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#boolean_type)';

  strictEqual(transformTypeToReferenceLink(input, {}), expected);
});

it('should handle extreme nested combinations of functions, generics, unions, and intersections', () => {
const input =
'(str: MyType) => Promise<Map<string, number & string>, Map<string | number>>';
const expected =
'(str: MyType) =&gt; [`<Promise>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)&lt;[`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type), [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type) & [`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type)&gt;, [`<Map>`](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Map)&lt;[`<string>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#string_type) | [`<number>`](https://developer.mozilla.org/docs/Web/JavaScript/Data_structures#number_type)&gt;&gt;';
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

MyType should be handled, no?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. Parsing the individual types inside the parameter string (handling colons, commas, optional params `?`, etc.) requires a much deeper level of AST-like parsing, so I focused on generics and left the parameter string untouched for now; I will open another PR soon to handle it.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wanted to drop a quick update: regarding your question about `MyType` not being parsed, I went ahead and implemented the function signature parsing.


strictEqual(transformTypeToReferenceLink(input, {}), expected);
});
});
170 changes: 94 additions & 76 deletions src/generators/metadata/utils/transformers.mjs
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
import {
DOC_MAN_BASE_URL,
DOC_API_HEADING_TYPES,
TYPE_GENERIC_REGEX,
} from '../constants.mjs';
import { DOC_MAN_BASE_URL, DOC_API_HEADING_TYPES } from '../constants.mjs';
import { slug } from './slugger.mjs';
import { transformNodesToString } from '../../../utils/unist.mjs';
import BUILTIN_TYPE_MAP from '../maps/builtin.json' with { type: 'json' };
Expand All @@ -22,84 +18,130 @@ export const transformUnixManualToLink = (
) => {
return `[\`${text}\`](${DOC_MAN_BASE_URL}${sectionNumber}/${command}.${sectionNumber}${sectionLetter}.html)`;
};

/**
 * Safely splits a string by a given separator at depth 0, i.e. ignoring every
 * occurrence nested inside `< >`, `( )`, `[ ]` or `{ }`.
 *
 * The `>` of an arrow (`=>`) is never treated as a closing bracket, so function
 * types do not unbalance the depth counter.
 *
 * @param {string} str The string to split
 * @param {string} separator The separator to split by (e.g., '|', '&', ',', '=>')
 * @returns {string[]} The split pieces, each trimmed of surrounding whitespace
 */
const splitByOuterSeparator = (str, separator) => {
  // An empty separator would match at every index, so there is nothing to split on
  if (!separator) {
    return [str.trim()];
  }

  const pieces = [];
  let current = '';
  let depth = 0;

  for (let i = 0; i < str.length; i++) {
    const char = str[i];

    // Track nesting depth using every bracket kind that can wrap a type list
    if ('<([{'.includes(char)) {
      depth++;
    } else if (')]}'.includes(char) || (char === '>' && str[i - 1] !== '=')) {
      // Clamp at 0 so that a stray closer in malformed input cannot push the
      // depth negative and hide every separator that follows it
      depth = Math.max(0, depth - 1);
    }

    // Works for single-character and multi-character separators alike
    if (depth === 0 && str.startsWith(separator, i)) {
      pieces.push(current.trim());
      current = '';
      // Skip the remaining characters of a multi-character separator
      i += separator.length - 1;
      continue;
    }

    current += char;
  }

  pieces.push(current.trim());
  return pieces;
};

/**
 * Recursively parses advanced TypeScript types, including Unions, Intersections,
 * Functions, and Nested Generics, and renders them as Markdown.
 *
 * Constructs are tried from the loosest-binding to the tightest-binding one:
 * unions (`|`), intersections (`&`), function types (`=>`), generics
 * (`Base<Arg, ...>`) and finally plain type names. Any nested piece that cannot
 * be resolved is rendered as an unlinked code span instead of being dropped.
 *
 * @param {string} typeString The plain type string to evaluate
 * @param {Function} transformType The function used to resolve individual types into links
 * @returns {string|null} The formatted Markdown link(s), or null if the input is empty or a plain type that doesn't map
 */
const parseAdvancedType = (typeString, transformType) => {
  const trimmed = typeString.trim();
  if (!trimmed) {
    return null;
  }

  // Resolves a nested piece, falling back to an unlinked code span
  const resolve = piece =>
    parseAdvancedType(piece, transformType) || `\`<${piece}>\``;

  // Handle Unions (|) first, then Intersections (&)
  for (const operator of ['|', '&']) {
    if (!trimmed.includes(operator)) {
      continue;
    }

    const operands = splitByOuterSeparator(trimmed, operator);
    if (operands.length > 1) {
      // Empty operands (e.g. from a leading `|`) carry no type, so skip them
      const resolved = operands.filter(Boolean).map(piece => resolve(piece));
      return resolved.length ? resolved.join(` ${operator} `) : null;
    }
  }

  // Handle Functions (=>)
  if (trimmed.includes('=>')) {
    const [params, ...rest] = splitByOuterSeparator(trimmed, '=>');
    if (rest.length > 0) {
      // Everything after the first top-level arrow is the return type; joining
      // the remainder back lets curried signatures be resolved by the recursion
      const returnType = rest.join(' => ');

      // Preserve the parameter list as written, only the return type is linked
      return `${params} =&gt; ${resolve(returnType)}`;
    }
  }

  // Handle Generics (Base<Inner, Inner>)
  if (trimmed.includes('<') && trimmed.endsWith('>')) {
    const firstBracketIndex = trimmed.indexOf('<');
    const baseType = trimmed.slice(0, firstBracketIndex).trim();
    const innerType = trimmed.slice(firstBracketIndex + 1, -1).trim();

    const baseResult = transformType(baseType.replace(/\[\]$/, ''));
    const baseFormatted = baseResult
      ? `[\`<${baseType}>\`](${baseResult})`
      : `\`<${baseType}>\``;

    // Split arguments safely by comma, so nested generics stay in one piece
    const innerFormatted = splitByOuterSeparator(innerType, ',')
      .map(arg => resolve(arg))
      .join(', ');

    return `${baseFormatted}&lt;${innerFormatted}&gt;`;
  }

  // Base Case: Plain Type (e.g., string, Buffer, Function)
  // The regex removes a trailing `[]` so array types resolve to their element type
  const result = transformType(trimmed.replace(/\[\]$/, ''));
  return result ? `[\`<${trimmed}>\`](${result})` : null;
};

/**
* This method replaces plain text Types within the Markdown content into Markdown links
* that link to the actual relevant reference for such type (either internal or external link)
Expand Down Expand Up @@ -150,32 +192,8 @@ export const transformTypeToReferenceLink = (type, record) => {
return '';
};

const { pieces: outerPieces, separator } = splitByOuterSeparator(typeInput);

const typePieces = outerPieces.map(piece => {
// This is the content to render as the text of the Markdown link
const trimmedPiece = piece.trim();

// 1. Attempt to format as a basic Generic type first
const genericMarkdown = formatBasicGeneric(trimmedPiece, transformType);
if (genericMarkdown) {
return genericMarkdown;
}

// 2. Fallback to the logic for plain types
// This is what we will compare against the API types mappings
// The ReGeX below is used to remove `[]` from the end of the type
const result = transformType(trimmedPiece.replace(/\[\]$/, ''));

// If we have a valid result and the piece is not empty, we return the Markdown link
if (trimmedPiece.length && result.length) {
return `[\`<${trimmedPiece}>\`](${result})`;
}
});

// Filter out pieces that we failed to map and then join the valid ones
// using the same separator that appeared in the original type string
const markdownLinks = typePieces.filter(Boolean).join(separator);
// Kick off the recursive parser on the cleaned input
const markdownLinks = parseAdvancedType(typeInput, transformType);
Comment thread
moshams272 marked this conversation as resolved.
Outdated

// Return the replaced links or the original content if they all failed to be replaced
// Note that if some failed to get replaced, only the valid ones will be returned
Expand Down
Loading