Skip to content

Commit f3b5710

Browse files
author
Hannia Valera
committed
feat: implement onboarding tools with GitHub API integration and enhance contributor experience
1 parent b4efe78 commit f3b5710

11 files changed

Lines changed: 442 additions & 4 deletions

File tree

.vscode/mcp.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
"cmake-tools-onboarding": {
44
"type": "stdio",
55
"command": "node",
6-
"args": ["${workspaceFolder}/tools/onboarding-mcp/dist/index.js"]
6+
"args": ["${workspaceFolder}/tools/onboarding-mcp/dist/index.js"],
7+
"env": {
8+
"GITHUB_TOKEN": "${env:GITHUB_TOKEN}"
9+
}
710
}
811
}
912
}

tools/onboarding-mcp/.eslintignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
dist/
2+
node_modules/

tools/onboarding-mcp/.eslintrc.cjs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/** @type {import("eslint").Linter.Config} */
2+
module.exports = {
3+
root: true,
4+
parser: "@typescript-eslint/parser",
5+
parserOptions: {
6+
project: "./tsconfig.json",
7+
sourceType: "module"
8+
},
9+
plugins: ["@typescript-eslint"],
10+
extends: [
11+
"eslint:recommended",
12+
"plugin:@typescript-eslint/recommended"
13+
],
14+
rules: {
15+
// Keep it simple — this is a small standalone tool
16+
"@typescript-eslint/no-unused-vars": ["warn", { argsIgnorePattern: "^_" }]
17+
}
18+
};

tools/onboarding-mcp/README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,17 @@ Add the following to your **`.vscode/mcp.json`** at the workspace root:
2020
"cmake-tools-onboarding": {
2121
"type": "stdio",
2222
"command": "node",
23-
"args": ["${workspaceFolder}/tools/onboarding-mcp/dist/index.js"]
23+
"args": ["${workspaceFolder}/tools/onboarding-mcp/dist/index.js"],
24+
"env": {
25+
"GITHUB_TOKEN": "${env:GITHUB_TOKEN}"
26+
}
2427
}
2528
}
2629
}
2730
```
2831

32+
The `GITHUB_TOKEN` env var is **optional** but recommended. Without it, GitHub API calls (used by Phase 3 tools) are limited to 60 requests/hour. With a token, the limit is 5,000/hour. You can create a personal access token at https://github.com/settings/tokens — no scopes are needed for public repos.
33+
2934
Once configured, the MCP server is available to Copilot agent mode (and any other MCP client) in VS Code.
3035

3136
## Development
@@ -57,3 +62,12 @@ npm start
5762
| **`explain_concept`** | `{ "concept": "..." }` | Explains a CMake Tools concept (e.g. `kit`, `preset`, `driver`, `ctest`, `build`, `configure`, `debug`, `settings`). Returns a summary, detailed explanation, related concepts, relevant source files, and a link to the docs page. If the concept is unknown, lists all known concepts. |
5863
| **`find_source_file`** | `{ "feature": "..." }` | Given a natural-language description (e.g. `"kit scanning"`, `"build logic"`, `"test runner"`), returns matching source files with GitHub links, descriptions, and relevance notes. Useful for quickly navigating to the right file. |
5964
| **`get_docs_page`** | `{ "topic": "..." }` | Given a topic (e.g. `presets`, `kits`, `debugging`, `troubleshooting`, `faq`), returns the matching documentation page with file path, GitHub URL, summary, and key section headings. |
65+
66+
### Phase 3 — Live GitHub Data
67+
68+
These tools make real GitHub API calls. They work without authentication (60 req/hr) but setting `GITHUB_TOKEN` raises the limit to 5,000 req/hr (see [Wire it up in VS Code](#wire-it-up-in-vs-code) above).
69+
70+
| Tool | Input | Description |
71+
| --- | --- | --- |
72+
| **`get_contributor_issues`** | `{ "limit?": 20, "label?": "..." }` | Fetches recently updated open issues and enriches each with contributor-friendliness signals (`hasGoodFirstIssueLabel`, `isRecentlyUpdated`, `hasLowCommentCount`, `hasNoBugLabel`). Optionally filter by label (e.g. `"bug"`, `"enhancement"`, `"good first issue"`, `"help wanted"`). Intentionally does **not** default to `"good first issue"` filtering because that label is sparsely used — Copilot should reason over the full issue list instead. |
73+
| **`get_recent_changes`** | `{ "limit?": 10 }` | Fetches the most recent commits to main and annotates each with `affectedAreas` (derived from commit message keywords matched against the codebase source map). Includes a summary of the most active areas so new contributors can see what's currently in flux. |

tools/onboarding-mcp/package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
{
22
"name": "cmake-tools-onboarding-mcp",
3-
"version": "0.2.0",
3+
"version": "1.0.0",
44
"private": true,
55
"description": "MCP server to help new contributors onboard to microsoft/vscode-cmake-tools",
66
"type": "module",
7+
"engines": {
8+
"node": ">=18"
9+
},
710
"scripts": {
811
"build": "tsc",
912
"dev": "tsx src/index.ts",
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/**
 * Shape of one entry in {@link knownLabels}.
 */
export interface RepoLabel {
    /** The label's name, e.g. "bug". */
    name: string;
    /** Short human-readable explanation of what the label means. */
    description: string;
    /** If true, issues with this label are generally not code changes — skip for newcomers. */
    skipForContributors?: boolean;
}

/**
 * Catalogue of labels with a one-line description each.
 *
 * Only labels that should be skipped by newcomers carry the
 * `skipForContributors` flag; all other entries omit the property entirely.
 */
export const knownLabels: RepoLabel[] = [
    { name: "bug", description: "Something is broken" },
    { name: "enhancement", description: "New feature or improvement" },
    { name: "documentation", description: "Documentation gap or error" },
    { name: "good first issue", description: "Explicitly tagged for newcomers — great starting point" },
    { name: "help wanted", description: "Maintainers are looking for community contribution" },
    { name: "question", description: "Not a code change — usually a support request", skipForContributors: true }
];

tools/onboarding-mcp/src/data/sourceMap.ts

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,71 @@ export function findSourceEntries(feature: string): Array<SourceEntry & { matchC
225225

226226
return scored;
227227
}
228+
229+
/**
 * Single-word keywords that are ignored by `matchAreas` because they are
 * common English words that show up in most commit messages.
 * They can still contribute to a match as part of a multi-word keyword.
 */
const AREA_MATCH_STOP_WORDS: ReadonlySet<string> = new Set([
    "build", "compile", "test", "debug", "launch", "log", "error",
    "warning", "status", "state", "config", "setting", "settings",
    "configure", "configuration", "package", "query", "reply", "ui"
]);

/**
 * Decide whether one (already lowercased) keyword occurs in the lowercased text.
 *
 * - Keywords containing a space match as plain substrings.
 * - Single-word keywords on the stop list never match.
 * - Other single-word keywords must appear as a whole token, i.e. not directly
 *   preceded or followed by a letter, digit or underscore. Lookarounds are used
 *   instead of `\b` so that keywords which start or end with punctuation
 *   (where `\b` would demand an adjacent word character) still match.
 */
function areaKeywordMatches(lowerText: string, keyword: string): boolean {
    if (keyword === "") {
        // An empty keyword would otherwise match almost any text.
        return false;
    }
    if (keyword.includes(" ")) {
        return lowerText.includes(keyword);
    }
    if (AREA_MATCH_STOP_WORDS.has(keyword)) {
        return false;
    }
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`(?<!\\w)${escaped}(?!\\w)`).test(lowerText);
}

/**
 * Given a text string (e.g. a commit message), return the area names that match.
 * An "area" is named after the first keyword of each `sourceMap` entry.
 *
 * Matching is case-insensitive and stricter than the interactive
 * `findSourceEntries`:
 * - Multi-word keywords (those containing a space) match as exact substrings.
 * - Single-word keywords match only as whole words, and a fixed stop list of
 *   very common words ("build", "test", "debug", ...) is skipped entirely.
 * - If no keyword matched, the entry still matches when the base name of any
 *   of its files (the part after the last "/") occurs in the text.
 *
 * Entries without any keyword have no area name and are skipped.
 *
 * @param text Free-form text to classify.
 * @returns Deduplicated area names in `sourceMap` order, or `["general"]` if nothing matched.
 */
export function matchAreas(text: string): string[] {
    const lower = text.toLowerCase();
    const matched = new Set<string>();

    for (const entry of sourceMap) {
        const areaName = entry.keywords[0];
        if (areaName === undefined) {
            continue;
        }

        let found = false;

        for (const keyword of entry.keywords) {
            if (areaKeywordMatches(lower, keyword.toLowerCase())) {
                found = true;
                break;
            }
        }

        // Fall back to file names mentioned in the text.
        if (!found) {
            for (const file of entry.files) {
                const filename = file.path.split("/").pop()?.toLowerCase() ?? "";
                if (filename && lower.includes(filename)) {
                    found = true;
                    break;
                }
            }
        }

        if (found) {
            matched.add(areaName);
        }
    }

    return matched.size > 0 ? [...matched] : ["general"];
}

tools/onboarding-mcp/src/github.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
export const REPO = "microsoft/vscode-cmake-tools";
2+
export const REPO_URL = `https://github.com/${REPO}`;
3+
const API_BASE = `https://api.github.com/repos/${REPO}`;
4+
5+
/**
 * Error thrown when the GitHub REST API answers with a non-success status.
 *
 * The raw `status`, `statusText` and response `body` are kept as read-only
 * properties; `message` is a human-readable summary built from them.
 * Rate-limit responses get a dedicated message explaining how to raise the
 * limit with `GITHUB_TOKEN`.
 */
export class GitHubApiError extends Error {
    /**
     * @param status HTTP status code of the response.
     * @param statusText HTTP reason phrase (may be empty).
     * @param body Raw response body text (may be empty).
     */
    constructor(
        public readonly status: number,
        public readonly statusText: string,
        public readonly body: string
    ) {
        super(GitHubApiError.describe(status, statusText, body));
        this.name = "GitHubApiError";
    }

    /** Build the error message for a failed response. */
    private static describe(status: number, statusText: string, body: string): string {
        // GitHub reports an exhausted rate limit with either 403 or 429;
        // the body text is what distinguishes it from other 403 responses.
        const isRateLimit = (status === 403 || status === 429) && /rate limit/i.test(body);
        if (isRateLimit) {
            return `GitHub API rate limit exceeded (HTTP ${status}). ` +
                `Unauthenticated requests are limited to 60/hour. ` +
                `Set the GITHUB_TOKEN environment variable to increase the limit to 5,000/hour. ` +
                `You can create a personal access token at https://github.com/settings/tokens (no scopes needed for public repos).`;
        }

        // Join only the parts that are present so the message never contains
        // a dangling separator or runs the status text into the body.
        const reason = statusText.trim();
        const detail = body.trim();
        const statusLine = reason ? `HTTP ${status} ${reason}` : `HTTP ${status}`;
        return detail
            ? `GitHub API error: ${statusLine}: ${detail}`
            : `GitHub API error: ${statusLine}`;
    }
}
22+
23+
/**
24+
* Perform a GET request to the GitHub REST API for the cmake-tools repo.
25+
* @param path API path relative to the repo, e.g. "/issues?state=open"
26+
* @returns Parsed JSON response body.
27+
*/
28+
export async function githubGet<T>(path: string): Promise<T> {
29+
const url = `${API_BASE}${path}`;
30+
31+
const headers: Record<string, string> = {
32+
"User-Agent": "cmake-tools-onboarding-mcp",
33+
"Accept": "application/vnd.github+json"
34+
};
35+
36+
const token = process.env.GITHUB_TOKEN;
37+
if (token) {
38+
headers["Authorization"] = `Bearer ${token}`;
39+
}
40+
41+
const response = await fetch(url, { headers });
42+
43+
if (!response.ok) {
44+
const body = await response.text();
45+
throw new GitHubApiError(response.status, response.statusText, body);
46+
}
47+
48+
return (await response.json()) as T;
49+
}

tools/onboarding-mcp/src/index.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ import { registerPrChecklistTool } from "./tools/prChecklist.js";
55
import { registerConceptTool } from "./tools/concepts.js";
66
import { registerCodeMapTool } from "./tools/codeMap.js";
77
import { registerDocsTool } from "./tools/docs.js";
8+
import { registerIssuesTool } from "./tools/issues.js";
9+
import { registerChangelogTool } from "./tools/changelog.js";
810

911
const server = new McpServer({
1012
name: "cmake-tools-onboarding",
11-
version: "0.2.0"
13+
version: "1.0.0"
1214
});
1315

1416
// Phase 1 tools
@@ -20,6 +22,10 @@ registerConceptTool(server);
2022
registerCodeMapTool(server);
2123
registerDocsTool(server);
2224

25+
// Phase 3 tools (live GitHub data)
26+
registerIssuesTool(server);
27+
registerChangelogTool(server);
28+
2329
// Start the server over stdio
2430
const transport = new StdioServerTransport();
2531
await server.connect(transport);
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import { z } from "zod";
2+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3+
import { githubGet, GitHubApiError, REPO_URL } from "../github.js";
4+
import { matchAreas } from "../data/sourceMap.js";
5+
6+
/**
 * The subset of a GitHub "list commits" response item that this tool reads.
 */
interface GitHubCommit {
    /** Full commit hash; shortened to 7 characters in the tool output. */
    sha: string;
    commit: {
        /** Full commit message; only the first line is shown, the whole text is used for area matching. */
        message: string;
        /** Git author metadata. GitHub's schema allows this to be null and `date` to be absent. */
        author: {
            date?: string;
        } | null;
    };
    /** GitHub account linked to the commit, or null when there is none. */
    author: {
        login: string;
    } | null;
    /** Browser URL of the commit. */
    html_url: string;
}

/**
 * Register the `get_recent_changes` tool on the given MCP server.
 *
 * The tool fetches the latest commits through `githubGet`, annotates each one
 * with the areas returned by `matchAreas` for its commit message, and adds a
 * summary naming the (up to three) most frequently affected areas.
 * Failures are reported as a JSON `{ error }` payload with `isError: true`
 * rather than thrown.
 *
 * @param server MCP server instance to register the tool on.
 */
export function registerChangelogTool(server: McpServer): void {
    server.registerTool(
        "get_recent_changes",
        {
            title: "Get Recent Changes",
            description:
                "Fetches the most recent commits to the main branch of vscode-cmake-tools. " +
                "Each commit is annotated with affected areas (derived from keywords in the commit message " +
                "matched against the codebase source map). Includes a summary of the most active areas " +
                "to help new contributors understand what's currently in flux.",
            inputSchema: z.object({
                limit: z
                    .number()
                    .int()
                    .min(1)
                    .max(30)
                    .default(10)
                    .describe("Number of recent commits to fetch (default 10, max 30).")
            })
        },
        async ({ limit }) => {
            // Defensive clamp in addition to the schema's own default and maximum.
            const effectiveLimit = Math.min(limit ?? 10, 30);

            try {
                const raw = await githubGet<GitHubCommit[]>(
                    `/commits?per_page=${effectiveLimit}`
                );

                // Track area frequencies for the summary
                const areaFrequency = new Map<string, number>();

                const commits = raw.map((c) => {
                    const firstLine = (c.commit.message.split("\n")[0] ?? "").trim();
                    const areas = matchAreas(c.commit.message);

                    for (const area of areas) {
                        areaFrequency.set(area, (areaFrequency.get(area) ?? 0) + 1);
                    }

                    return {
                        sha: c.sha.slice(0, 7),
                        message: firstLine,
                        author: c.author?.login ?? "unknown",
                        // Missing git author metadata must not fail the whole request.
                        date: c.commit.author?.date ?? "unknown",
                        url: c.html_url,
                        affectedAreas: areas
                    };
                });

                // Top 3 most active areas
                const mostActiveAreas = [...areaFrequency.entries()]
                    .sort((a, b) => b[1] - a[1])
                    .slice(0, 3)
                    .map(([area]) => area);

                const areaList = mostActiveAreas.join(", ") || "general";
                // The focused tip is only used when none of the top areas is the
                // catch-all "general" bucket.
                const tip =
                    mostActiveAreas.length > 0 && !mostActiveAreas.includes("general")
                        ? `Recent activity is concentrated in ${areaList} — if you're new, these areas may have more in-flux context to catch up on.`
                        : "Recent commits span a broad range of areas. Check the affectedAreas on each commit to find patterns.";

                const result = {
                    commits,
                    summary: {
                        fetchedAt: new Date().toISOString(),
                        repoUrl: REPO_URL,
                        totalReturned: commits.length,
                        mostActiveAreas,
                        tip
                    }
                };

                return {
                    content: [{ type: "text" as const, text: JSON.stringify(result, null, 2) }]
                };
            } catch (error) {
                // GitHubApiError messages are already user-facing; anything else gets a prefix.
                const message =
                    error instanceof GitHubApiError
                        ? error.message
                        : `Failed to fetch commits: ${error instanceof Error ? error.message : String(error)}`;

                return {
                    content: [
                        {
                            type: "text" as const,
                            text: JSON.stringify({ error: message }, null, 2)
                        }
                    ],
                    isError: true
                };
            }
        }
    );
}

0 commit comments

Comments
 (0)