2 changes: 1 addition & 1 deletion README.md
@@ -30,7 +30,7 @@ Thank you to the following people:

- :speech_balloon: [Copilot Chat](https://github.com/features/copilot) meets [Zed AI](https://zed.dev/blog/zed-ai), in Neovim
- :zap: Integrates Neovim with LLMs and Agents in the CLI
- :electric_plug: Support for LLMs from Anthropic, Copilot, GitHub Models, DeepSeek, Gemini, Mistral AI, Novita, Ollama, OpenAI, Azure OpenAI, HuggingFace and xAI (or [bring your own](https://codecompanion.olimorris.dev/extending/adapters.html))
- :electric_plug: Support for LLMs from Anthropic, Copilot, GitHub Models, DeepSeek, Gemini, Kimi (Moonshot), Mistral AI, Novita, Ollama, OpenAI, Azure OpenAI, HuggingFace and xAI (or [bring your own](https://codecompanion.olimorris.dev/extending/adapters.html))
- :robot: Support for [Agent Client Protocol](https://agentclientprotocol.com/overview/introduction), enabling coding with agents like [Augment Code](https://docs.augmentcode.com/cli/overview), [Cagent](https://github.com/docker/cagent) from Docker, [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview), [Codex](https://openai.com/codex), [Copilot CLI](https://github.com/features/copilot/cli), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Goose](https://block.github.io/goose/), [Cursor CLI](https://cursor.com/docs/cli/overview), [Kimi CLI](https://github.com/MoonshotAI/kimi-cli), [Kiro](https://kiro.dev/docs/cli/), [Mistral Vibe](https://github.com/mistralai/mistral-vibe) and [OpenCode](https://opencode.ai)
- :heart_hands: User contributed and supported [adapters](https://codecompanion.olimorris.dev/configuration/adapters-http#community-adapters)
- :battery: Support for [Model Context Protocol (MCP)](https://codecompanion.olimorris.dev/model-context-protocol#model-context-protocol-mcp-support)
56 changes: 55 additions & 1 deletion doc/codecompanion.txt
@@ -1,4 +1,4 @@
*codecompanion.txt* For NVIM v0.11 Last change: 2026 April 29
*codecompanion.txt* For NVIM v0.11 Last change: 2026 May 03

==============================================================================
Table of Contents *codecompanion-table-of-contents*
@@ -126,6 +126,7 @@ agents. Out of the box, the plugin supports:
- Goose (`goose`) - Requires an API key
- HuggingFace (`huggingface`) - Requires an API key
- Kilo Code (`kilocode`) - Requires an API key
- Kimi (`kimi`) - Moonshot's Kimi K2 family; requires an API key
- Kimi CLI (`kimi_cli`) - Requires an API key
- Mistral AI (`mistral`) - Requires an API key or a Le Chat Pro subscription
- Novita (`novita`) - Requires an API key
@@ -1757,6 +1758,59 @@ LLAMA.CPP WITH --REASONING-FORMAT DEEPSEEK
<


KIMI (MOONSHOT)

CodeCompanion ships a built-in `kimi` adapter for Moonshot's Kimi K2 family
<https://platform.kimi.ai/docs/models>. Unlike the generic `openai_compatible`
adapter, it captures and round-trips Kimi's `reasoning_content`, so the
K2-thinking variants (`kimi-k2-thinking`, `kimi-k2-thinking-turbo`, and the
general K2 models flagged `can_reason`, such as `kimi-k2.6`) work correctly
with tool calling. Without the round-trip, the second turn of a tool-using
chat fails with `"thinking is enabled but reasoning_content is missing in
assistant tool call message"`.

For the default setup, simply set `MOONSHOT_API_KEY` and pick the adapter:

>lua
    require("codecompanion").setup({
      interactions = {
        chat = { adapter = "kimi" },
        inline = { adapter = "kimi" },
      },
    })
<

To override the API-key source, swap models, or disable thinking mode:

>lua
    require("codecompanion").setup({
      adapters = {
        http = {
          kimi = function()
            return require("codecompanion.adapters").extend("kimi", {
              env = {
                -- Use the 1Password CLI instead of an environment variable:
                api_key = "cmd:op read op://API/Kimi/credential --no-newline",
                -- Region override (Moonshot has separate endpoints, e.g. for China):
                -- url = "https://api.moonshot.cn",
              },
              schema = {
                model = { default = "kimi-k2.6" },
                -- Set to false to disable thinking mode (e.g. for the K2-general
                -- non-reasoning preview models, where `think` is a no-op):
                think = { default = true },
              },
            })
          end,
        },
      },
    })
<


[!IMPORTANT] The K2-thinking models pin `temperature` to `1` and `top_p` to
`0.95`; the adapter's defaults match. Overriding either with another value will
yield a 400 from the API. Other K2 models accept the full ranges.

OLLAMA (REMOTELY)

The simplest way to connect to a remote Ollama instance is to set the
45 changes: 45 additions & 0 deletions doc/configuration/adapters-http.md
@@ -380,6 +380,51 @@
})
```

### Kimi (Moonshot)

CodeCompanion ships a built-in `kimi` adapter for Moonshot's [Kimi K2 family](https://platform.kimi.ai/docs/models). Unlike the generic `openai_compatible` adapter, it captures and round-trips Kimi's `reasoning_content`, so the K2-thinking variants (`kimi-k2-thinking`, `kimi-k2-thinking-turbo`, and the general K2 models flagged `can_reason`, such as `kimi-k2.6`) work correctly with tool calling. Without the round-trip, the second turn of a tool-using chat fails with `"thinking is enabled but reasoning_content is missing in assistant tool call message"`.
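
The rewrite described above can be sketched in isolation. This is an illustrative standalone function, not the adapter's actual code; the shipped adapter performs the same transformation inside its `form_messages` handler:

```lua
-- Illustrative sketch: convert OpenAI-shaped assistant messages into the
-- shape Moonshot expects when thinking mode is enabled.
local function to_moonshot(messages, think_enabled)
  for _, m in ipairs(messages) do
    if m.role == "assistant" then
      if m.reasoning then
        -- Flatten OpenAI's nested `reasoning` field into Moonshot's
        -- flat `reasoning_content` string
        m.reasoning_content = type(m.reasoning) == "table" and m.reasoning.content or m.reasoning
        m.reasoning = nil
      elseif think_enabled and m.tool_calls then
        -- Empty-string fallback for tool-call messages whose original
        -- reasoning is unavailable (old history, edits, model swaps)
        m.reasoning_content = ""
      end
    end
  end
  return messages
end
```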

For the default setup, simply set `MOONSHOT_API_KEY` and pick the adapter:

```lua
require("codecompanion").setup({
  interactions = {
    chat = { adapter = "kimi" },
    inline = { adapter = "kimi" },
  },
})
```

To override the API-key source, swap models, or disable thinking mode:

```lua
require("codecompanion").setup({
  adapters = {
    http = {
      kimi = function()
        return require("codecompanion.adapters").extend("kimi", {
          env = {
            -- Use the 1Password CLI instead of an environment variable:
            api_key = "cmd:op read op://API/Kimi/credential --no-newline",
            -- Region override (Moonshot has separate endpoints, e.g. for China):
            -- url = "https://api.moonshot.cn",
          },
          schema = {
            model = { default = "kimi-k2.6" },
            -- Set to false to disable thinking mode (e.g. for the K2-general
            -- non-reasoning preview models, where `think` is a no-op):
            think = { default = true },
          },
        })
      end,
    },
  },
})
```

> [!IMPORTANT]
> The K2-thinking models pin `temperature` to `1` and `top_p` to `0.95`; the adapter's defaults match. Overriding either with another value will yield a 400 from the API. Other K2 models accept the full ranges.
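
Since the other K2 models accept the full sampling ranges, a configuration like the following is valid for a non-thinking preview model (the model choice and temperature here are illustrative values, not recommendations):

```lua
require("codecompanion").setup({
  adapters = {
    http = {
      kimi = function()
        return require("codecompanion.adapters").extend("kimi", {
          schema = {
            -- A non-reasoning preview model: the pinned temperature/top_p
            -- values do not apply, and `think` is a no-op
            model = { default = "kimi-k2-0905-preview" },
            think = { default = false },
            temperature = { default = 0.3 },
          },
        })
      end,
    },
  },
})
```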

### Ollama (remotely)

The simplest way to connect to a remote Ollama instance is to set the `OLLAMA_HOST` environment variable (the same variable used by the Ollama CLI):
1 change: 1 addition & 0 deletions doc/index.md
@@ -57,6 +57,7 @@ CodeCompanion uses [HTTP](configuration/adapters-http) and [ACP](configuration/a
- Goose (`goose`) - Requires an API key
- HuggingFace (`huggingface`) - Requires an API key
- Kilo Code (`kilocode`) - Requires an API key
- Kimi (`kimi`) - Moonshot's Kimi K2 family; requires an API key
- Kimi CLI (`kimi_cli`) - Requires an API key
- Mistral AI (`mistral`) - Requires an API key or a Le Chat Pro subscription
- Novita (`novita`) - Requires an API key
219 changes: 219 additions & 0 deletions lua/codecompanion/adapters/http/kimi.lua
@@ -0,0 +1,219 @@
local openai = require("codecompanion.adapters.http.openai")

---@class CodeCompanion.HTTPAdapter.Kimi: CodeCompanion.HTTPAdapter
return {
  name = "kimi",
  formatted_name = "Kimi",
  roles = {
    llm = "assistant",
    user = "user",
    tool = "tool",
  },
  opts = {
    stream = true,
    vision = false,
    tools = true,
  },
  features = {
    text = true,
    tokens = true,
  },
  url = "${url}${chat_url}",
  env = {
    url = "https://api.moonshot.ai",
    api_key = "MOONSHOT_API_KEY",
    chat_url = "/v1/chat/completions",
  },
  headers = {
    Authorization = "Bearer ${api_key}",
    ["Content-Type"] = "application/json",
  },
  handlers = {
    setup = function(self)
      if self.opts and self.opts.stream then
        self.parameters.stream = true
        self.parameters.stream_options = { include_usage = true }
      end

      local model = self.schema.model.default
      local model_opts = self.schema.model.choices[model]
      if model_opts and model_opts.opts then
        self.opts = vim.tbl_deep_extend("force", self.opts, model_opts.opts)
      end

      return true
    end,

    --- Use the OpenAI adapter for the bulk of the work
    tokens = function(self, data)
      return openai.handlers.tokens(self, data)
    end,
    form_tools = function(self, tools)
      return openai.handlers.form_tools(self, tools)
    end,
    form_parameters = function(self, params, messages)
      return openai.handlers.form_parameters(self, params, messages)
    end,
    ---Format the messages for the request.
    ---
    ---Kimi-k2-thinking rejects assistant messages that contain ``tool_calls``
    ---but no ``reasoning_content`` whenever ``think`` is enabled. We rewrite
    ---OpenAI's nested ``reasoning`` field into Moonshot's flat
    ---``reasoning_content`` string, and insert an empty-string fallback for
    ---tool-call messages whose original reasoning is unavailable (history that
    ---pre-dates this adapter, edited messages, model swaps).
    ---@param self CodeCompanion.HTTPAdapter
    ---@param messages table
    ---@return table
    form_messages = function(self, messages)
      local result = openai.handlers.form_messages(self, messages)

      local think_on = self.parameters and self.parameters.think == true
      for _, m in ipairs(result.messages or {}) do
        if m.role == self.roles.llm then
          if m.reasoning then
            m.reasoning_content = type(m.reasoning) == "table" and m.reasoning.content or m.reasoning
            m.reasoning = nil
          elseif think_on and m.tool_calls then
            m.reasoning_content = ""
          end
        end
      end

      return result
    end,
    chat_output = function(self, data, tools)
      return openai.handlers.chat_output(self, data, tools)
    end,
    ---Lift streamed ``delta.reasoning_content`` onto the message so it can be
    ---round-tripped on the next turn (see ``form_messages``).
    ---@param self CodeCompanion.HTTPAdapter
    ---@param data table
    ---@return table
    parse_message_meta = function(self, data)
      local extra = data.extra
      if extra and extra.reasoning_content then
        data.output.reasoning = { content = extra.reasoning_content }
        if data.output.content == "" then
          data.output.content = nil
        end
      end
      return data
    end,
    tools = {
      format_tool_calls = function(self, tools)
        return openai.handlers.tools.format_tool_calls(self, tools)
      end,
      output_response = function(self, tool_call, output)
        return openai.handlers.tools.output_response(self, tool_call, output)
      end,
    },
    inline_output = function(self, data, context)
      return openai.handlers.inline_output(self, data, context)
    end,
    on_exit = function(self, data)
      return openai.handlers.on_exit(self, data)
    end,
  },
  schema = {
    ---@type CodeCompanion.Schema
    model = {
      order = 1,
      mapping = "parameters",
      type = "enum",
      desc = "ID of the Moonshot Kimi model to use. See https://platform.kimi.ai/docs/models.",
      default = "kimi-k2.6",
      choices = {
        -- K2 thinking family (reasoning_content round-trip)
        ["kimi-k2-thinking"] = {
          formatted_name = "Kimi K2 Thinking",
          meta = { context_window = 262144 },
          opts = { can_reason = true },
        },
        ["kimi-k2-thinking-turbo"] = {
          formatted_name = "Kimi K2 Thinking Turbo",
          meta = { context_window = 262144 },
          opts = { can_reason = true },
        },
        -- K2 general
        ["kimi-k2.6"] = {
          formatted_name = "Kimi K2.6",
          meta = { context_window = 262144 },
          opts = { can_reason = true },
        },
        ["kimi-k2.5"] = {
          formatted_name = "Kimi K2.5",
          meta = { context_window = 262144 },
          opts = { can_reason = true },
        },
        ["kimi-k2-turbo-preview"] = {
          formatted_name = "Kimi K2 Turbo Preview",
          meta = { context_window = 262144 },
        },
        ["kimi-k2-0905-preview"] = {
          formatted_name = "Kimi K2 0905 Preview",
          meta = { context_window = 262144 },
        },
        ["kimi-k2-0711-preview"] = {
          formatted_name = "Kimi K2 0711 Preview",
          meta = { context_window = 131072 },
        },
      },
    },
    think = {
      order = 2,
      mapping = "parameters",
      type = "boolean",
      optional = true,
      default = true,
      desc = "Enable thinking mode for k2-thinking-class models. When true, the API streams reasoning_content alongside content; this adapter captures and echoes it back on assistant tool-call messages as Moonshot requires.",
    },
    temperature = {
      order = 3,
      mapping = "parameters",
      type = "number",
      optional = true,
      default = 1,
      desc = "What sampling temperature to use, between 0 and 2. Note: kimi-k2-thinking only accepts 1.",
      validate = function(n)
        return n >= 0 and n <= 2, "Must be between 0 and 2"
      end,
    },
    top_p = {
      order = 4,
      mapping = "parameters",
      type = "number",
      optional = true,
      default = 0.95,
      desc = "Nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both. Note: kimi-k2-thinking only accepts 0.95.",
      validate = function(n)
        return n >= 0 and n <= 1, "Must be between 0 and 1"
      end,
    },
    max_tokens = {
      order = 5,
      mapping = "parameters",
      type = "integer",
      optional = true,
      default = nil,
      desc = "The maximum number of tokens to generate in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length.",
      validate = function(n)
        return n > 0, "Must be greater than 0"
      end,
    },
    stop = {
      order = 6,
      mapping = "parameters",
      type = "list",
      optional = true,
      default = nil,
      subtype = {
        type = "string",
      },
      desc = "Stop generation if this token is detected. Or if one of these tokens is detected when providing an array.",
      validate = function(l)
        return #l >= 1, "Must have at least 1 element"
      end,
    },
  },
}
1 change: 1 addition & 0 deletions lua/codecompanion/config.lua
@@ -19,6 +19,7 @@ local defaults = {
  gemini = "gemini",
  githubmodels = "githubmodels",
  huggingface = "huggingface",
  kimi = "kimi",
  novita = "novita",
  mistral = "mistral",
  ollama = "ollama",
21 changes: 21 additions & 0 deletions tests/adapters/http/stubs/kimi_no_streaming.txt
@@ -0,0 +1,21 @@
{
  "id": "chatcmpl-kimi-002",
  "object": "chat.completion",
  "created": 1777802500,
  "model": "kimi-k2.6",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "Elegant simplicity."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 9,
    "completion_tokens": 3,
    "total_tokens": 12
  }
}