Skip to content

Commit c4d2298

Browse files
committed
fix: resolve LIMIT keyword misidentification in SQL pagination
The `strip_limit_offset` and `extract_user_limit` helpers used naive `rfind("LIMIT")` / `rfind("OFFSET")` which matched the keyword as a substring of table names (e.g. `tapp_appointment_message_event_limit`), string literals, or quoted identifiers — producing corrupted pagination queries. Replace the raw string search with a quote-and-bracket-aware SQL tokenizer (`tokenize_sql`) that treats single-quoted strings, double-quoted identifiers, backtick-quoted identifiers, and parenthesized groups as opaque tokens. `strip_limit_offset` and `extract_user_limit` now scan backward over tokens so only standalone `LIMIT` / `OFFSET` keywords are matched. Also update MCP `run_query` tool: - Accept an optional `limit` parameter (default 100) - Document that user LIMIT clauses in the SQL are respected Add 11 new test cases covering table names containing "limit", quoted identifiers, string literals with SQL keywords, and subqueries. Made-with: Cursor
1 parent 8cbf542 commit c4d2298

3 files changed

Lines changed: 270 additions & 21 deletions

File tree

src-tauri/src/drivers/common/query.rs

Lines changed: 179 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -48,29 +48,192 @@ pub fn calculate_offset(page: u32, page_size: u32) -> u32 {
4848
(page - 1) * page_size
4949
}
5050

51+
/// Simple SQL tokenizer that respects:
52+
/// - Single-quoted strings ('...')
53+
/// - Double-quoted identifiers ("...")
54+
/// - Backtick-quoted identifiers (`...`)
55+
/// - Parenthesized groups (treated as single tokens)
56+
/// - Whitespace as delimiter
57+
///
58+
/// This prevents keywords like LIMIT or OFFSET from being matched
59+
/// inside string literals, quoted identifiers, or table names such as
60+
/// `tapp_appointment_message_event_limit`.
61+
fn tokenize_sql(sql: &str) -> Vec<String> {
62+
let mut tokens = Vec::new();
63+
let chars: Vec<char> = sql.chars().collect();
64+
let len = chars.len();
65+
let mut i = 0;
66+
67+
while i < len {
68+
if chars[i].is_whitespace() {
69+
i += 1;
70+
continue;
71+
}
72+
73+
if chars[i] == '\'' {
74+
let mut token = String::new();
75+
token.push(chars[i]);
76+
i += 1;
77+
while i < len {
78+
token.push(chars[i]);
79+
if chars[i] == '\'' {
80+
if i + 1 < len && chars[i + 1] == '\'' {
81+
i += 1;
82+
token.push(chars[i]);
83+
} else {
84+
i += 1;
85+
break;
86+
}
87+
}
88+
i += 1;
89+
}
90+
tokens.push(token);
91+
continue;
92+
}
93+
94+
if chars[i] == '"' {
95+
let mut token = String::new();
96+
token.push(chars[i]);
97+
i += 1;
98+
while i < len {
99+
token.push(chars[i]);
100+
if chars[i] == '"' {
101+
if i + 1 < len && chars[i + 1] == '"' {
102+
i += 1;
103+
token.push(chars[i]);
104+
} else {
105+
i += 1;
106+
break;
107+
}
108+
}
109+
i += 1;
110+
}
111+
tokens.push(token);
112+
continue;
113+
}
114+
115+
if chars[i] == '`' {
116+
let mut token = String::new();
117+
token.push(chars[i]);
118+
i += 1;
119+
while i < len {
120+
token.push(chars[i]);
121+
if chars[i] == '`' {
122+
if i + 1 < len && chars[i + 1] == '`' {
123+
i += 1;
124+
token.push(chars[i]);
125+
} else {
126+
i += 1;
127+
break;
128+
}
129+
}
130+
i += 1;
131+
}
132+
tokens.push(token);
133+
continue;
134+
}
135+
136+
if chars[i] == '(' {
137+
let mut token = String::new();
138+
let mut depth = 0;
139+
while i < len {
140+
token.push(chars[i]);
141+
if chars[i] == '(' {
142+
depth += 1;
143+
} else if chars[i] == ')' {
144+
depth -= 1;
145+
if depth == 0 {
146+
i += 1;
147+
break;
148+
}
149+
} else if chars[i] == '\'' {
150+
i += 1;
151+
while i < len {
152+
token.push(chars[i]);
153+
if chars[i] == '\'' {
154+
if i + 1 < len && chars[i + 1] == '\'' {
155+
i += 1;
156+
token.push(chars[i]);
157+
} else {
158+
break;
159+
}
160+
}
161+
i += 1;
162+
}
163+
}
164+
i += 1;
165+
}
166+
tokens.push(token);
167+
continue;
168+
}
169+
170+
let mut token = String::new();
171+
while i < len
172+
&& !chars[i].is_whitespace()
173+
&& chars[i] != '('
174+
&& chars[i] != '\''
175+
&& chars[i] != '"'
176+
&& chars[i] != '`'
177+
{
178+
token.push(chars[i]);
179+
i += 1;
180+
}
181+
if !token.is_empty() {
182+
tokens.push(token);
183+
}
184+
}
185+
186+
tokens
187+
}
188+
51189
/// Remove trailing LIMIT and OFFSET clauses from a SQL query.
52190
///
53-
/// Uses `rfind` to locate the last `LIMIT` keyword and strips everything from
54-
/// there onwards (which includes any subsequent OFFSET). Falls back to looking
55-
/// for a standalone `OFFSET` when no LIMIT is present.
56-
pub fn strip_limit_offset(query: &str) -> &str {
57-
let upper = query.to_uppercase();
58-
if let Some(pos) = upper.rfind("LIMIT") {
59-
query[..pos].trim()
60-
} else if let Some(pos) = upper.rfind("OFFSET") {
61-
query[..pos].trim()
62-
} else {
63-
query.trim()
191+
/// Uses a token-aware scan so that `LIMIT` / `OFFSET` keywords inside
192+
/// string literals, quoted identifiers, parenthesized subqueries, or as
193+
/// part of table names (e.g. `tapp_…_limit`) are never misidentified.
194+
pub fn strip_limit_offset(query: &str) -> String {
195+
let tokens = tokenize_sql(query.trim());
196+
let mut end = tokens.len();
197+
198+
// Scan backwards for OFFSET <n>
199+
if end >= 2 && tokens[end - 2].to_uppercase() == "OFFSET" {
200+
if tokens[end - 1].parse::<u64>().is_ok() {
201+
end -= 2;
202+
}
203+
}
204+
205+
// Scan backwards for LIMIT <n>
206+
if end >= 2 && tokens[end - 2].to_uppercase() == "LIMIT" {
207+
if tokens[end - 1].parse::<u64>().is_ok() {
208+
end -= 2;
209+
}
64210
}
211+
212+
tokens[..end].join(" ")
65213
}
66214

67215
/// Extract the numeric value from a trailing LIMIT clause, if present.
216+
///
217+
/// Uses a token-aware scan so that `LIMIT` as a substring of a table name
218+
/// (e.g. `tapp_appointment_message_event_limit`) is never misidentified.
68219
pub fn extract_user_limit(query: &str) -> Option<u32> {
69-
let upper = query.to_uppercase();
70-
let pos = upper.rfind("LIMIT")?;
71-
let after = query[pos + 5..].trim();
72-
let num_str: String = after.chars().take_while(|c| c.is_ascii_digit()).collect();
73-
num_str.parse().ok()
220+
let tokens = tokenize_sql(query.trim());
221+
let len = tokens.len();
222+
223+
// Walk backwards past optional OFFSET <n>
224+
let mut end = len;
225+
if end >= 2 && tokens[end - 2].to_uppercase() == "OFFSET" {
226+
if tokens[end - 1].parse::<u64>().is_ok() {
227+
end -= 2;
228+
}
229+
}
230+
231+
// Check for LIMIT <n>
232+
if end >= 2 && tokens[end - 2].to_uppercase() == "LIMIT" {
233+
return tokens[end - 1].parse().ok();
234+
}
235+
236+
None
74237
}
75238

76239
/// Build a paginated query by stripping any user-supplied LIMIT/OFFSET and

src-tauri/src/drivers/common/tests.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,38 @@ fn test_strip_limit_offset_only_offset() {
186186
);
187187
}
188188

189+
#[test]
190+
fn test_strip_limit_offset_table_name_contains_limit() {
191+
assert_eq!(
192+
strip_limit_offset("SELECT * FROM tapp_appointment_message_event_limit ORDER BY id"),
193+
"SELECT * FROM tapp_appointment_message_event_limit ORDER BY id"
194+
);
195+
}
196+
197+
#[test]
198+
fn test_strip_limit_offset_table_name_contains_limit_with_real_limit() {
199+
assert_eq!(
200+
strip_limit_offset("SELECT * FROM tapp_appointment_message_event_limit ORDER BY id LIMIT 10"),
201+
"SELECT * FROM tapp_appointment_message_event_limit ORDER BY id"
202+
);
203+
}
204+
205+
#[test]
206+
fn test_strip_limit_offset_quoted_identifier() {
207+
assert_eq!(
208+
strip_limit_offset(r#"SELECT * FROM "order_limit_table" WHERE x > 1 LIMIT 5 OFFSET 10"#),
209+
r#"SELECT * FROM "order_limit_table" WHERE x > 1"#
210+
);
211+
}
212+
213+
#[test]
214+
fn test_strip_limit_offset_string_literal_with_limit() {
215+
assert_eq!(
216+
strip_limit_offset("SELECT * FROM t WHERE name LIKE '%limit%' LIMIT 10"),
217+
"SELECT * FROM t WHERE name LIKE '%limit%'"
218+
);
219+
}
220+
189221
#[test]
190222
fn test_extract_user_limit_present() {
191223
assert_eq!(
@@ -210,6 +242,22 @@ fn test_extract_user_limit_absent() {
210242
);
211243
}
212244

245+
#[test]
246+
fn test_extract_user_limit_table_name_contains_limit() {
247+
assert_eq!(
248+
super::extract_user_limit("SELECT * FROM tapp_appointment_message_event_limit"),
249+
None
250+
);
251+
}
252+
253+
#[test]
254+
fn test_extract_user_limit_table_name_contains_limit_with_real_limit() {
255+
assert_eq!(
256+
super::extract_user_limit("SELECT * FROM tapp_appointment_message_event_limit LIMIT 10"),
257+
Some(10)
258+
);
259+
}
260+
213261
#[test]
214262
fn test_build_paginated_query_no_user_limit() {
215263
let q = "SELECT o.id FROM orders o ORDER BY o.created_at DESC";
@@ -244,6 +292,36 @@ fn test_build_paginated_query_user_limit_exhausted() {
244292
assert_eq!(result, "SELECT * FROM t LIMIT 0 OFFSET 100");
245293
}
246294

295+
#[test]
296+
fn test_build_paginated_query_table_name_contains_limit() {
297+
let q = "SELECT * FROM tapp_appointment_message_event_limit ORDER BY id";
298+
let result = build_paginated_query(q, 100, 1);
299+
assert_eq!(
300+
result,
301+
"SELECT * FROM tapp_appointment_message_event_limit ORDER BY id LIMIT 101 OFFSET 0"
302+
);
303+
}
304+
305+
#[test]
306+
fn test_build_paginated_query_table_name_contains_limit_with_user_limit() {
307+
let q = "SELECT * FROM tapp_appointment_message_event_limit ORDER BY id LIMIT 10";
308+
let result = build_paginated_query(q, 100, 1);
309+
assert_eq!(
310+
result,
311+
"SELECT * FROM tapp_appointment_message_event_limit ORDER BY id LIMIT 10 OFFSET 0"
312+
);
313+
}
314+
315+
#[test]
316+
fn test_build_paginated_query_subquery_with_limit() {
317+
let q = "SELECT * FROM (SELECT id FROM t ORDER BY id LIMIT 100) sub ORDER BY id LIMIT 5";
318+
let result = build_paginated_query(q, 100, 1);
319+
assert_eq!(
320+
result,
321+
"SELECT * FROM (SELECT id FROM t ORDER BY id LIMIT 100) sub ORDER BY id LIMIT 5 OFFSET 0"
322+
);
323+
}
324+
247325
#[test]
248326
fn test_encode_blob_full_preserves_all_data() {
249327
// 8KB of data — encode_blob would truncate, encode_blob_full must not

src-tauri/src/mcp/mod.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -502,12 +502,13 @@ fn handle_list_tools() -> Result<Value, JsonRpcError> {
502502
},
503503
Tool {
504504
name: "run_query".to_string(),
505-
description: Some("Execute a SQL query on a specific connection".to_string()),
505+
description: Some("Execute a SQL query on a specific connection. If the query already contains a LIMIT clause, it will be respected.".to_string()),
506506
input_schema: json!({
507507
"type": "object",
508508
"properties": {
509509
"connection_id": { "type": "string", "description": "The ID or name of the connection" },
510-
"query": { "type": "string", "description": "The SQL query to execute" }
510+
"query": { "type": "string", "description": "The SQL query to execute" },
511+
"limit": { "type": "integer", "description": "Maximum number of rows to return (default: 100). If the query already contains a LIMIT clause smaller than this value, the query's LIMIT takes precedence." }
511512
},
512513
"required": ["connection_id", "query"]
513514
}),
@@ -821,6 +822,11 @@ async fn tool_run_query(
821822
data: None,
822823
})?;
823824

825+
let max_rows = args
826+
.get("limit")
827+
.and_then(|v| v.as_u64())
828+
.unwrap_or(100) as u32;
829+
824830
audit.connection_id = Some(conn_id.to_string());
825831
audit.query = Some(query.to_string());
826832
let kind = ai_activity::classify_query_kind(query);
@@ -986,11 +992,13 @@ async fn tool_run_query(
986992
}
987993

988994
let result = match conn.params.driver.as_str() {
989-
"mysql" => mysql::execute_query(&db_params, &effective_query, Some(100), 1, None).await,
995+
"mysql" => {
996+
mysql::execute_query(&db_params, &effective_query, Some(max_rows), 1, None).await
997+
}
990998
"postgres" => {
991-
postgres::execute_query(&db_params, &effective_query, Some(100), 1, None).await
999+
postgres::execute_query(&db_params, &effective_query, Some(max_rows), 1, None).await
9921000
}
993-
"sqlite" => sqlite::execute_query(&db_params, &effective_query, Some(100), 1).await,
1001+
"sqlite" => sqlite::execute_query(&db_params, &effective_query, Some(max_rows), 1).await,
9941002
_ => Err("Unsupported driver".into()),
9951003
}
9961004
.map_err(|e| JsonRpcError {

0 commit comments

Comments
 (0)