Merge pull request #8830 from uinstinct/ripgrep-regex
fix: regular expressions in ripgrep
This commit is contained in:
@@ -13,7 +13,7 @@ export const grepSearchTool: Tool = {
|
||||
function: {
|
||||
name: BuiltInToolNames.GrepSearch,
|
||||
description:
|
||||
"Performs a regex search over the repository using ripgrep. Will not include results for many build, cache, secrets dirs/files. Output may be truncated, so use targeted queries",
|
||||
"Performs a regular expression (regex) search over the repository using ripgrep. Will not include results for many build, cache, secrets dirs/files. Output may be truncated, so use targeted queries",
|
||||
parameters: {
|
||||
type: "object",
|
||||
required: ["query"],
|
||||
@@ -21,7 +21,7 @@ export const grepSearchTool: Tool = {
|
||||
query: {
|
||||
type: "string",
|
||||
description:
|
||||
"The search query to use. Must be the exact string to be searched or a valid ripgrep expression. Use regex with alternation (e.g., 'word1|word2|word3) or character classes to find multiple potential words in a single search.",
|
||||
"The regex pattern to search for within file contents. Use regex with alternation (e.g., 'word1|word2|word3') or character classes to find multiple potential words in a single search.",
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
@@ -150,16 +150,6 @@ export function prepareQueryForRipgrep(query: string): {
|
||||
query: string;
|
||||
warning?: string;
|
||||
} {
|
||||
// Check if it looks like a literal search that should be escaped
|
||||
if (looksLikeLiteralSearch(query)) {
|
||||
return {
|
||||
query: escapeLiteralForRegex(query),
|
||||
warning:
|
||||
"Query contained special regex characters and was escaped for literal text search",
|
||||
};
|
||||
}
|
||||
|
||||
// Otherwise validate and sanitize as regex
|
||||
const validation = validateAndSanitizeRegex(query);
|
||||
|
||||
return {
|
||||
|
||||
@@ -111,8 +111,7 @@ describe("looksLikeLiteralSearch", () => {
|
||||
describe("prepareQueryForRipgrep", () => {
|
||||
it("should escape literal-looking queries", () => {
|
||||
const result = prepareQueryForRipgrep("hello.world");
|
||||
expect(result.query).toBe("hello\\.world");
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
expect(result.query).toBe("hello.world");
|
||||
});
|
||||
|
||||
it("should sanitize regex patterns", () => {
|
||||
@@ -130,14 +129,12 @@ describe("prepareQueryForRipgrep", () => {
|
||||
describe("real-world examples", () => {
|
||||
it("should escape file patterns", () => {
|
||||
const result = prepareQueryForRipgrep("*.js");
|
||||
expect(result.query).toBe("\\*\\.js");
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
expect(result.query).toBe("*.js");
|
||||
});
|
||||
|
||||
it("should escape function calls", () => {
|
||||
const result = prepareQueryForRipgrep("console.log()");
|
||||
expect(result.query).toBe("console\\.log\\(\\)");
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
expect(result.query).toBe("console.log()");
|
||||
});
|
||||
|
||||
it("should not escape proper regex patterns", () => {
|
||||
@@ -163,8 +160,7 @@ describe("problematic patterns that originally failed", () => {
|
||||
|
||||
it("should handle dollar signs in shell patterns", () => {
|
||||
const result = prepareQueryForRipgrep("$(command)");
|
||||
expect(result.query).toBe("\\$\\(command\\)");
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
expect(result.query).toBe("$(command)");
|
||||
});
|
||||
|
||||
it("should handle escaped dollar signs", () => {
|
||||
@@ -253,8 +249,7 @@ describe("patterns that should NOT be over-sanitized", () => {
|
||||
describe("edge cases that could trigger false positives", () => {
|
||||
it("should not treat mathematical expressions as problematic", () => {
|
||||
const result = prepareQueryForRipgrep("a + b * c");
|
||||
expect(result.query).toBe("a \\+ b \\* c"); // Should be escaped as literal
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
expect(result.query).toBe("a + b * c"); // Passed through unchanged; no longer escaped as a literal
|
||||
});
|
||||
|
||||
it("should not break regex that uses word boundaries", () => {
|
||||
@@ -271,8 +266,7 @@ describe("patterns that should NOT be over-sanitized", () => {
|
||||
|
||||
it("should not treat IPv4 addresses as problematic regex", () => {
|
||||
const result = prepareQueryForRipgrep("192.168.1.1");
|
||||
expect(result.query).toBe("192\\.168\\.1\\.1"); // Should be escaped as literal
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
expect(result.query).toBe("192.168.1.1"); // Passed through unchanged; no longer escaped as a literal
|
||||
});
|
||||
|
||||
it("should not break hex color codes", () => {
|
||||
@@ -285,8 +279,7 @@ describe("patterns that should NOT be over-sanitized", () => {
|
||||
describe("patterns that should preserve user intent", () => {
|
||||
it("should not sanitize intentional regex alternation", () => {
|
||||
const result = prepareQueryForRipgrep("(foo|bar)");
|
||||
expect(result.query).toBe("\\(foo\\|bar\\)"); // Should be escaped as literal since unescaped
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
expect(result.query).toBe("(foo|bar)"); // Passed through unchanged; unescaped alternation is no longer escaped as a literal
|
||||
});
|
||||
|
||||
it("should preserve escaped alternation in regex", () => {
|
||||
@@ -300,11 +293,5 @@ describe("patterns that should NOT be over-sanitized", () => {
|
||||
expect(result.query).toBe("name LIKE '%john%'"); // No regex metacharacters here
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should not break CSS selectors when used as literal search", () => {
|
||||
const result = prepareQueryForRipgrep(".class-name > div:nth-child(2)");
|
||||
expect(result.query).toBe("\\.class-name > div:nth-child\\(2\\)");
|
||||
expect(result.warning).toContain("escaped for literal text search");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user