forked from ultraworkers/claw-code
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsedEditParser.ts
More file actions
322 lines (287 loc) · 9.34 KB
/
Copy pathsedEditParser.ts
File metadata and controls
322 lines (287 loc) · 9.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
/**
* Parser for sed edit commands (-i flag substitutions)
* Extracts file paths and substitution patterns to enable file-edit-style rendering
*/
import { randomBytes } from 'crypto'
import { tryParseShellCommand } from '../../utils/bash/shellQuote.js'
// Sentinels for the BRE→ERE conversion. Every sentinel is a label wrapped in
// NUL bytes, and each one has a matching global RegExp used to swap it back
// out once the conversion is finished.
const toSentinel = (label: string): string => `\x00${label}\x00`
const toGlobalMatcher = (sentinel: string): RegExp => new RegExp(sentinel, 'g')

const BACKSLASH_PLACEHOLDER = toSentinel('BACKSLASH')
const PLUS_PLACEHOLDER = toSentinel('PLUS')
const QUESTION_PLACEHOLDER = toSentinel('QUESTION')
const PIPE_PLACEHOLDER = toSentinel('PIPE')
const LPAREN_PLACEHOLDER = toSentinel('LPAREN')
const RPAREN_PLACEHOLDER = toSentinel('RPAREN')

const BACKSLASH_PLACEHOLDER_RE = toGlobalMatcher(BACKSLASH_PLACEHOLDER)
const PLUS_PLACEHOLDER_RE = toGlobalMatcher(PLUS_PLACEHOLDER)
const QUESTION_PLACEHOLDER_RE = toGlobalMatcher(QUESTION_PLACEHOLDER)
const PIPE_PLACEHOLDER_RE = toGlobalMatcher(PIPE_PLACEHOLDER)
const LPAREN_PLACEHOLDER_RE = toGlobalMatcher(LPAREN_PLACEHOLDER)
const RPAREN_PLACEHOLDER_RE = toGlobalMatcher(RPAREN_PLACEHOLDER)
/** Pieces of a `sed -i 's/pattern/replacement/flags' file` command. */
export type SedEditInfo = {
  /** Path of the file the command edits */
  filePath: string
  /** Search side of the substitution, exactly as written in the expression */
  pattern: string
  /** Replacement side of the substitution, exactly as written in the expression */
  replacement: string
  /** Trailing substitution flags (for example `g` or `i`) */
  flags: string
  /** True when the command asked for extended regex syntax (-E or -r) */
  extendedRegex: boolean
}
/**
 * Tell whether a command is a sed in-place edit this module can parse.
 *
 * @param command - Full shell command text.
 * @returns `true` exactly when `parseSedEditCommand` yields a result for it.
 */
export function isSedInPlaceEdit(command: string): boolean {
  return parseSedEditCommand(command) !== null
}
/**
 * Split an `s/pattern/replacement/flags` expression into its three parts.
 *
 * Only `/` is supported as the delimiter. A backslash followed by any
 * character is copied through verbatim (both characters), so an escaped `\/`
 * does not end the current part.
 *
 * @param expression - The sed script, e.g. `s/foo/bar/g`.
 * @returns The raw parts, or `null` when the expression is not a complete
 *   `s///` command, has an empty pattern, or carries a flag outside the
 *   allow-list.
 */
function parseSubstitutionExpression(
  expression: string,
): Pick<SedEditInfo, 'pattern' | 'replacement' | 'flags'> | null {
  if (!expression.startsWith('s/')) return null
  const rest = expression.slice(2) // Skip 's/'

  let pattern = ''
  let replacement = ''
  let flags = ''
  let state: 'pattern' | 'replacement' | 'flags' = 'pattern'

  const append = (text: string): void => {
    if (state === 'pattern') pattern += text
    else if (state === 'replacement') replacement += text
    else flags += text
  }

  for (let j = 0; j < rest.length; j++) {
    const char = rest.charAt(j)
    if (char === '\\' && j + 1 < rest.length) {
      // Escaped character: keep the pair together so `\/` is not a delimiter
      append(char + rest.charAt(j + 1))
      j++
      continue
    }
    if (char !== '/') {
      append(char)
      continue
    }
    if (state === 'pattern') {
      state = 'replacement'
    } else if (state === 'replacement') {
      state = 'flags'
    } else {
      // Extra delimiter in flags - unexpected
      return null
    }
  }

  // Both delimiters after `s/` must have been seen (flags may be empty)
  if (state !== 'flags') return null

  // In sed an empty pattern means "reuse the last regex", which does not exist
  // for a single-expression command; a JS RegExp would instead match the empty
  // string, so refuse rather than describe an edit sed would not make.
  if (pattern === '') return null

  // Only allow substitution flags from this allow-list
  if (!/^[gpimIM1-9]*$/.test(flags)) return null

  return { pattern, replacement, flags }
}

/**
 * Parse a sed edit command and extract the edit information.
 *
 * Accepted shape: `sed` + in-place flag (`-i`, `-i SUFFIX`, `-iSUFFIX`,
 * `--in-place`, `--in-place=SUFFIX`) + optional `-E`/`-r`/`--regexp-extended`
 * + exactly one `s/pattern/replacement/flags` expression (bare, `-e EXPR`,
 * `--expression EXPR` or `--expression=EXPR`) + exactly one file.
 *
 * @param command - Full shell command text.
 * @returns The parsed edit, or `null` if the command is not a valid, simple
 *   sed in-place edit (unknown flags, several expressions or files, shell
 *   operators, globs, or a malformed expression).
 */
export function parseSedEditCommand(command: string): SedEditInfo | null {
  const trimmed = command.trim()

  // Must start with sed
  const sedMatch = trimmed.match(/^\s*sed\s+/)
  if (!sedMatch) return null

  const withoutSed = trimmed.slice(sedMatch[0].length)
  const parseResult = tryParseShellCommand(withoutSed)
  if (!parseResult.success) return null

  // Keep plain string tokens. Any token carrying an `op` (a glob, and
  // presumably also control operators and redirections such as `|`, `&&`,
  // `;`, `>` - confirm against tryParseShellCommand's token shape) means this
  // is not a lone sed invocation, so bail out instead of dropping the operator
  // and misreading what follows it as sed arguments.
  const args: string[] = []
  for (const token of parseResult.tokens) {
    if (typeof token === 'string') {
      args.push(token)
    } else if (typeof token === 'object' && token !== null && 'op' in token) {
      return null
    }
  }

  let hasInPlaceFlag = false
  let extendedRegex = false
  let expression: string | null = null
  let filePath: string | null = null

  let i = 0
  while (i < args.length) {
    const arg = args[i]!

    // -i / --in-place, optionally followed by a separate backup suffix
    if (arg === '-i' || arg === '--in-place') {
      hasInPlaceFlag = true
      i++
      // On macOS, -i requires a suffix argument (even if empty string).
      // Treat the next arg as that suffix only when it is empty or starts
      // with a dot, so flags and sed expressions are never consumed here.
      const nextArg = args[i]
      if (
        nextArg !== undefined &&
        (nextArg === '' || nextArg.startsWith('.'))
      ) {
        i++ // Skip the backup suffix
      }
      continue
    }

    // -i.bak / --in-place=.bak (suffix attached to the flag)
    if (arg.startsWith('-i') || arg.startsWith('--in-place=')) {
      hasInPlaceFlag = true
      i++
      continue
    }

    // Extended regex flags
    if (arg === '-E' || arg === '-r' || arg === '--regexp-extended') {
      extendedRegex = true
      i++
      continue
    }

    // -e EXPR / --expression EXPR: only a single expression is supported
    if (arg === '-e' || arg === '--expression') {
      const nextArg = args[i + 1]
      if (nextArg === undefined || expression !== null) return null
      expression = nextArg
      i += 2
      continue
    }

    if (arg.startsWith('--expression=')) {
      if (expression !== null) return null
      expression = arg.slice('--expression='.length)
      i++
      continue
    }

    // Unknown flag - not safe to parse
    if (arg.startsWith('-')) return null

    // Non-flag arguments: first is the expression, second is the file
    if (expression === null) {
      expression = arg
    } else if (filePath === null) {
      filePath = arg
    } else {
      // More than one file - not supported for simple rendering
      return null
    }
    i++
  }

  // Must have -i flag, expression, and file path
  if (!hasInPlaceFlag || !expression || !filePath) {
    return null
  }

  const substitution = parseSubstitutionExpression(expression)
  if (substitution === null) return null

  return {
    filePath,
    pattern: substitution.pattern,
    replacement: substitution.replacement,
    flags: substitution.flags,
    extendedRegex,
  }
}
/**
 * One element of a parsed sed replacement: literal text to emit, or the number
 * of a capture group to insert (0 stands for the whole match).
 */
type SedReplacementPart = string | number

/**
 * Split the replacement side of an `s///` command into literal text and group
 * references, reading it in a single left-to-right pass so that escapes cannot
 * interfere with each other (`\\&` is a backslash followed by the match).
 *
 * Handling, following GNU sed: `&` and `\0` insert the whole match, `\1`-`\9`
 * insert a capture group, `\n` / `\t` become newline / tab, and any other
 * escaped character (including `\&`, `\/` and `\\`) becomes that character.
 * The case-conversion escapes `\L \U \E \l \u` are not emulated and are left
 * in the output verbatim.
 */
function parseSedReplacement(replacement: string): SedReplacementPart[] {
  const parts: SedReplacementPart[] = []
  let literal = ''
  const pushReference = (group: number): void => {
    if (literal !== '') {
      parts.push(literal)
      literal = ''
    }
    parts.push(group)
  }

  for (let i = 0; i < replacement.length; i++) {
    const char = replacement.charAt(i)
    if (char === '&') {
      pushReference(0)
      continue
    }
    if (char !== '\\' || i + 1 >= replacement.length) {
      // Ordinary character, or a lone trailing backslash
      literal += char
      continue
    }
    i++
    const escaped = replacement.charAt(i)
    if (escaped >= '0' && escaped <= '9') {
      pushReference(Number(escaped))
    } else if (escaped === 'n') {
      literal += '\n'
    } else if (escaped === 't') {
      literal += '\t'
    } else if ('LUEul'.includes(escaped)) {
      literal += char + escaped
    } else {
      literal += escaped
    }
  }
  if (literal !== '') parts.push(literal)
  return parts
}

/**
 * Apply a sed substitution to file content.
 *
 * The substitution is applied to each line separately, as sed does: without
 * the `g` flag the first match on every line is replaced, and `^` / `$` anchor
 * to the line. Supported flags: `g` (all matches), a number N (only the Nth
 * match on a line, or the Nth and all later ones when combined with `g`),
 * `i`/`I` (case-insensitive), `m`/`M` (passed to the RegExp as `m`) and `p`
 * (a line on which a substitution happened is emitted twice, which is what an
 * in-place edit without `-n` writes to the file).
 *
 * @param content - Current file content.
 * @param sedInfo - Parsed substitution; `pattern` and `replacement` are the raw
 *   text between the delimiters.
 * @returns The new content. The original content is returned unchanged when
 *   the pattern is not a valid regex or when the replacement references a
 *   capture group the pattern does not have (sed rejects such a command).
 */
export function applySedSubstitution(
  content: string,
  sedInfo: SedEditInfo,
): string {
  const { flags } = sedInfo

  // Always scan every match on a line; the occurrence counter further down
  // decides which of them actually get replaced.
  let regexFlags = 'g'
  // Handle case-insensitive flag (i or I in sed)
  if (flags.includes('i') || flags.includes('I')) {
    regexFlags += 'i'
  }
  // Handle multiline flag (m or M in sed)
  if (flags.includes('m') || flags.includes('M')) {
    regexFlags += 'm'
  }

  // Convert sed pattern to JavaScript regex pattern: unescape \/ to /
  let jsPattern = sedInfo.pattern.replace(/\\\//g, '/')

  // In BRE mode (no -E flag), metacharacters have opposite escaping:
  // BRE: \+ means "one or more", + is literal
  // ERE/JS: + means "one or more", \+ is literal
  // NOTE(review): BRE intervals (\{n,m\}) are not translated here.
  if (!sedInfo.extendedRegex) {
    jsPattern = jsPattern
      // Step 1: Protect literal backslashes (\\) first
      .replace(/\\\\/g, BACKSLASH_PLACEHOLDER)
      // Step 2: Escaped metacharacters become placeholders (unescaped in JS)
      .replace(/\\\+/g, PLUS_PLACEHOLDER)
      .replace(/\\\?/g, QUESTION_PLACEHOLDER)
      .replace(/\\\|/g, PIPE_PLACEHOLDER)
      .replace(/\\\(/g, LPAREN_PLACEHOLDER)
      .replace(/\\\)/g, RPAREN_PLACEHOLDER)
      // Step 3: Escape unescaped metacharacters (these are literal in BRE)
      .replace(/\+/g, '\\+')
      .replace(/\?/g, '\\?')
      .replace(/\|/g, '\\|')
      .replace(/\(/g, '\\(')
      .replace(/\)/g, '\\)')
      // Step 4: Replace placeholders with their JS equivalents
      .replace(BACKSLASH_PLACEHOLDER_RE, '\\\\')
      .replace(PLUS_PLACEHOLDER_RE, '+')
      .replace(QUESTION_PLACEHOLDER_RE, '?')
      .replace(PIPE_PLACEHOLDER_RE, '|')
      .replace(LPAREN_PLACEHOLDER_RE, '(')
      .replace(RPAREN_PLACEHOLDER_RE, ')')
  }

  let regex: RegExp
  let groupCount: number
  try {
    regex = new RegExp(jsPattern, regexFlags)
    // Appending an empty alternative makes the pattern match '' without
    // changing its groups, so the result length reveals the group count.
    groupCount = (new RegExp(`${jsPattern}|`).exec('')?.length ?? 1) - 1
  } catch {
    // If regex is invalid, return original content
    return content
  }

  const parts = parseSedReplacement(sedInfo.replacement)
  if (parts.some(part => typeof part === 'number' && part > groupCount)) {
    return content
  }

  const occurrenceFlag = /[1-9]+/.exec(flags)
  const firstOccurrence = occurrenceFlag ? Number(occurrenceFlag[0]) : 1
  const replaceFollowing = flags.includes('g')
  const printOnSubstitution = flags.includes('p')

  // Empty content has no lines, so there is nothing to substitute
  if (content === '') return content

  // A trailing newline terminates the last line; it does not start a new one
  const hasTrailingNewline = content.endsWith('\n')
  const lines = (hasTrailingNewline ? content.slice(0, -1) : content).split(
    '\n',
  )

  const outputLines: string[] = []
  for (const line of lines) {
    let seen = 0
    let replaced = 0
    // A replacer function is used so that `$` in the replacement text is never
    // interpreted as a JavaScript replacement pattern.
    const updated = line.replace(
      regex,
      (match: string, ...captures: unknown[]): string => {
        seen++
        if (
          seen < firstOccurrence ||
          (seen > firstOccurrence && !replaceFollowing)
        ) {
          return match
        }
        replaced++
        return parts
          .map(part => {
            if (typeof part === 'string') return part
            if (part === 0) return match
            // A group that did not take part in the match expands to nothing
            const captured = captures[part - 1]
            return typeof captured === 'string' ? captured : ''
          })
          .join('')
      },
    )
    outputLines.push(updated)
    if (printOnSubstitution && replaced > 0) {
      outputLines.push(updated)
    }
  }

  const result = outputLines.join('\n')
  return hasTrailingNewline ? `${result}\n` : result
}