forked from ultraworkers/claw-code
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsecretScanner.ts
More file actions
324 lines (302 loc) · 9.24 KB
/
Copy pathsecretScanner.ts
File metadata and controls
324 lines (302 loc) · 9.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
/**
* Client-side secret scanner for team memory (PSR M22174).
*
* Scans content for credentials before upload so secrets never leave the
* user's machine. Uses a curated subset of high-confidence rules from
* gitleaks (https://github.com/gitleaks/gitleaks, MIT license) — only
* rules with distinctive prefixes that have near-zero false-positive
* rates are included. Generic keyword-context rules are omitted.
*
* Rule IDs and regexes sourced directly from the public gitleaks config:
* https://github.com/gitleaks/gitleaks/blob/master/config/gitleaks.toml
*
* JS regex notes:
* - gitleaks uses Go regex; inline (?i) and mode groups (?-i:...) are
* not portable to JS. Affected rules are rewritten with explicit
* character classes ([a-zA-Z0-9] instead of (?i)[a-z0-9]).
* - Trailing boundary alternations like (?:[\x60'"\s;]|\\[nr]|$) from
* Go regex are kept (JS $ matches end-of-string in default mode).
*/
import { capitalize } from '../../utils/stringUtils.js'
/**
 * One entry of the curated rule table: an identifier plus the regex
 * (kept as source text) that recognizes that kind of credential.
 */
type SecretRule = {
/** Gitleaks rule ID (kebab-case); reported as the match's ruleId and turned into its label */
id: string
/** Regex source text, handed to `new RegExp` the first time the rules are compiled */
source: string
/** Optional JS regex flags passed alongside `source`; omitted means no flags (case-sensitive) */
flags?: string
}
/**
 * One rule that fired during a scan, as returned by scanForSecrets.
 * Identifies the rule only — there is deliberately no field for the
 * matched secret text.
 */
export type SecretMatch = {
/** Gitleaks rule ID that matched (e.g., "github-pat", "aws-access-token") */
ruleId: string
/** Human-readable label derived from the rule ID */
label: string
}
// ─── Curated rules ──────────────────────────────────────────────
// High-confidence patterns from gitleaks with distinctive prefixes.
// Ordered roughly by likelihood of appearing in dev-team content.
// Anthropic API key prefix, assembled at runtime so the literal byte
// sequence isn't present in the external bundle (excluded-strings check).
// join() is not constant-folded by the minifier, so keep this exact form:
// do not replace it with a string literal or `+` concatenation.
// Interpolated into the 'anthropic-api-key' rule source below.
const ANT_KEY_PFX = ['sk', 'ant', 'api'].join('-')
// Rule table. Each `source` is regex text inside a JS string literal, so
// every regex backslash is doubled (`\\b` is the regex `\b`, `\\\\` is a
// literal backslash).
//
// Two shapes of rule appear below:
// - Rules with ONE capture group: the group is the secret itself; the
//   surrounding pattern only asserts boundaries/delimiters.
// - Rules with NO capture group: the whole match is the secret.
const SECRET_RULES: SecretRule[] = [
// — Cloud providers —
{
id: 'aws-access-token',
source: '\\b((?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z2-7]{16})\\b',
},
{
id: 'gcp-api-key',
source: '\\b(AIza[\\w-]{35})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'azure-ad-client-secret',
// Unlike the `\b`-anchored rules, this one CONSUMES a delimiter character
// (or start-of-input) before the secret as well as after it.
source:
'(?:^|[\\\\\'"\\x60\\s>=:(,)])([a-zA-Z0-9_~.]{3}\\dQ~[a-zA-Z0-9_~.-]{31,34})(?:$|[\\\\\'"\\x60\\s<),])',
},
{
id: 'digitalocean-pat',
source: '\\b(dop_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'digitalocean-access-token',
source: '\\b(doo_v1_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — AI APIs —
{
id: 'anthropic-api-key',
// Template literal: the key prefix comes from ANT_KEY_PFX rather than
// being spelled out here.
source: `\\b(${ANT_KEY_PFX}03-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60'"\\s;]|\\\\[nr]|$)`,
},
{
id: 'anthropic-admin-api-key',
source:
'\\b(sk-ant-admin01-[a-zA-Z0-9_\\-]{93}AA)(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'openai-api-key',
// Two alternatives inside the group: prefixed keys (proj/svcacct/admin)
// and the shorter un-prefixed form; both contain the "T3BlbkFJ" marker.
source:
'\\b(sk-(?:proj|svcacct|admin)-(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})T3BlbkFJ(?:[A-Za-z0-9_-]{74}|[A-Za-z0-9_-]{58})\\b|sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'huggingface-access-token',
// gitleaks: hf_(?i:[a-z]{34}) → JS: hf_[a-zA-Z]{34}
source: '\\b(hf_[a-zA-Z]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — Version control —
{
id: 'github-pat',
source: 'ghp_[0-9a-zA-Z]{36}',
},
{
id: 'github-fine-grained-pat',
source: 'github_pat_\\w{82}',
},
{
id: 'github-app-token',
source: '(?:ghu|ghs)_[0-9a-zA-Z]{36}',
},
{
id: 'github-oauth',
source: 'gho_[0-9a-zA-Z]{36}',
},
{
id: 'github-refresh-token',
source: 'ghr_[0-9a-zA-Z]{36}',
},
{
id: 'gitlab-pat',
source: 'glpat-[\\w-]{20}',
},
{
id: 'gitlab-deploy-token',
source: 'gldt-[0-9a-zA-Z_\\-]{20}',
},
// — Communication —
{
id: 'slack-bot-token',
source: 'xoxb-[0-9]{10,13}-[0-9]{10,13}[a-zA-Z0-9-]*',
},
{
id: 'slack-user-token',
source: 'xox[pe](?:-[0-9]{10,13}){3}-[a-zA-Z0-9-]{28,34}',
},
{
id: 'slack-app-token',
source: 'xapp-\\d-[A-Z0-9]+-\\d+-[a-z0-9]+',
// The `i` flag applies to the whole pattern, including the "xapp-" prefix.
flags: 'i',
},
{
id: 'twilio-api-key',
source: 'SK[0-9a-fA-F]{32}',
},
{
id: 'sendgrid-api-token',
// gitleaks: SG\.(?i)[a-z0-9=_\-\.]{66} → JS: explicit [a-zA-Z0-9…] class,
// no `i` flag, so the "SG." prefix itself stays case-sensitive
source: '\\b(SG\\.[a-zA-Z0-9=_\\-.]{66})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — Dev tooling —
{
id: 'npm-access-token',
source: '\\b(npm_[a-zA-Z0-9]{36})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'pypi-upload-token',
source: 'pypi-AgEIcHlwaS5vcmc[\\w-]{50,1000}',
},
{
id: 'databricks-api-token',
source: '\\b(dapi[a-f0-9]{32}(?:-\\d)?)(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'hashicorp-tf-api-token',
// gitleaks: (?i)[a-z0-9]{14}\.(?-i:atlasv1)\.[a-z0-9\-_=]{60,70}
// → JS: explicit mixed-case alphanumeric prefix, case-sensitive literal
// "atlasv1", explicit mixed-case suffix class (no `i` flag needed)
source: '[a-zA-Z0-9]{14}\\.atlasv1\\.[a-zA-Z0-9\\-_=]{60,70}',
},
{
id: 'pulumi-api-token',
source: '\\b(pul-[a-f0-9]{40})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'postman-api-token',
// gitleaks: PMAK-(?i)[a-f0-9]{24}\-[a-f0-9]{34} → JS: use [a-fA-F0-9]
source:
'\\b(PMAK-[a-fA-F0-9]{24}-[a-fA-F0-9]{34})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
// — Observability —
{
id: 'grafana-api-key',
source:
'\\b(eyJrIjoi[A-Za-z0-9+/]{70,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'grafana-cloud-api-token',
source: '\\b(glc_[A-Za-z0-9+/]{32,400}={0,3})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'grafana-service-account-token',
source:
'\\b(glsa_[A-Za-z0-9]{32}_[A-Fa-f0-9]{8})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'sentry-user-token',
source: '\\b(sntryu_[a-f0-9]{64})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'sentry-org-token',
// No capture group and no trailing-delimiter alternation: the whole match
// is the token.
source:
'\\bsntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43}',
},
// — Payment / commerce —
{
id: 'stripe-access-token',
source:
'\\b((?:sk|rk)_(?:test|live|prod)_[a-zA-Z0-9]{10,99})(?:[\\x60\'"\\s;]|\\\\[nr]|$)',
},
{
id: 'shopify-access-token',
source: 'shpat_[a-fA-F0-9]{32}',
},
{
id: 'shopify-shared-secret',
source: 'shpss_[a-fA-F0-9]{32}',
},
// — Crypto —
{
id: 'private-key',
// Matches from the BEGIN line through the END line; the lazy `{64,}?`
// requires at least 64 characters of body and stops at the first END marker.
source:
'-----BEGIN[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----[\\s\\S-]{64,}?-----END[ A-Z0-9_-]{0,100}PRIVATE KEY(?: BLOCK)?-----',
flags: 'i',
},
]
// Cache of compiled scan patterns. Stays null until the first scan asks for
// it, so the regexes are never built if scanning is never used.
let compiledRules: Array<{ id: string; re: RegExp }> | null = null

/**
 * Return the scan patterns, compiling every entry of SECRET_RULES on the
 * first call and handing back the same cached array afterwards.
 *
 * @returns One `{ id, re }` pair per rule, in table order.
 */
function getCompiledRules(): Array<{ id: string; re: RegExp }> {
  if (compiledRules !== null) {
    return compiledRules
  }
  const built: Array<{ id: string; re: RegExp }> = []
  for (const rule of SECRET_RULES) {
    built.push({ id: rule.id, re: new RegExp(rule.source, rule.flags) })
  }
  // Publish only after every pattern compiled, so a throwing rule leaves
  // the cache empty rather than half-filled.
  compiledRules = built
  return built
}
/**
 * Rule-ID words whose canonical capitalization differs from title case.
 *
 * Held in a Map rather than an object literal so that a lookup can only
 * ever hit the entries listed here: words such as "constructor" or
 * "__proto__" do not resolve to inherited Object.prototype members.
 * Built once at module load instead of on every label conversion.
 */
const LABEL_SPECIAL_CASES: ReadonlyMap<string, string> = new Map([
  ['aws', 'AWS'],
  ['gcp', 'GCP'],
  ['api', 'API'],
  ['pat', 'PAT'],
  ['ad', 'AD'],
  ['tf', 'TF'],
  ['oauth', 'OAuth'],
  ['npm', 'NPM'],
  ['pypi', 'PyPI'],
  ['jwt', 'JWT'],
  ['github', 'GitHub'],
  ['gitlab', 'GitLab'],
  ['openai', 'OpenAI'],
  ['digitalocean', 'DigitalOcean'],
  ['huggingface', 'HuggingFace'],
  ['hashicorp', 'HashiCorp'],
  ['sendgrid', 'SendGrid'],
])

/**
 * Convert a gitleaks rule ID (kebab-case) to a human-readable label.
 * e.g., "github-pat" → "GitHub PAT", "aws-access-token" → "AWS Access Token"
 *
 * The ID is split on "-"; each word is replaced by its special-case
 * spelling when one is listed, otherwise passed through `capitalize`.
 * Words are re-joined with single spaces.
 *
 * @param ruleId Rule ID to convert; unknown IDs are accepted.
 * @returns The label, always a string.
 */
function ruleIdToLabel(ruleId: string): string {
  return ruleId
    .split('-')
    .map(part => LABEL_SPECIAL_CASES.get(part) ?? capitalize(part))
    .join(' ')
}
/**
 * Check a string against every compiled secret rule.
 *
 * Each rule ID is reported at most once, in rule-table order, no matter
 * how many times its pattern occurs. Only the rule's ID and label are
 * returned; the matched text itself is deliberately left out so secret
 * values are never logged or displayed.
 *
 * @param content Text to inspect.
 * @returns One entry per rule that fired; empty when nothing matched.
 */
export function scanForSecrets(content: string): SecretMatch[] {
  const firedIds = new Set<string>()
  const found: SecretMatch[] = []
  for (const { id, re } of getCompiledRules()) {
    // Skip IDs already reported before running the regex at all.
    if (firedIds.has(id) || !re.test(content)) {
      continue
    }
    firedIds.add(id)
    found.push({ ruleId: id, label: ruleIdToLabel(id) })
  }
  return found
}
/**
 * Public accessor for the display label of a gitleaks rule ID.
 *
 * Delegates to the same kebab-case → label conversion used when building
 * scan results, so IDs that are not in the rule table still get a label.
 *
 * @param ruleId Rule ID in kebab-case.
 * @returns The human-readable label.
 */
export function getSecretLabel(ruleId: string): string {
  const label = ruleIdToLabel(ruleId)
  return label
}
// Global-flag copies of the rule patterns, compiled on the first redaction.
let redactRules: RegExp[] | null = null

/**
 * Replace every secret found in `content` with the placeholder [REDACTED].
 * Unlike scanForSecrets, this returns the content with spans replaced
 * so the surrounding text can still be written to disk safely.
 *
 * For rules with a capture group only the captured secret is replaced;
 * the delimiter characters matched around it are kept. Rules without a
 * capture group have their whole match replaced.
 *
 * @param content Text that may contain secrets.
 * @returns The text with each detected secret replaced; unchanged when
 *   nothing matched.
 */
export function redactSecrets(content: string): string {
  redactRules ??= SECRET_RULES.map(
    // Force the `g` flag (without duplicating it) so replace() visits
    // every occurrence, not just the first.
    r => new RegExp(r.source, (r.flags ?? '').replace('g', '') + 'g'),
  )
  let redacted = content
  for (const re of redactRules) {
    // A single global pass can miss a secret: some patterns consume the
    // delimiter on either side, so when two secrets share one delimiter
    // ("s1 s2") the first match eats the character the second needs as its
    // leading boundary. Re-run the rule until a pass stops shrinking the
    // text. Requiring the length to drop also guarantees termination.
    let before: string
    do {
      before = redacted
      redacted = before.replace(re, (match, g1) =>
        // g1 is a string only when the rule has a capture group; otherwise
        // the second callback argument is the numeric match offset.
        typeof g1 === 'string' ? match.replace(g1, '[REDACTED]') : '[REDACTED]',
      )
    } while (redacted.length < before.length)
  }
  return redacted
}