Skip to content

Commit 3eb2152

Browse files
committed
refactor: transcription logic to whisper module
1 parent e93447e commit 3eb2152

File tree

5 files changed

+399
-321
lines changed

5 files changed

+399
-321
lines changed

lua/gp/config.lua

Lines changed: 30 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -364,35 +364,39 @@ local config = {
364364
-- by eliminating silence and speeding up the tempo of the recording
365365
-- we can reduce the cost by 50% or more and get the results faster
366366

367-
-- OpenAI audio/transcriptions api endpoint to transcribe audio to text
368-
whisper_api_endpoint = "https://api.openai.com/v1/audio/transcriptions",
369-
-- directory for storing whisper files
370-
whisper_dir = (os.getenv("TMPDIR") or os.getenv("TEMP") or "/tmp") .. "/gp_whisper",
371-
-- multiplier of RMS level dB for threshold used by sox to detect silence vs speech
372-
-- decibels are negative, the recording is normalized to -3dB =>
373-
-- increase this number to pick up more (weaker) sounds as possible speech
374-
-- decrease this number to pick up only louder sounds as possible speech
375-
-- you can disable silence trimming by setting this a very high number (like 1000.0)
376-
whisper_silence = "1.75",
377-
-- whisper tempo (1.0 is normal speed)
378-
whisper_tempo = "1.75",
379-
-- The language of the input audio, in ISO-639-1 format.
380-
whisper_language = "en",
381-
-- command to use for recording can be nil (unset) for automatic selection
382-
-- string ("sox", "arecord", "ffmpeg") or table with command and arguments:
383-
-- sox is the most universal, but can have start/end cropping issues caused by latency
384-
-- arecord is linux only, but has no cropping issues and is faster
385-
-- ffmpeg in the default configuration is macos only, but can be used on any platform
386-
-- (see https://trac.ffmpeg.org/wiki/Capture/Desktop for more info)
387-
-- below is the default configuration for all three commands:
388-
-- whisper_rec_cmd = {"sox", "-c", "1", "--buffer", "32", "-d", "rec.wav", "trim", "0", "60:00"},
389-
-- whisper_rec_cmd = {"arecord", "-c", "1", "-f", "S16_LE", "-r", "48000", "-d", "3600", "rec.wav"},
390-
-- whisper_rec_cmd = {"ffmpeg", "-y", "-f", "avfoundation", "-i", ":0", "-t", "3600", "rec.wav"},
391-
whisper_rec_cmd = nil,
367+
whisper = {
368+
-- you can disable whisper completely by whisper = {disable = true}
369+
disable = false,
370+
-- OpenAI audio/transcriptions api endpoint to transcribe audio to text
371+
endpoint = "https://api.openai.com/v1/audio/transcriptions",
372+
-- directory for storing whisper files
373+
store_dir = (os.getenv("TMPDIR") or os.getenv("TEMP") or "/tmp") .. "/gp_whisper",
374+
-- multiplier of RMS level dB for threshold used by sox to detect silence vs speech
375+
-- decibels are negative, the recording is normalized to -3dB =>
376+
-- increase this number to pick up more (weaker) sounds as possible speech
377+
-- decrease this number to pick up only louder sounds as possible speech
378+
-- you can disable silence trimming by setting this to a very high number (like 1000.0)
379+
silence = "1.75",
380+
-- whisper tempo (1.0 is normal speed)
381+
tempo = "1.75",
382+
-- The language of the input audio, in ISO-639-1 format.
383+
language = "en",
384+
-- command to use for recording; can be nil (unset) for automatic selection,
385+
-- string ("sox", "arecord", "ffmpeg") or table with command and arguments:
386+
-- sox is the most universal, but can have start/end cropping issues caused by latency
387+
-- arecord is linux only, but has no cropping issues and is faster
388+
-- ffmpeg in the default configuration is macos only, but can be used on any platform
389+
-- (see https://trac.ffmpeg.org/wiki/Capture/Desktop for more info)
390+
-- below is the default configuration for all three commands:
391+
-- rec_cmd = {"sox", "-c", "1", "--buffer", "32", "-d", "rec.wav", "trim", "0", "60:00"},
392+
-- rec_cmd = {"arecord", "-c", "1", "-f", "S16_LE", "-r", "48000", "-d", "3600", "rec.wav"},
393+
-- rec_cmd = {"ffmpeg", "-y", "-f", "avfoundation", "-i", ":0", "-t", "3600", "rec.wav"},
394+
rec_cmd = nil,
395+
},
392396

393397
-- image generation settings
394398
image = {
395-
-- you can disable image generation logic completely by image.disable = true
399+
-- you can disable image generation logic completely by image = {disable = true}
396400
disable = false,
397401

398402
-- required openai api key (string or table with command and arguments)

lua/gp/deprecator.lua

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ M._deprecated = {}
1111

1212
local switch_to_agent = "Please use `agents` table and switch agents in runtime via `:GpAgent XY`"
1313

14-
local image_nested = function(variable)
15-
local new_variable = variable:gsub("image_", "")
14+
local nested = function(variable, prefix)
15+
local new_variable = variable:gsub(prefix .. "_", "")
1616
return render.template(
17-
"`{{old}}`\nPlease use `image = { {{new}} = ... }`",
18-
{ ["{{old}}"] = variable, ["{{new}}"] = new_variable }
17+
"`{{old}}`\nPlease use `{{prefix}} = { {{new}} = ... }`",
18+
{ ["{{old}}"] = variable, ["{{new}}"] = new_variable, ["{{prefix}}"] = prefix }
1919
)
2020
end
2121

@@ -44,6 +44,8 @@ local deprecated = {
4444
.. "\nThe `openai_api_key` is still supported for backwards compatibility,\n"
4545
.. "and automatically converted to `providers.openai.secret` if the new config is not set.",
4646
image_dir = "`image_dir`\nPlease use `image = { store_dir = ... }`",
47+
whisper_dir = "`whisper_dir`\nPlease use `whisper = { store_dir = ... }`",
48+
whisper_api_endpoint = "`whisper_api_endpoint`\nPlease use `whisper = { endpoint = ... }`",
4749
}
4850

4951
M.is_valid = function(k, v)
@@ -52,7 +54,11 @@ M.is_valid = function(k, v)
5254
return false
5355
end
5456
if helpers.starts_with(k, "image_") then
55-
table.insert(M._deprecated, { name = k, msg = image_nested(k), value = v })
57+
table.insert(M._deprecated, { name = k, msg = nested(k, "image"), value = v })
58+
return false
59+
end
60+
if helpers.starts_with(k, "whisper_") then
61+
table.insert(M._deprecated, { name = k, msg = nested(k, "whisper"), value = v })
5662
return false
5763
end
5864
return true

lua/gp/imager.lua

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ local I = {
1111
config = {},
1212
_state = {},
1313
cmd = {},
14+
disabled = false,
1415
}
1516

1617
---@param opts table # user config
@@ -20,6 +21,7 @@ I.setup = function(opts)
2021
I.config = vim.deepcopy(default_config.image)
2122

2223
if opts.disable then
24+
I.disabled = true
2325
logger.debug("imager is disabled")
2426
return
2527
end
@@ -69,6 +71,7 @@ I.setup = function(opts)
6971
I.refresh()
7072

7173
for cmd, _ in pairs(I.cmd) do
74+
-- TODO: this could be a helper function
7275
vim.api.nvim_create_user_command(I.config.cmd_prefix .. cmd, function(params)
7376
I.cmd[cmd](params)
7477
end, {

0 commit comments

Comments
 (0)