@@ -364,35 +364,39 @@ local config = {
364364 -- by eliminating silence and speeding up the tempo of the recording
365365 -- we can reduce the cost by 50% or more and get the results faster
366366
367- -- OpenAI audio/transcriptions api endpoint to transcribe audio to text
368- whisper_api_endpoint = " https://api.openai.com/v1/audio/transcriptions" ,
369- -- directory for storing whisper files
370- whisper_dir = (os.getenv (" TMPDIR" ) or os.getenv (" TEMP" ) or " /tmp" ) .. " /gp_whisper" ,
371- -- multiplier of RMS level dB for threshold used by sox to detect silence vs speech
372- -- decibels are negative, the recording is normalized to -3dB =>
373- -- increase this number to pick up more (weaker) sounds as possible speech
374- -- decrease this number to pick up only louder sounds as possible speech
375- -- you can disable silence trimming by setting this a very high number (like 1000.0)
376- whisper_silence = " 1.75" ,
377- -- whisper tempo (1.0 is normal speed)
378- whisper_tempo = " 1.75" ,
379- -- The language of the input audio, in ISO-639-1 format.
380- whisper_language = " en" ,
381- -- command to use for recording can be nil (unset) for automatic selection
382- -- string ("sox", "arecord", "ffmpeg") or table with command and arguments:
383- -- sox is the most universal, but can have start/end cropping issues caused by latency
384- -- arecord is linux only, but has no cropping issues and is faster
385- -- ffmpeg in the default configuration is macos only, but can be used on any platform
386- -- (see https://trac.ffmpeg.org/wiki/Capture/Desktop for more info)
387- -- below is the default configuration for all three commands:
388- -- whisper_rec_cmd = {"sox", "-c", "1", "--buffer", "32", "-d", "rec.wav", "trim", "0", "60:00"},
389- -- whisper_rec_cmd = {"arecord", "-c", "1", "-f", "S16_LE", "-r", "48000", "-d", "3600", "rec.wav"},
390- -- whisper_rec_cmd = {"ffmpeg", "-y", "-f", "avfoundation", "-i", ":0", "-t", "3600", "rec.wav"},
391- whisper_rec_cmd = nil ,
367+ whisper = {
368+ -- you can disable whisper completely by setting whisper = {disable = true}
369+ disable = false ,
370+ -- OpenAI audio/transcriptions api endpoint to transcribe audio to text
371+ endpoint = " https://api.openai.com/v1/audio/transcriptions" ,
372+ -- directory for storing whisper files (TMPDIR, then TEMP, then /tmp is used as the base)
373+ store_dir = (os.getenv (" TMPDIR" ) or os.getenv (" TEMP" ) or " /tmp" ) .. " /gp_whisper" ,
374+ -- multiplier of RMS level dB for threshold used by sox to detect silence vs speech
375+ -- decibels are negative, the recording is normalized to -3dB =>
376+ -- increase this number to pick up more (weaker) sounds as possible speech
377+ -- decrease this number to pick up only louder sounds as possible speech
378+ -- you can disable silence trimming by setting this to a very high number (like 1000.0)
379+ silence = " 1.75" ,
380+ -- whisper tempo (1.0 is normal speed)
381+ tempo = " 1.75" ,
382+ -- the language of the input audio, in ISO-639-1 format
383+ language = " en" ,
384+ -- command to use for recording; can be nil (unset) for automatic selection,
385+ -- a string ("sox", "arecord", "ffmpeg") or a table with command and arguments:
386+ -- sox is the most universal, but can have start/end cropping issues caused by latency
387+ -- arecord is Linux only, but has no cropping issues and is faster
388+ -- ffmpeg in the default configuration is macOS only, but can be used on any platform
389+ -- (see https://trac.ffmpeg.org/wiki/Capture/Desktop for more info)
390+ -- below is the default configuration for all three commands:
391+ -- rec_cmd = {"sox", "-c", "1", "--buffer", "32", "-d", "rec.wav", "trim", "0", "60:00"},
392+ -- rec_cmd = {"arecord", "-c", "1", "-f", "S16_LE", "-r", "48000", "-d", "3600", "rec.wav"},
393+ -- rec_cmd = {"ffmpeg", "-y", "-f", "avfoundation", "-i", ":0", "-t", "3600", "rec.wav"},
394+ rec_cmd = nil ,
395+ },
392396
393397 -- image generation settings
394398 image = {
395- -- you can disable image generation logic completely by image. disable = true
399+ -- you can disable image generation logic completely by setting image = {disable = true}
396400 disable = false ,
397401
398402 -- required openai api key (string or table with command and arguments)
0 commit comments