fedora-csb-system-manager
1;;; whisper.el --- Record audio and transcribe using whisper-cli -*- lexical-binding: t; -*-
2
3;; Package-Requires: ((emacs "25.1"))
4
5;;; Commentary:
6;; This library provides functions to record audio using ffmpeg and
7;; transcribe it using the whisper-cli command-line tool.
8
9;;; Code:
10
(defgroup whisper nil
  "Record audio with ffmpeg and transcribe it with whisper-cli."
  :group 'tools)
14
(defcustom whisper-cli-executable "whisper-cli"
  "Program name or path of the whisper-cli executable to run."
  :group 'whisper
  :type 'string)
19
(defcustom whisper-model "base"
  "Whisper model used for transcription.
Typical values are tiny, base, small, medium and large."
  :group 'whisper
  :type 'string)
24
(defcustom whisper-language "en"
  "Language code used for transcription.
Examples: en, es, fr, de."
  :group 'whisper
  :type 'string)
29
(defcustom whisper-ffmpeg-timeout 300
  "Maximum length of an ffmpeg recording, in seconds."
  :group 'whisper
  :type 'integer)
34
(defcustom whisper-ffmpeg-audio-input-source "pulse"
  "FFmpeg input format used for recording (the value given to \"-f\").
This names the capture backend, not a device: use \"pulse\" for
PulseAudio, \"alsa\" for ALSA, or \"avfoundation\" on macOS.
The device itself is chosen by `whisper-ffmpeg-audio-input-device'.
Which formats are available depends on how your ffmpeg was built."
  :type 'string
  :group 'whisper)
41
(defcustom whisper-ffmpeg-audio-input-device "default"
  "FFmpeg input device used for recording (the value given to \"-i\").
For PulseAudio this is often \"default\".
For ALSA it might be something like \"hw:0\" or \"plughw:1,0\".
For macOS (avfoundation) it might be \":0\" for the default audio input;
list the available devices with:
  ffmpeg -f avfoundation -list_devices true -i \"\""
  :type 'string
  :group 'whisper)
48
49
(defvar whisper--recording-process nil
  "The ffmpeg process of the recording in progress, or nil when idle.")
(defvar whisper--original-mode-line-format (default-value 'mode-line-format)
  "Default `mode-line-format' to restore once every indicator is stopped.
Captured by `whisper--start-mode-line-indicator' when the first
indicator is installed.")

(defvar whisper--mode-line-indicator-depth 0
  "Number of indicator requests started but not yet stopped.")

(defun whisper--start-mode-line-indicator (indicator)
  "Display INDICATOR at the front of the default mode line.
Calls may nest.  The original default `mode-line-format' is saved
only by the outermost call, so a nested call replaces the visible
indicator without losing the format to restore.  Each call must be
paired with one call to `whisper--stop-mode-line-indicator'."
  ;; Save the *default* value (not a buffer-local one), and only when
  ;; no indicator is active; otherwise we would save a format that
  ;; already contains an indicator and "restore" to it later.
  (when (zerop whisper--mode-line-indicator-depth)
    (setq whisper--original-mode-line-format
          (default-value 'mode-line-format)))
  (setq whisper--mode-line-indicator-depth
        (1+ whisper--mode-line-indicator-depth))
  ;; Wrap rather than cons: the saved format may be a construct such as
  ;; (:eval ...) whose car must stay in first position to keep its meaning.
  (setq-default mode-line-format
                (list (format " %s " indicator)
                      whisper--original-mode-line-format))
  (force-mode-line-update t))

(defun whisper--stop-mode-line-indicator ()
  "Undo one `whisper--start-mode-line-indicator' call.
The default `mode-line-format' is restored when the last active
indicator is stopped.  Calling this with no active indicator does
nothing."
  (when (> whisper--mode-line-indicator-depth 0)
    (setq whisper--mode-line-indicator-depth
          (1- whisper--mode-line-indicator-depth))
    (when (zerop whisper--mode-line-indicator-depth)
      (setq-default mode-line-format whisper--original-mode-line-format)
      (force-mode-line-update t))))
65
(defun whisper--record-audio (output-file-basename timeout callback)
  "Record audio with ffmpeg into a fresh temporary WAV file.
OUTPUT-FILE-BASENAME is accepted for backward compatibility but is
currently ignored: the file is always created by `make-temp-file'
with the prefix \"whisper-audio-\".
Recording runs for TIMEOUT seconds, or until the process is interrupted.
When ffmpeg terminates and the file is non-empty, CALLBACK is called
with the file name and becomes responsible for deleting the file;
otherwise the file is deleted here and CALLBACK is not called.
Return the ffmpeg process."
  (ignore output-file-basename)
  (let* ((temp-wav-file (make-temp-file "whisper-audio-" nil ".wav"))
         ;; `process-environment' is a special variable, so this binding
         ;; is dynamic and only affects the process started below.
         ;; LC_ALL=C keeps ffmpeg's output in a consistent locale.
         (process-environment (cons "LC_ALL=C" process-environment))
         (ffmpeg-command
          (list "ffmpeg"
                "-y"                    ; overwrite the (empty) temp file
                "-f" whisper-ffmpeg-audio-input-source
                "-i" whisper-ffmpeg-audio-input-device
                "-t" (number-to-string timeout)
                temp-wav-file)))
    (whisper--start-mode-line-indicator "")
    (message "Recording audio for up to %d seconds (or run 'whisper-run' again to stop early)..." timeout)
    (condition-case err
        (setq whisper--recording-process
              (apply #'start-process
                     "whisper-ffmpeg" "*whisper-ffmpeg-output*" ffmpeg-command))
      (error
       ;; ffmpeg could not be started: undo what we set up, then re-signal.
       (setq whisper--recording-process nil)
       (whisper--stop-mode-line-indicator)
       (when (file-exists-p temp-wav-file)
         (delete-file temp-wav-file))
       (signal (car err) (cdr err))))
    (set-process-sentinel
     whisper--recording-process
     (lambda (proc _event)
       ;; Sentinels also run for non-terminal status changes (for example
       ;; stop/continue); only act once the process has really ended.
       (when (memq (process-status proc) '(exit signal))
         (let ((usable (and (file-exists-p temp-wav-file)
                            ;; Element 7 of `file-attributes' is the size.
                            (> (nth 7 (file-attributes temp-wav-file)) 0))))
           ;; Reset recorder state before running CALLBACK, so that
           ;; whatever CALLBACK starts sees a clean mode line and no
           ;; stale recording process.
           (when (eq whisper--recording-process proc)
             (setq whisper--recording-process nil))
           (whisper--stop-mode-line-indicator)
           (if usable
               (progn
                 (message "Recording finished/stopped. Audio file: %s" temp-wav-file)
                 (funcall callback temp-wav-file))
             (message "Recording failed or produced no usable audio data.")
             (when (file-exists-p temp-wav-file)
               (message "Deleting unused/empty temp audio file: %s" temp-wav-file)
               (delete-file temp-wav-file)))))))
    whisper--recording-process))
115
(defun whisper--transcribe (audio-file callback)
  "Transcribe AUDIO-FILE with whisper-cli, then call CALLBACK with the text.
The program named by `whisper-cli-executable' is run asynchronously
and asked to write a .txt file into the temporary directory.  When it
exits successfully, the contents of that file are passed to CALLBACK
as a string and the .txt file is deleted.  CALLBACK is called in
whatever buffer is current when the process sentinel runs.
AUDIO-FILE is deleted once the process has terminated, whether or not
transcription succeeded."
  (let* ((output-dir (file-name-as-directory
                      (expand-file-name temporary-file-directory)))
         ;; The output is expected as <audio base name>.txt inside OUTPUT-DIR.
         (transcription-file
          (expand-file-name (concat (file-name-base audio-file) ".txt")
                            output-dir))
         ;; NOTE(review): the flag spellings are passed through unchanged
         ;; from the previous implementation -- confirm them against
         ;; `whisper-cli --help' for the installed tool.
         (command (list whisper-cli-executable
                        audio-file
                        "--model" whisper-model
                        "--language" whisper-language
                        "--output_txt"
                        "--output_dir" output-dir))
         process)
    (whisper--start-mode-line-indicator "")
    (message "Transcribing audio...")
    ;; `mapconcat' instead of `string-join': no dependency on subr-x.
    (message "Running command: %s" (mapconcat #'identity command " "))
    (condition-case err
        (setq process
              (apply #'start-process
                     "whisper-cli" "*whisper-cli-output*" command))
      (error
       ;; whisper-cli could not be started: undo our setup and re-signal.
       (whisper--stop-mode-line-indicator)
       (when (file-exists-p audio-file)
         (delete-file audio-file))
       (signal (car err) (cdr err))))
    (set-process-sentinel
     process
     (lambda (proc _event)
       ;; Ignore non-terminal status changes; clean up only at the end.
       (when (memq (process-status proc) '(exit signal))
         (whisper--stop-mode-line-indicator)
         (unwind-protect
             (cond
              ((not (and (eq (process-status proc) 'exit)
                         (= (process-exit-status proc) 0)))
               (message "whisper-cli transcription failed. Check *whisper-cli-output* buffer."))
              ((not (file-exists-p transcription-file))
               (message "Transcription output file not found: %s" transcription-file))
              (t
               ;; Read the text first and call CALLBACK outside the temp
               ;; buffer, so a callback that inserts text does not write
               ;; into a buffer that is about to be killed.
               (let ((text (with-temp-buffer
                             (insert-file-contents transcription-file)
                             (buffer-string))))
                 (delete-file transcription-file)
                 (message "Transcription successful.")
                 (funcall callback text))))
           (when (file-exists-p audio-file)
             (delete-file audio-file))))))))
152
;;;###autoload
(defun whisper-run ()
  "Record audio, transcribe it, and insert the text into the current buffer.
The text is inserted at point in the buffer that was current when
recording started.
If a recording is already in progress, running `whisper-run' again
stops it, and transcription proceeds on the audio captured so far.
Uses `whisper-ffmpeg-timeout' as the recording duration when starting anew."
  (interactive)
  (if (and whisper--recording-process
           (process-live-p whisper--recording-process))
      (progn
        (message "Stopping current recording...")
        ;; The sentinel installed on the running process takes over from
        ;; here: it handles the audio file and starts transcription.
        (interrupt-process whisper--recording-process))
    ;; Remember the invoking buffer: the callbacks below run later from
    ;; process sentinels, when another buffer may be current.
    (let ((target-buffer (current-buffer)))
      (whisper--record-audio
       "whisper-rec-"
       whisper-ffmpeg-timeout
       (lambda (audio-file)
         (if (and audio-file (file-exists-p audio-file))
             ;; From here on `whisper--transcribe' owns AUDIO-FILE.
             (whisper--transcribe
              audio-file
              (lambda (transcription)
                (cond
                 ((string= transcription "")
                  (message "Transcription is empty."))
                 ((not (buffer-live-p target-buffer))
                  (message "Target buffer no longer exists; transcription not inserted."))
                 (t
                  (with-current-buffer target-buffer
                    (insert transcription))
                  (message "Transcription inserted.")))))
           (message "No valid audio file was recorded to transcribe.")))))))
184
;;;###autoload
(defun whisper-file (file)
  "Record audio, transcribe it, and append the text to FILE.
FILE is expanded against the current `default-directory' when the
command is invoked.  Signal an error if FILE cannot be written or if a
recording is already in progress.
Uses `whisper-ffmpeg-timeout' as the recording duration."
  (interactive "FAppend transcription to file: ")
  ;; Resolve FILE now: the callbacks run later from process sentinels,
  ;; where `default-directory' may be different.
  (let ((target (expand-file-name file)))
    (unless (file-writable-p target)
      (user-error "File %s is not writable" target))
    (when (and whisper--recording-process
               (process-live-p whisper--recording-process))
      (user-error "A recording is already in progress"))
    (whisper--record-audio
     "whisper-temp-output.wav"          ; ignored by `whisper--record-audio'
     whisper-ffmpeg-timeout
     (lambda (audio-file)
       (whisper--transcribe
        audio-file
        (lambda (transcription)
          (if (string= transcription "")
              (message "Transcription is empty. Nothing appended to %s." target)
            (with-temp-buffer
              (insert transcription)
              ;; nil START and END: append the whole temp buffer.
              (append-to-file nil nil target))
            (message "Transcription appended to %s." target))))))))
205
206(provide 'whisper)
207
208;;; whisper.el ends here