flake-update-20260505
  1;;; whisper.el --- Speech-to-Text interface using OpenAI's whisper model -*- lexical-binding: t; -*-
  2
  3;; Copyright (C) 2022 Imran Khan.
  4
  5;; Author: Imran Khan <imran@khan.ovh>
  6;; URL: https://github.com/natrys/whisper.el
  7;; Version: 0.3.3
  8;; Package-Requires: ((emacs "27.1"))
  9
 10;; This file is NOT part of GNU Emacs.
 11
 12;; This program is free software; you can redistribute it and/or modify
 13;; it under the terms of the GNU General Public License as published by
 14;; the Free Software Foundation, either version 3 of the License, or (at
 15;; your option) any later version.
 16;;
 17;; This program is distributed in the hope that it will be useful,
 18;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 19;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 20;; GNU General Public License for more details.
 21;;
 22;; You should have received a copy of the GNU General Public License
 23;; along with this program.  If not, see <https://www.gnu.org/licenses/>.
 24
 25;;; Commentary:
 26;;
 27;; Speech-to-Text interface for Emacs using OpenAI's whisper model
 28;; Uses the awesome C/C++ port that runs on CPU.
 29;; See: https://github.com/ggerganov/whisper.cpp
 30;;
 31;;; Code:
 32
 33(require 'cl-lib)
 34
 35;;; User facing options
 36
 37(defgroup whisper ()
 38  "Speech-to-text interface using OpenAI's whisper model."
 39  :group 'external)
 40
 41(defcustom whisper-enable-speed-up nil
 42  "Whether to sacrifices some accuracy to speed up transcribing.
 43
 44Basically whether to use \"-su\" flag in whisper.cpp.  You should experiment
 45enabling it to see if it works well enough for you.
 46
 47It's currently disabled by upstream because of bugs, so does nothing."
 48  :type 'boolean
 49  :group 'whisper)
 50
 51(defcustom whisper-use-threads nil
 52  "How many threads to use for transcribing.
 53
 54Default is whisper.cpp default (which is number of cores but maxed at 4)."
 55  :type 'integer
 56  :group 'whisper)
 57
 58(defcustom whisper-install-directory (locate-user-emacs-file ".cache/")
 59  "Location of where whisper.cpp is installed."
 60  :type 'directory
 61  :group 'whisper)
 62
 63(defcustom whisper-recording-timeout 300
 64  "Number of seconds after which recording will be automatically stopped."
 65  :type '(choice integer (const nil))
 66  :group 'whisper)
 67
 68(defcustom whisper-model "base"
 69  "Which whisper model to use (default is base).
 70
 71Choose between: tiny, base, small, medium, large.
 72
 73The first four comes with .en variant that only works with English, but
 74might speed up transcribing."
 75  :type 'string
 76  :group 'whisper)
 77
 78(defcustom whisper-language "en"
 79  "Set spoken language for audio.
 80
 81When dealing with unknown language, set this to `auto'.
 82
 83Be sure to use generic model (without .en suffix) when language is not English."
 84  :type 'string
 85  :group 'whisper)
 86
 87(defcustom whisper-translate nil
 88  "Whether to translate to English first, before transcribing."
 89  :type 'boolean
 90  :group 'whisper)
 91
 92(defcustom whisper-quantize nil
 93  "Whether to use quantized version of the model in whisper.cpp.
 94
 95Quantization is a technique to reduce the computational and memory costs of
 96running inference by representing the weights and activations with low-precision
 97data types.  This sacrifices precision for resource efficiency.  The idea is
 98that quantized version of bigger model may afford you to use it (if you are RAM
 99constrained e.g.) with some penalty, while still being better than the smaller
100model you would be using otherwise.
101
102Valid values are (from lowest to highest quality):
103- q4_0
104- q4_1
105- q4_k
106- q5_0
107- q5_1
108- q5_k
109- q6_k
110- q8_0"
111  :type '(choice string (const nil))
112  :group 'whisper)
113
(defcustom whisper-install-whispercpp t
  "Whether, and how, whisper.cpp gets installed.

With the default value t, whisper.el compiles whisper.cpp automatically.

If you are on a platform where the automatic install doesn't work but
you are able to install it manually, set this to `manual' to skip the
automatic attempt (and its failure).  As long as a functional install is
found at `whisper-install-directory', model download and quantization
can still be done automatically.

If you plan to use something other than whisper.cpp entirely, and so
want neither the install nor the checks for it, set this to nil to opt
out of whisper.cpp as a whole.  In that case it's your responsibility to
override `whisper-command' with an appropriate function."
  :group 'whisper
  :type '(choice boolean (const manual)))
130
(defcustom whisper-insert-text-at-point t
  "Whether to put whisper output at point in the current buffer.

When nil, instead of inserting the text at point, a separate buffer
containing the whisper output text is displayed.  The buffer name is
distinguished by the current timestamp, and it's the user's
responsibility to kill the buffer if they want to."
  :group 'whisper
  :type 'boolean)
140
(defcustom whisper-return-cursor-to-start t
  "Whether to move the cursor back after the transcription is inserted.

When non-nil, the cursor returns to the point where whisper was invoked.
Otherwise it is left at the end of the inserted transcription."
  :group 'whisper
  :type 'boolean)
148
(defcustom whisper-show-progress-in-mode-line t
  "Whether to show recording/transcription progress in the mode line."
  :group 'whisper
  :type 'boolean)
153
;; `whisper-pre-process-hook' is the pre-0.3.0 name of this hook.
(define-obsolete-variable-alias
  'whisper-pre-process-hook 'whisper-before-transcription-hook "0.3.0")

(defcustom whisper-before-transcription-hook
  (list 'whisper--check-buffer-read-only-p)
  "Hook run by `whisper-run' before it starts anything new.

By default it refuses to proceed when the output would have to be
inserted into a read-only buffer."
  :group 'whisper
  :type 'hook)
159
;; `whisper-post-process-hook' is the pre-0.3.0 name of this hook.
(define-obsolete-variable-alias
  'whisper-post-process-hook 'whisper-after-transcription-hook "0.3.0")

(defcustom whisper-after-transcription-hook nil
  "Hook run after the whisper command finishes producing output.

If you want to transform the command output text in some way before it
is inserted into the original buffer, add your function here.  Each
function in the hook is run with the buffer containing the whisper
command output as its current buffer, and with point at the beginning
of that buffer."
  :group 'whisper
  :type 'hook)
170
(defcustom whisper-after-insert-hook nil
  "Hook run after the whisper command has inserted the transcription.

It is run as the last step when the transcribing process ends, once any
transcription text has been inserted and the temporary state has been
cleaned up."
  :group 'whisper
  :type 'hook)
177
178;;; Internal variables
179
(defvar whisper--stdout-buffer-name "*whisper-stdout*"
  "Name of the buffer receiving standard output of the transcribing process.")

(defvar whisper--stderr-buffer-name "*whisper-stderr*"
  "Name of the buffer that may receive standard error of the transcribing process.")

(defvar whisper--compilation-buffer-name "*whisper-compilation*"
  "Name of the compilation buffer used for install and download jobs.")

(defvar whisper--point-buffer nil
  "Buffer that was current when `whisper-run' was invoked.")

(defvar whisper--compilation-buffer nil
  "Buffer of the install/download job started by whisper, if any.")

(defvar whisper--recording-process nil
  "Process object of the audio recording, if any.")

(defvar whisper--transcribing-process nil
  "Process object of the transcription, if any.")

(defvar whisper--marker (make-marker)
  "Marker for the position at which the transcription gets inserted.")

(defvar whisper--install-path "/tmp"
  "Directory of the whisper.cpp checkout.
It is set by `whisper--check-install-and-run' before being used.")
192
(defvar whisper--temp-file
  ;; `expand-file-name' inserts the directory separator when needed, so
  ;; the path stays valid even if the temporary directory is configured
  ;; without a trailing slash (plain `concat' would glue the two names).
  (expand-file-name "emacs-whisper.wav" (temporary-file-directory))
  "Location of the temporary audio file.

Recorded (or converted) audio is written here and then handed to the
transcription command.")
196
(defvar whisper--ffmpeg-input-format
  (cond
   ((eq system-type 'gnu/linux)
    ;; Prefer PulseAudio (or PipeWire's PulseAudio server) when present.
    (if (or (executable-find "pulseaudio")
            (executable-find "pipewire-pulse"))
        "pulse"
      "alsa"))
   ((eq system-type 'darwin) "avfoundation")
   ((eq system-type 'windows-nt) "dshow")
   (t nil))
  "FFmpeg input format (the \"-f\" argument) used for recording audio.
The default is chosen from `system-type'; it is nil on other systems.")

(defvar whisper--ffmpeg-input-device
  (when (equal whisper--ffmpeg-input-format "pulse")
    "default")
  "FFmpeg input device (the \"-i\" argument) used for recording audio.
Only the \"pulse\" format gets a default; otherwise this starts as nil.")

(defvar whisper--ffmpeg-input-file nil
  "Media file to transcribe instead of recording audio, or nil.")
213
(defvar whisper--using-whispercpp nil
  "Non-nil when the transcription command looks like whisper.cpp.")

(defvar whisper--progress-level "0"
  "Latest transcription progress percentage, as a string of digits.")

(defvar whisper--mode-line-recording-indicator
  ;; U+F130 is a private-use glyph; it is written as an escape so the
  ;; value survives a file that is read or saved in the wrong encoding.
  ;; NOTE(review): presumably an icon-font microphone -- confirm the font.
  (propertize "\uf130" 'face 'font-lock-warning-face)
  "Mode line indicator shown while audio is being recorded.")

(defvar whisper--mode-line-transcribing-indicator
  ;; NOTE(review): this string is empty in the file as seen; a glyph may
  ;; have been lost in transit -- confirm against the upstream source.
  (propertize "" 'face 'font-lock-warning-face)
  "Mode line indicator shown while audio is being transcribed.")
223
(defun whisper--check-buffer-read-only-p ()
  "Signal an error if output must go into a read-only current buffer.

Nothing happens when `whisper-insert-text-at-point' is nil, since the
output then goes to a separate buffer."
  (and whisper-insert-text-at-point
       buffer-read-only
       (error "Buffer is read-only, can't insert text here")))
228
229;; Maybe sox would be a lighter choice for something this simple?
(defun whisper--record-command (output-file)
  "Return the FFmpeg command, as a list of strings, writing to OUTPUT-FILE.

When `whisper--ffmpeg-input-file' is set, that file is the input and
neither an input format nor a time limit is passed.  Otherwise audio is
read from `whisper--ffmpeg-input-device' using
`whisper--ffmpeg-input-format', limited to `whisper-recording-timeout'
seconds when that is non-nil.

Signal an error if FFmpeg is not installed, or if recording is requested
while the input format or device is unset."
  (unless (executable-find "ffmpeg")
    (error "Needs FFmpeg to record audio"))
  (let ((media-file whisper--ffmpeg-input-file))
    ;; The format/device settings only matter when actually recording.
    (cond
     (media-file nil)
     ((null whisper--ffmpeg-input-format)
      (error "Set a suitable value for whisper--ffmpeg-input-format"))
     ((null whisper--ffmpeg-input-device)
      (error "Set a suitable value for whisper--ffmpeg-input-device")))
    (append
     (list "ffmpeg")
     (unless media-file
       (list "-f" whisper--ffmpeg-input-format))
     (list "-i" (or media-file whisper--ffmpeg-input-device))
     (when (and (not media-file) whisper-recording-timeout)
       (list "-t" (number-to-string whisper-recording-timeout)))
     ;; Resample to 16 kHz; "-y" overwrites an existing output file.
     (list "-ar" "16000"
           "-y" output-file))))
251
(defun whisper-command (input-file)
  "Return the whisper.cpp command, as a list, to transcribe INPUT-FILE.

The first element is the program found by
`whisper--find-whispercpp-main'; the remaining elements are its
arguments, derived from the user options.

If you want to use something other than whisper.cpp, you should override
this function to produce the command for the inference engine of your
choice."
  (append
   (list (whisper--find-whispercpp-main))
   (and whisper-use-threads
        (list "--threads" (number-to-string whisper-use-threads)))
   ;; `whisper-enable-speed-up' is deliberately not passed on:
   ;; (and whisper-enable-speed-up (list "--speed-up"))
   (and whisper-translate (list "--translate"))
   (and whisper-show-progress-in-mode-line (list "--print-progress"))
   (list "--language" whisper-language
         "--model" (whisper--model-file whisper-quantize)
         "--no-timestamps"
         "--file" input-file)))
266
;; Old name of `whisper-command', kept for backward compatibility.
(define-obsolete-function-alias
  'whisper--transcribe-command #'whisper-command "0.1.6")
269
(defun whisper--mode-line-indicator (phase)
  "Return the mode line construct to display during PHASE.

For the symbol `recording' this is the recording indicator.  For any
other PHASE it is the transcribing indicator, followed by the current
progress percentage when whisper.cpp is the engine in use."
  (cond
   ((eq phase 'recording)
    whisper--mode-line-recording-indicator)
   (whisper--using-whispercpp
    ;; Evaluated at redisplay time so the percentage stays current.
    '(:eval (concat whisper--mode-line-transcribing-indicator whisper--progress-level "%%")))
   (t
    whisper--mode-line-transcribing-indicator)))
277
(defun whisper--setup-mode-line (command phase)
  "Show or hide the mode line indicator belonging to PHASE.

COMMAND `:show' adds the indicator to `global-mode-string' (once); any
other COMMAND removes it and resets the progress level to \"0\".

Nothing is done when `whisper-show-progress-in-mode-line' is nil."
  (when whisper-show-progress-in-mode-line
    (let ((entry (list t (whisper--mode-line-indicator phase))))
      (cond
       ((eq command :show)
        (cl-pushnew entry global-mode-string :test #'equal))
       (t
        (setq global-mode-string (remove entry global-mode-string))
        (setq whisper--progress-level "0"))))
    ;; Refresh the mode lines of all windows.
    (force-mode-line-update t)))
289
(defun whisper--get-whispercpp-progress (_process output)
  "Process filter that extracts transcription progress from OUTPUT.

When OUTPUT contains a whisper.cpp progress line, store its percentage
in `whisper--progress-level' and refresh the mode line."
  (let ((progress-regexp
         (rx bol
             "whisper_print_progress_callback: progress ="
             (* blank)
             (group (+ digit))
             "%")))
    (when (string-match progress-regexp output)
      (setq whisper--progress-level (match-string 1 output))
      (force-mode-line-update))))
296
(defun whisper--using-whispercpp-p ()
  "Crude way to check we are in fact using whisper.cpp.

Return non-nil when the program named by the first element of
`whisper-command' is \"main\" or \"whisper-cli\" (optionally with an
\".exe\" suffix, and optionally preceded by a directory), or is exactly
\"whisper-cpp\".  Return nil otherwise, including when the command has
no usable program name."
  (let ((command (car (whisper-command whisper--temp-file)))
        (pattern '(seq (or bol (any "/\\"))
                       (or "main" "whisper-cli")
                       (? ".exe")
                       eol)))
    ;; The program can be nil (e.g. the executable could not be located);
    ;; the string functions below would signal on that, so answer nil.
    (and (stringp command)
         (or (string-match-p (rx-to-string pattern) command)
             ;; for the staunch Nix user
             (string-equal command "whisper-cpp")))))
307
(defun whisper--find-whispercpp-main ()
  "Find whisper.cpp main binary in a backward compatible way.

Return the first of these that exists:
- \"whisper-cli\" found through `executable-find';
- \"build/bin/whisper-cli\" inside the whisper.cpp checkout below
  `whisper-install-directory';
- \"main\" (the old binary name) at the top of that checkout.

On Windows the checkout candidates carry an \".exe\" suffix.  Return
nil when none of them exists."
  (or (executable-find "whisper-cli")
      ;; The automatic installer builds inside the install directory, so
      ;; a binary that is not on PATH still has to be looked up there.
      (let* ((windows (eq system-type 'windows-nt))
             (base (concat
                    (expand-file-name
                     (file-name-as-directory whisper-install-directory))
                    "whisper.cpp/"))
             (candidates
              (list (concat base "build/bin/"
                            (if windows "whisper-cli.exe" "whisper-cli"))
                    ;; old location
                    (concat base (if windows "main.exe" "main")))))
        (cl-find-if #'file-exists-p candidates))))
311
(defun whisper--record-audio ()
  "Start the audio recording (or media conversion) process in the background.

First remember, in `whisper--marker', the point of
`whisper--point-buffer' when the text is to be inserted at point.  Then,
unless the input file already is `whisper--temp-file' (in which case
transcription starts right away), run the FFmpeg command asynchronously.
When that process ends, its sentinel starts the transcription or signals
an error on failure."
  (when whisper-insert-text-at-point
    (setq whisper--marker
          (with-current-buffer whisper--point-buffer
            (point-marker))))
  (cond
   (whisper--ffmpeg-input-file
    (message "[*] Pre-processing media file"))
   (t
    (message "[*] Recording audio")
    (whisper--setup-mode-line :show 'recording)))
  (if (string-equal whisper--ffmpeg-input-file whisper--temp-file)
      (whisper--transcribe-audio)
    (setq whisper--recording-process
          (make-process
           :name "whisper-recording"
           :command (whisper--record-command whisper--temp-file)
           :connection-type nil
           :buffer nil
           :sentinel
           (lambda (_process event)
             (whisper--setup-mode-line :hide 'recording)
             (cond
              ;; "finished" and "terminated" would be the sane outcomes,
              ;; but in reality an interrupted FFmpeg exits with code 255.
              ((member event '("finished\n"
                               "terminated\n"
                               "exited abnormally with code 255\n"))
               (whisper--transcribe-audio))
              ((string-match-p "exited abnormally with code [0-9]+\n" event)
               (if whisper--ffmpeg-input-file
                   (error "FFmpeg failed to convert given file")
                 (error "FFmpeg failed to record audio")))))))))
341
(defun whisper--transcribe-audio ()
  "Start audio transcribing process in the background.

The command produced by `whisper-command' is run on `whisper--temp-file'
with its standard output collected in the buffer named by
`whisper--stdout-buffer-name'.  Standard error goes to a progress filter
when progress display is enabled and whisper.cpp is in use, otherwise to
the buffer named by `whisper--stderr-buffer-name'.

When the process finishes successfully, its output is trimmed, passed
through `whisper-after-transcription-hook', and then either inserted at
the remembered position or shown in a fresh buffer, depending on
`whisper-insert-text-at-point'.  Whatever the outcome, temporary state
is cleaned up afterwards and `whisper-after-insert-hook' is run, in the
buffer that received the text when there is one."
  (message "[-] Transcribing/Translating audio")
  (setq whisper--using-whispercpp (whisper--using-whispercpp-p))
  (whisper--setup-mode-line :show 'transcribing)
  (setq whisper--transcribing-process
        (make-process
         :name "whisper-transcribing"
         :command (whisper-command whisper--temp-file)
         :connection-type nil
         :buffer (get-buffer-create whisper--stdout-buffer-name)
         :stderr (if (and whisper-show-progress-in-mode-line whisper--using-whispercpp)
                     (make-pipe-process
                      :name "whisper-stderr"
                      :filter #'whisper--get-whispercpp-progress)
                   (get-buffer-create whisper--stderr-buffer-name))
         :coding 'utf-8
         :sentinel
         (lambda (_process event)
           ;; Buffer that ends up holding the transcription, if any.
           (let ((target nil))
             (unwind-protect
                 (when-let* ((stdout-buffer (get-buffer whisper--stdout-buffer-name))
                             (finished (and (buffer-live-p stdout-buffer)
                                            (string-equal "finished\n" event))))
                   (with-current-buffer stdout-buffer
                     ;; Trim leading blanks and newlines.
                     (goto-char (point-min))
                     (skip-chars-forward " \n")
                     (when (> (point) (point-min))
                       (delete-region (point-min) (point)))
                     ;; Trim trailing blanks and newlines.
                     (goto-char (point-max))
                     (skip-chars-backward " \n")
                     (when (> (point-max) (point))
                       (delete-region (point) (point-max)))
                     (when (= (buffer-size) 0)
                       (error "Whisper command produced no output"))
                     (goto-char (point-min))
                     ;; Run every hook function in the output buffer; the
                     ;; wrapper returns nil so that all of them get run.
                     (run-hook-wrapped 'whisper-after-transcription-hook
                                       (lambda (f)
                                         (with-current-buffer stdout-buffer
                                           (save-excursion
                                             (funcall f)))
                                         nil))
                     ;; The hooks may have emptied the buffer.
                     (when (> (buffer-size) 0)
                       (if whisper-insert-text-at-point
                           (let ((destination (marker-buffer whisper--marker)))
                             ;; The buffer may have been killed meanwhile.
                             (unless (buffer-live-p destination)
                               (error "Buffer to insert transcription into no longer exists"))
                             (with-current-buffer destination
                               (goto-char whisper--marker)
                               (insert-buffer-substring stdout-buffer)
                               (when whisper-return-cursor-to-start
                                 (goto-char whisper--marker)))
                             (setq target destination))
                         (with-current-buffer
                             (get-buffer-create
                              (format "*whisper-%s*" (format-time-string "%+4Y%m%d%H%M%S")))
                           (insert-buffer-substring stdout-buffer)
                           (display-buffer (current-buffer))
                           (setq target (current-buffer)))))))
               ;; Cleanup, run whether or not the above succeeded.
               (set-marker whisper--marker nil)
               (setq whisper--point-buffer nil)
               ;; `kill-buffer' signals for a non-existent buffer name,
               ;; which would skip the rest of the cleanup (and leave the
               ;; mode line indicator stuck), so check for existence.
               (when (get-buffer whisper--stdout-buffer-name)
                 (kill-buffer whisper--stdout-buffer-name))
               (unless whisper-show-progress-in-mode-line
                 (when (get-buffer whisper--stderr-buffer-name)
                   (kill-buffer whisper--stderr-buffer-name)))
               (whisper--setup-mode-line :hide 'transcribing)
               (message nil)
               ;; Run the hook from the buffer that received the text, as
               ;; the hook's documentation promises; without such a buffer
               ;; it runs in whatever buffer happens to be current.
               (if (buffer-live-p target)
                   (with-current-buffer target
                     (run-hooks 'whisper-after-insert-hook))
                 (run-hooks 'whisper-after-insert-hook))))))))
401
(defun whisper--check-model-consistency ()
  "Check if chosen language, model and quantization are consistent.

Signal an error when:
- `whisper-language' is not \"en\" while `whisper-model' is a \".en\" model;
- `whisper-language' is neither \"auto\" nor a 2 or 3 character shortcode;
- `whisper-model' is not a recognised model name;
- `whisper-quantize' is non-nil but not a recognised quantization format.

Return nil otherwise."
  (when (and (not (string-equal "en" whisper-language))
             (string-suffix-p ".en" whisper-model))
    (error "Use generic model (non .en version) for non-English languages"))

  ;; Most shortcodes have two letters, but a few (such as "haw" and
  ;; "yue") have three.
  (unless (or (memq (length whisper-language) '(2 3))
              (string-equal "auto" whisper-language))
    (error "Unknown language shortcode. If unsure use 'auto'. For full list, see: %s"
           "https://github.com/ggerganov/whisper.cpp/blob/master/whisper.cpp"))

  ;; Both patterns are anchored to the whole string, so that a valid name
  ;; embedded in a longer (or multi-line) value isn't accepted.
  (let ((model-pattern (rx (seq bos
                                (or "tiny" "base" "small" "medium"
                                    (seq "large" (opt (seq "-v" (any "1-3") (opt "-turbo")))))
                                (opt (seq "." (= 2 (any "a-z"))))
                                eos)))
        (quantization-pattern (rx (seq bos
                                       (or "q4_0" "q4_1" "q4_k" "q5_0" "q5_1" "q5_k" "q6_k" "q8_0")
                                       eos))))
    (unless (string-match-p model-pattern whisper-model)
      ;; The model name is user input: pass it as an argument rather than
      ;; as part of the format string, where a "%" would be misread.
      (error "Speech recognition model %s not recognised. For the list, see: %s"
             whisper-model
             "https://github.com/ggerganov/whisper.cpp/tree/master/models"))
    (when whisper-quantize
      (unless (and (stringp whisper-quantize)
                   (string-match-p quantization-pattern whisper-quantize))
        (error "Quantization format not recognized")))))
425
(defun whisper--model-file (quantized)
  "Return the absolute file name of the model inside the whisper.cpp checkout.

The checkout is the \"whisper.cpp/\" directory below
`whisper-install-directory'.  When QUANTIZED is non-nil the name of the
quantized model is returned, i.e. `whisper-quantize' is appended to
`whisper-model'; otherwise the name of the plain model."
  (let* ((root (expand-file-name (file-name-as-directory whisper-install-directory)))
         (variant (if quantized
                      (concat whisper-model "-" whisper-quantize)
                    whisper-model)))
    (concat root "whisper.cpp/" "models/ggml-" variant ".bin")))
433
;; Declared special so that the `let' binding below is dynamic (and thus
;; seen by `compile') even before compile.el has been loaded.
(defvar compilation-buffer-name-function)

(defun whisper--check-install-and-run (buffer status)
  "Run whisper after ensuring installation correctness.

This is a horrible function, and in time due a rewrite.  But for now I
find this amusing and a little bit instructive as to how it became a
mess.

To conduct and display installation progress, `compilation-mode' is used
because it's asynchronous and most importantly capable of handling
progress output of programs like wget.  However the asynchronicity comes
with some complexity cost when more than one task is run (but only one
after another), as there is no built-in async/await support, so
callbacks need to be used instead.

That is done by adding the callback to `compilation-finish-functions'.
Arguably it would be simpler to use one function per task and then chain
these callbacks.  However personally I preferred to logically group
these together and handle synchronisation and cleaning up in one place,
hence this big function.

Conventionally, these callbacks are called with the current compilation
buffer in BUFFER and the event that triggered the callback in STATUS, so
that's the function signature here.  Checking that BUFFER indeed is this
particular compilation buffer, and not something the user has running
elsewhere, is necessary.  It's possible to make the hook buffer local,
but the compilation command starts before the hook could be added, so I
have some theoretical concern about a possible race condition in that
approach.

A small downside of re-using the same function is that we need to
differentiate whether this run is a callback or the first normal call;
that's what the made up status \"whisper-start\" does.

The unfortunate price of asynchronicity is that this breaks dynamic
binding because the call stack is disrupted: callbacks are executed at a
later time, outside of the original dynamic context.  The fault not only
lies here, but ultimately `make-process' itself is async, so it will
likely take some insane hacks that escape me right now to get let
bindings to work like in synchronous code.

In order, this function: installs whisper.cpp when its binary is
missing, downloads the model when its file is missing, quantizes the
model when `whisper-quantize' asks for a missing quantized file, cleans
up after an interrupted or failed job, and finally starts recording.
Each job ends this invocation; the job's completion calls back into this
function to continue."
  (catch 'early-return
    (unless (string-equal "whisper-start" status)
      ;; shouldn't do anything when triggered by compilation buffers from elsewhere
      (unless (eq buffer whisper--compilation-buffer)
        (throw 'early-return nil))

      ;; being here means this compilation job either finished or was interrupted
      (remove-hook 'compilation-finish-functions #'whisper--check-install-and-run)
      (when (string-equal "finished\n" status)
        (kill-buffer whisper--compilation-buffer)))

    (let* ((install-dir (expand-file-name
                         (file-name-as-directory whisper-install-directory)))
           (base (concat install-dir "whisper.cpp/"))
           (windows (eq system-type 'windows-nt))
           (old-bin-name (if windows "main.exe" "main"))
           (bin-name (if windows "whisper-cli.exe" "whisper-cli"))
           (interrupted (string-equal "interrupt\n" status))
           (crashed (string-prefix-p "exited abnormally with code" status))
           ;; A job that was interrupted or failed must not start another.
           (failed (or interrupted crashed))
           (compilation-buffer-name-function
            (lambda (_) whisper--compilation-buffer-name)))
      (cl-flet ((start-job (shell-command-line)
                  ;; Run SHELL-COMMAND-LINE as a compilation job that
                  ;; calls back into this function, and end this run.
                  (setq whisper--compilation-buffer
                        (get-buffer-create whisper--compilation-buffer-name))
                  (add-hook 'compilation-finish-functions
                            #'whisper--check-install-and-run)
                  (compile shell-command-line)
                  (throw 'early-return nil)))

        (setq whisper--install-path base)

        ;; 1. whisper.cpp itself
        (when (and (not failed)
                   (not (or (file-exists-p (concat base old-bin-name)) ;; old location
                            (file-exists-p (concat base "build/bin/" bin-name)))))

          (when (eq whisper-install-whispercpp 'manual)
            (error "Couldn't find whisper.cpp install at: %s" base))

          (if (yes-or-no-p (format "Couldn't find whisper.cpp, install it at: %s ?" base))
              ;; File names are expanded and shell-quoted so that
              ;; directories containing spaces or shell meta-characters
              ;; work.
              (start-job
               (concat
                "mkdir -p " (shell-quote-argument install-dir) " && "
                "cd " (shell-quote-argument install-dir) " && "
                "git clone https://github.com/ggerganov/whisper.cpp && "
                "cd whisper.cpp && "
                "CLICOLOR=0 make"))
            (error "Needs whisper.cpp to be installed")))

        ;; 2. the model
        (when (and (not failed)
                   (not (file-exists-p (whisper--model-file nil))))
          (if (yes-or-no-p (format "Speech recognition model \"%s\" isn't available, download now?" whisper-model))
              (start-job
               (concat
                "cd " (shell-quote-argument base) " && "
                "models/download-ggml-model.sh " (shell-quote-argument whisper-model)))
            (error "Needs speech recognition model to run whisper")))

        ;; 3. the quantized model
        (when (and whisper-quantize
                   (not failed)
                   (not (file-exists-p (whisper--model-file t))))
          (let ((quantize-arguments
                 (concat (shell-quote-argument (whisper--model-file nil))
                         " " (shell-quote-argument (whisper--model-file t))
                         " " (shell-quote-argument whisper-quantize))))
            (if (not (file-exists-p (concat base "build/bin/quantize")))
                ;; the quantize tool has to be built first
                (start-job
                 (concat
                  "cd " (shell-quote-argument base) " && "
                  "make quantize" " && "
                  "echo 'Quantizing the model....'" " && "
                  "./build/bin/quantize " quantize-arguments))
              (message "Running quantize binary...")
              (shell-command
               (concat (shell-quote-argument (concat base "build/bin/quantize"))
                       " " quantize-arguments))
              (throw 'early-return nil))))

        (when interrupted
          ;; double check to be sure before cleaning up
          (when (and (file-directory-p base) (string-suffix-p "/whisper.cpp/" base))
            (if (file-exists-p (concat base "build/bin/" bin-name))
                ;; model download interrupted probably, should delete partial file
                (progn
                  (message "Download interrupted, cleaning up.")
                  (delete-file (concat base "models/" "ggml-" whisper-model ".bin")))
              ;; otherwise whisper.cpp compilation got interrupted
              (message "Installation interrupted, cleaning up.")
              (unless (eq whisper-install-whispercpp 'manual)
                (delete-directory whisper--install-path t))))
          (throw 'early-return nil))

        (when crashed
          (if (eq whisper-install-whispercpp 'manual)
              (message "Compilation exited abnormally, but not deleting directory because installation is manual.")
            (delete-directory whisper--install-path t)
            (message "Couldn't compile whisper.cpp. Check that you have Git, a C++ compiler and CMake installed."))
          (display-buffer whisper--compilation-buffer)
          (throw 'early-return nil))

        (when (string-equal "finished\n" status)
          (unless (or whisper--ffmpeg-input-file
                      (yes-or-no-p "Speech recognition model download completed, want to record audio now?"))
            (throw 'early-return nil)))

        ;; finally
        (whisper--record-audio)))))
576
577;;;###autoload
(defun whisper-run (&optional arg)
  "Transcribe/translate audio using whisper.

When ARG is given, uses a local file as input.  Otherwise records the
audio.  Interactively, a plain prefix argument prompts for the media
file; from Lisp, ARG may also be the name of a readable file.  Any other
value of ARG is ignored and audio is recorded.

This is a dwim function that does different things depending on current state:

- When inference engine (whisper.cpp) isn't installed, installs it first.
- When speech recognition model isn't available, downloads it.
- When installation/download is already in progress, cancels those.
- When installation is valid, starts recording audio.
- When recording is in progress, stops it and starts transcribing.
- When transcribing is in progress, cancels it."
  (interactive "P")
  (cond
   ((process-live-p whisper--transcribing-process)
    (when (yes-or-no-p "A transcribing is already in progress, kill it?")
      (kill-process whisper--transcribing-process)))

   ;; Interrupting the recorder makes its sentinel start transcribing.
   ((process-live-p whisper--recording-process)
    (interrupt-process whisper--recording-process))

   ((and (buffer-live-p whisper--compilation-buffer)
         (process-live-p (get-buffer-process whisper--compilation-buffer)))
    (interrupt-process (get-buffer-process whisper--compilation-buffer)))

   (t
    (setq whisper--point-buffer (current-buffer))
    (run-hooks 'whisper-before-transcription-hook)
    (when whisper-install-whispercpp
      (whisper--check-model-consistency))
    (setq-default
     whisper--ffmpeg-input-file
     (pcase arg
       ('nil nil)
       ('(4)
        (when-let* ((file (expand-file-name (read-file-name "Media file: " nil nil t))))
          (unless (file-readable-p file)
            (error "Media file doesn't exist or isn't readable"))
          file))
       ;; `file-readable-p' signals on anything but a string (such as a
       ;; numeric prefix argument), so make sure ARG is one first.
       ((and (pred stringp) (pred file-readable-p) file) file)))
    (setq whisper--using-whispercpp nil)
    (if whisper-install-whispercpp
        (whisper--check-install-and-run nil "whisper-start")
      ;; if user is bringing their own inference engine, we at least check the command exists
      (let ((command (car (whisper-command whisper--temp-file))))
        (if (and (stringp command)
                 (or (file-exists-p command)
                     (executable-find command)))
            (whisper--record-audio)
          (error "Couldn't find %s in PATH, nor is it a file" command)))))))
627
628;;;###autoload
(defun whisper-file ()
  "Transcribe/translate a local media file using whisper.

This calls `whisper-run' interactively as if it had been given a plain
prefix argument."
  (interactive)
  ;; A plain prefix argument is how `whisper-run' is told to use a file.
  (let ((current-prefix-arg (list 4)))
    (call-interactively 'whisper-run)))
634
635(provide 'whisper)
636;;; whisper.el ends here