File size: 2,984 Bytes
f7aef3e
1fd4bde
dfabe35
 
 
1fd4bde
 
 
 
 
 
 
f7aef3e
1fd4bde
f7aef3e
1fd4bde
f7aef3e
 
1fd4bde
 
 
78726ec
 
 
 
 
 
 
 
 
1fd4bde
 
f3283ba
1fd4bde
 
f7aef3e
 
 
 
 
 
 
1fd4bde
 
78726ec
 
1fd4bde
 
 
 
 
 
 
f7aef3e
 
1fd4bde
 
 
 
 
 
 
f7aef3e
1fd4bde
f7aef3e
1fd4bde
 
f7aef3e
1fd4bde
 
 
 
f7aef3e
1fd4bde
f7aef3e
1fd4bde
 
 
 
 
 
 
f7aef3e
1fd4bde
 
 
 
69f99f1
 
 
 
 
 
 
 
 
 
1fd4bde
69f99f1
1fd4bde
69f99f1
1fd4bde
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/bin/sh

# NOTE: intentional kill-switch — everything below this exit is currently
# dead code until the script is brought back up to date.
printf "whisper.cpp: this script hasn't been maintained and is not functional atm\n"
exit 1

# This script downloads Whisper model files that have already been converted to Core ML format.
# This way you don't have to convert them yourself.

# Base URL of the Hugging Face dataset hosting the pre-converted Core ML models.
src="https://huggingface.co/datasets/ggerganov/whisper.cpp-coreml"
# URL path prefix; the full download URL is built as $src/$pfx-<model>.mlmodel
pfx="resolve/main/ggml"

# Print the absolute directory containing this script.
get_script_path() {
    # Prefer realpath(1) when available; it resolves symlinks cleanly.
    if [ -x "$(command -v realpath)" ]; then
        dirname "$(realpath "$0")"
        return
    fi
    # Fallback: enter the script's directory in a subshell and print the
    # physical working directory (resolves symlinked path components).
    ( cd -- "$(dirname "$0")" >/dev/null 2>&1 || exit; pwd -P )
}

script_path="$(get_script_path)"

# If the script is installed under a */bin directory, default downloads to
# the caller's current working directory; otherwise download next to the
# script itself.
case "$script_path" in
    */bin)
        default_download_path="$PWD"
        ;;
    *)
        default_download_path="$script_path"
        ;;
esac

# An optional second argument overrides the download directory.
models_path="${2:-$default_download_path}"

# Whisper models
models="tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3 large-v3-turbo"

# list available models
list_models() {
        printf "\n"
        printf "  Available models:"
        for model in $models; do
                printf " %s" "$models"
        done
        printf "\n\n"
}

# Require exactly one or two arguments: <model> [models_path].
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
    printf "Usage: %s <model> [models_path]\n" "$0"
    list_models

    exit 1
fi

model=$1

# Validate 'model' against the exact whitespace-delimited tokens of $models.
# The previous 'grep -q -w' check also accepted sub-tokens such as "en" or
# "v1" ('.' and '-' are word boundaries for grep) and matched everything for
# an empty argument; an anchored case pattern matches whole tokens only.
case " $models " in
    *" $model "*) ;;  # valid model name
    *)
        printf "Invalid model: %s\n" "$model"
        list_models

        exit 1
        ;;
esac

# download Core ML model

printf "Downloading Core ML model %s from '%s' ...\n" "$model" "$src"

cd "$models_path" || exit

if [ -f "ggml-$model.mlmodel" ]; then
    printf "Model %s already exists. Skipping download.\n" "$model"
    exit 0
fi

# Build the URL once; quote all expansions so an unusual path cannot split.
url="$src/$pfx-$model.mlmodel"

if [ -x "$(command -v wget)" ]; then
    wget --quiet --show-progress -O "ggml-$model.mlmodel" "$url"
elif [ -x "$(command -v curl)" ]; then
    curl -L --output "ggml-$model.mlmodel" "$url"
else
    printf "Either wget or curl is required to download models.\n"
    exit 1
fi
# Capture the downloader's exit status immediately; previously '$?' was
# consulted after intervening blank lines, which breaks silently if any
# command is ever inserted in between.
dl_status=$?

if [ "$dl_status" -ne 0 ]; then
    printf "Failed to download Core ML model %s \n" "$model"
    printf "Please try again later or download the original Whisper model files and convert them yourself.\n"
    exit 1
fi

# Check if 'whisper-cli' is available in the system PATH
if command -v whisper-cli >/dev/null 2>&1; then
    # If found, use 'whisper-cli' (relying on PATH resolution)
    whisper_cmd="whisper-cli"
else
    # If not found, use the local build version
    whisper_cmd="./build/bin/whisper-cli"
fi

# Report the file that was actually downloaded: the Core ML .mlmodel
# (the old message wrongly claimed a ggml-<model>.bin was saved).
printf "Done! Model '%s' saved in '%s/ggml-%s.mlmodel'\n" "$model" "$models_path" "$model"
printf "Run the following command to compile it:\n\n"
printf "  $ xcrun coremlc compile %s/ggml-%s.mlmodel %s\n\n" "$models_path" "$model" "$models_path"
printf "You can now use it like this:\n\n"
# NOTE(review): the run command references the regular ggml .bin model, which
# must be obtained separately; whisper-cli is expected to pick up the compiled
# Core ML model alongside it — confirm against whisper.cpp docs.
printf "  $ %s -m %s/ggml-%s.bin -f samples/jfk.wav\n" "$whisper_cmd" "$models_path" "$model"
printf "\n"