Spaces:
Running
Running
ref #57, #62, #63 : remove unions in C-api + remove designated initializers
Browse files
We are not ready for designated initializers - many compilers do not
support this C++ feature yet, so removing its non-trivial usages.
- main.cpp +1 -1
- stream.cpp +1 -1
- whisper.cpp +44 -40
- whisper.h +15 -17
main.cpp
CHANGED
|
@@ -216,7 +216,7 @@ int main(int argc, char ** argv) {
|
|
| 216 |
|
| 217 |
// run the inference
|
| 218 |
{
|
| 219 |
-
whisper_full_params wparams = whisper_full_default_params(
|
| 220 |
|
| 221 |
wparams.print_realtime = true;
|
| 222 |
wparams.print_progress = false;
|
|
|
|
| 216 |
|
| 217 |
// run the inference
|
| 218 |
{
|
| 219 |
+
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
| 220 |
|
| 221 |
wparams.print_realtime = true;
|
| 222 |
wparams.print_progress = false;
|
stream.cpp
CHANGED
|
@@ -282,7 +282,7 @@ int main(int argc, char ** argv) {
|
|
| 282 |
|
| 283 |
// run the inference
|
| 284 |
{
|
| 285 |
-
whisper_full_params wparams = whisper_full_default_params(
|
| 286 |
|
| 287 |
wparams.print_progress = false;
|
| 288 |
wparams.print_special_tokens = params.print_special_tokens;
|
|
|
|
| 282 |
|
| 283 |
// run the inference
|
| 284 |
{
|
| 285 |
+
whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY);
|
| 286 |
|
| 287 |
wparams.print_progress = false;
|
| 288 |
wparams.print_special_tokens = params.print_special_tokens;
|
whisper.cpp
CHANGED
|
@@ -2256,59 +2256,63 @@ void whisper_print_timings(struct whisper_context * ctx) {
|
|
| 2256 |
|
| 2257 |
////////////////////////////////////////////////////////////////////////////
|
| 2258 |
|
| 2259 |
-
struct whisper_full_params whisper_full_default_params(enum
|
| 2260 |
struct whisper_full_params result;
|
| 2261 |
|
| 2262 |
switch (strategy) {
|
| 2263 |
-
case
|
| 2264 |
{
|
| 2265 |
-
#if defined(_MSC_VER)
|
| 2266 |
result = {
|
| 2267 |
-
|
| 2268 |
-
result = (struct whisper_full_params) {
|
| 2269 |
-
#endif
|
| 2270 |
-
.strategy = WHISPER_DECODE_GREEDY,
|
| 2271 |
-
.n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2272 |
-
.offset_ms = 0,
|
| 2273 |
|
| 2274 |
-
|
| 2275 |
-
|
| 2276 |
-
.print_special_tokens = false,
|
| 2277 |
-
.print_progress = true,
|
| 2278 |
-
.print_realtime = false,
|
| 2279 |
-
.print_timestamps = true,
|
| 2280 |
|
| 2281 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2282 |
|
| 2283 |
-
|
| 2284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2285 |
},
|
| 2286 |
};
|
| 2287 |
} break;
|
| 2288 |
-
case
|
| 2289 |
{
|
| 2290 |
-
#if defined(_MSC_VER)
|
| 2291 |
result = {
|
| 2292 |
-
|
| 2293 |
-
|
| 2294 |
-
|
| 2295 |
-
|
| 2296 |
-
|
| 2297 |
-
|
| 2298 |
-
|
| 2299 |
-
|
| 2300 |
-
|
| 2301 |
-
|
| 2302 |
-
|
| 2303 |
-
|
| 2304 |
-
|
| 2305 |
-
|
| 2306 |
-
|
| 2307 |
-
|
| 2308 |
-
|
| 2309 |
-
|
| 2310 |
-
|
| 2311 |
-
|
|
|
|
|
|
|
| 2312 |
},
|
| 2313 |
};
|
| 2314 |
} break;
|
|
|
|
| 2256 |
|
| 2257 |
////////////////////////////////////////////////////////////////////////////
|
| 2258 |
|
| 2259 |
+
struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy) {
|
| 2260 |
struct whisper_full_params result;
|
| 2261 |
|
| 2262 |
switch (strategy) {
|
| 2263 |
+
case WHISPER_SAMPLING_GREEDY:
|
| 2264 |
{
|
|
|
|
| 2265 |
result = {
|
| 2266 |
+
/*.strategy =*/ WHISPER_SAMPLING_GREEDY,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2267 |
|
| 2268 |
+
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2269 |
+
/*.offset_ms =*/ 0,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2270 |
|
| 2271 |
+
/*.translate =*/ false,
|
| 2272 |
+
/*.no_context =*/ false,
|
| 2273 |
+
/*.print_special_tokens =*/ false,
|
| 2274 |
+
/*.print_progress =*/ true,
|
| 2275 |
+
/*.print_realtime =*/ false,
|
| 2276 |
+
/*.print_timestamps =*/ true,
|
| 2277 |
|
| 2278 |
+
/*.language =*/ "en",
|
| 2279 |
+
|
| 2280 |
+
/*.greedy =*/ {
|
| 2281 |
+
/*.n_past =*/ 0,
|
| 2282 |
+
},
|
| 2283 |
+
|
| 2284 |
+
/*.beam_search =*/ {
|
| 2285 |
+
/*.n_past =*/ -1,
|
| 2286 |
+
/*.beam_width =*/ -1,
|
| 2287 |
+
/*.n_best =*/ -1,
|
| 2288 |
},
|
| 2289 |
};
|
| 2290 |
} break;
|
| 2291 |
+
case WHISPER_SAMPLING_BEAM_SEARCH:
|
| 2292 |
{
|
|
|
|
| 2293 |
result = {
|
| 2294 |
+
/*.strategy =*/ WHISPER_SAMPLING_BEAM_SEARCH,
|
| 2295 |
+
|
| 2296 |
+
/*.n_threads =*/ std::min(4, (int32_t) std::thread::hardware_concurrency()),
|
| 2297 |
+
/*.offset_ms =*/ 0,
|
| 2298 |
+
|
| 2299 |
+
/*.translate =*/ false,
|
| 2300 |
+
/*.no_context =*/ false,
|
| 2301 |
+
/*.print_special_tokens =*/ false,
|
| 2302 |
+
/*.print_progress =*/ true,
|
| 2303 |
+
/*.print_realtime =*/ false,
|
| 2304 |
+
/*.print_timestamps =*/ true,
|
| 2305 |
+
|
| 2306 |
+
/*.language =*/ "en",
|
| 2307 |
+
|
| 2308 |
+
/*.greedy =*/ {
|
| 2309 |
+
/*.n_past =*/ -1,
|
| 2310 |
+
},
|
| 2311 |
+
|
| 2312 |
+
/*.beam_search =*/ {
|
| 2313 |
+
/*.n_past =*/ 0,
|
| 2314 |
+
/*.beam_width =*/ 10,
|
| 2315 |
+
/*.n_best =*/ 5,
|
| 2316 |
},
|
| 2317 |
};
|
| 2318 |
} break;
|
whisper.h
CHANGED
|
@@ -153,14 +153,14 @@ extern "C" {
|
|
| 153 |
|
| 154 |
////////////////////////////////////////////////////////////////////////////
|
| 155 |
|
| 156 |
-
// Available
|
| 157 |
-
enum
|
| 158 |
-
|
| 159 |
-
|
| 160 |
};
|
| 161 |
|
| 162 |
struct whisper_full_params {
|
| 163 |
-
enum
|
| 164 |
|
| 165 |
int n_threads;
|
| 166 |
int offset_ms;
|
|
@@ -174,20 +174,18 @@ extern "C" {
|
|
| 174 |
|
| 175 |
const char * language;
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
} beam_search;
|
| 187 |
-
};
|
| 188 |
};
|
| 189 |
|
| 190 |
-
WHISPER_API struct whisper_full_params whisper_full_default_params(enum
|
| 191 |
|
| 192 |
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
| 193 |
// Uses the specified decoding strategy to obtain the text.
|
|
|
|
| 153 |
|
| 154 |
////////////////////////////////////////////////////////////////////////////
|
| 155 |
|
| 156 |
+
// Available sampling strategies
|
| 157 |
+
enum whisper_sampling_strategy {
|
| 158 |
+
WHISPER_SAMPLING_GREEDY, // Always select the most probable token
|
| 159 |
+
WHISPER_SAMPLING_BEAM_SEARCH, // TODO: not implemented yet!
|
| 160 |
};
|
| 161 |
|
| 162 |
struct whisper_full_params {
|
| 163 |
+
enum whisper_sampling_strategy strategy;
|
| 164 |
|
| 165 |
int n_threads;
|
| 166 |
int offset_ms;
|
|
|
|
| 174 |
|
| 175 |
const char * language;
|
| 176 |
|
| 177 |
+
struct {
|
| 178 |
+
int n_past;
|
| 179 |
+
} greedy;
|
| 180 |
+
|
| 181 |
+
struct {
|
| 182 |
+
int n_past;
|
| 183 |
+
int beam_width;
|
| 184 |
+
int n_best;
|
| 185 |
+
} beam_search;
|
|
|
|
|
|
|
| 186 |
};
|
| 187 |
|
| 188 |
+
WHISPER_API struct whisper_full_params whisper_full_default_params(enum whisper_sampling_strategy strategy);
|
| 189 |
|
| 190 |
// Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
|
| 191 |
// Uses the specified decoding strategy to obtain the text.
|