whisper.cpp

Running

App Files Files Community

ggerganov

jbrough committed on Aug 27, 2023

Commit

8ae21a0

unverified ·

1 Parent(s): 7b59286

whisper : allow whisper_full from mel spectrogram - no audio (#1214)

Browse files

Files changed (1) hide show

whisper.cpp +14 -14

whisper.cpp CHANGED Viewed

@@ -3140,7 +3140,6 @@ int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, i
         return false;
     }
     if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) {
         log("%s: failed to eval\n", __func__);
         return 1;
@@ -3374,7 +3373,6 @@ float * whisper_get_logits(struct whisper_context * ctx) {
     return ctx->state->logits.data();
 }
 float * whisper_get_logits_from_state(struct whisper_state * state) {
     return state->logits.data();
 }
@@ -4087,15 +4085,17 @@ int whisper_full_with_state(
     result_all.clear();
-    // compute log mel spectrogram
-    if (params.speed_up) {
-        // TODO: Replace PV with more advanced algorithm
-        log("%s: failed to compute log mel spectrogram\n", __func__);
-        return -1;
-    } else {
-        if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
             log("%s: failed to compute log mel spectrogram\n", __func__);
-            return -2;
         }
     }
@@ -4121,7 +4121,9 @@ int whisper_full_with_state(
         state->t_beg    = 0;
         state->t_last   = 0;
         state->tid_last = 0;
-        state->energy = get_signal_energy(samples, n_samples, 32);
     }
     const int seek_start = params.offset_ms/10;
@@ -4258,7 +4260,7 @@ int whisper_full_with_state(
     while (true) {
         if (params.progress_callback) {
             const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start);
             params.progress_callback(
                 ctx, ctx->state, progress_cur, params.progress_callback_user_data);
         }
@@ -4813,7 +4815,6 @@ int whisper_full_with_state(
     return 0;
 }
 int whisper_full(
         struct whisper_context * ctx,
     struct whisper_full_params   params,
@@ -4890,7 +4891,6 @@ int whisper_full_parallel(
             result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
             result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
             // make sure that segments are not overlapping
             if (!ctx->state->result_all.empty()) {
                 result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);

         return false;
     }
     if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) {
         log("%s: failed to eval\n", __func__);
         return 1;
     return ctx->state->logits.data();
 }
 float * whisper_get_logits_from_state(struct whisper_state * state) {
     return state->logits.data();
 }
     result_all.clear();
+    if (n_samples > 0) {
+        // compute log mel spectrogram
+        if (params.speed_up) {
+            // TODO: Replace PV with more advanced algorithm
             log("%s: failed to compute log mel spectrogram\n", __func__);
+            return -1;
+        } else {
+            if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
+                log("%s: failed to compute log mel spectrogram\n", __func__);
+                return -2;
+            }
         }
     }
         state->t_beg    = 0;
         state->t_last   = 0;
         state->tid_last = 0;
+        if (n_samples > 0) {
+            state->energy = get_signal_energy(samples, n_samples, 32);
+        }
     }
     const int seek_start = params.offset_ms/10;
     while (true) {
         if (params.progress_callback) {
             const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start);
             params.progress_callback(
                 ctx, ctx->state, progress_cur, params.progress_callback_user_data);
         }
     return 0;
 }
 int whisper_full(
         struct whisper_context * ctx,
     struct whisper_full_params   params,
             result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
             result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
             // make sure that segments are not overlapping
             if (!ctx->state->result_all.empty()) {
                 result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);