ggerganov jbrough committed on
Commit
8ae21a0
·
unverified ·
1 Parent(s): 7b59286

whisper : allow whisper_full from mel spectrogram - no audio (#1214)

Browse files
Files changed (1) hide show
  1. whisper.cpp +14 -14
whisper.cpp CHANGED
@@ -3140,7 +3140,6 @@ int whisper_decode(struct whisper_context * ctx, const whisper_token * tokens, i
3140
  return false;
3141
  }
3142
 
3143
-
3144
  if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) {
3145
  log("%s: failed to eval\n", __func__);
3146
  return 1;
@@ -3374,7 +3373,6 @@ float * whisper_get_logits(struct whisper_context * ctx) {
3374
  return ctx->state->logits.data();
3375
  }
3376
 
3377
-
3378
  float * whisper_get_logits_from_state(struct whisper_state * state) {
3379
  return state->logits.data();
3380
  }
@@ -4087,15 +4085,17 @@ int whisper_full_with_state(
4087
 
4088
  result_all.clear();
4089
 
4090
- // compute log mel spectrogram
4091
- if (params.speed_up) {
4092
- // TODO: Replace PV with more advanced algorithm
4093
- log("%s: failed to compute log mel spectrogram\n", __func__);
4094
- return -1;
4095
- } else {
4096
- if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
4097
  log("%s: failed to compute log mel spectrogram\n", __func__);
4098
- return -2;
 
 
 
 
 
4099
  }
4100
  }
4101
 
@@ -4121,7 +4121,9 @@ int whisper_full_with_state(
4121
  state->t_beg = 0;
4122
  state->t_last = 0;
4123
  state->tid_last = 0;
4124
- state->energy = get_signal_energy(samples, n_samples, 32);
 
 
4125
  }
4126
 
4127
  const int seek_start = params.offset_ms/10;
@@ -4258,7 +4260,7 @@ int whisper_full_with_state(
4258
  while (true) {
4259
  if (params.progress_callback) {
4260
  const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start);
4261
-
4262
  params.progress_callback(
4263
  ctx, ctx->state, progress_cur, params.progress_callback_user_data);
4264
  }
@@ -4813,7 +4815,6 @@ int whisper_full_with_state(
4813
  return 0;
4814
  }
4815
 
4816
-
4817
  int whisper_full(
4818
  struct whisper_context * ctx,
4819
  struct whisper_full_params params,
@@ -4890,7 +4891,6 @@ int whisper_full_parallel(
4890
  result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
4891
  result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
4892
 
4893
-
4894
  // make sure that segments are not overlapping
4895
  if (!ctx->state->result_all.empty()) {
4896
  result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);
 
3140
  return false;
3141
  }
3142
 
 
3143
  if (!whisper_decode_internal(*ctx, *ctx->state, ctx->state->decoders[selected_decoder_id], tokens, n_tokens, n_past, n_threads)) {
3144
  log("%s: failed to eval\n", __func__);
3145
  return 1;
 
3373
  return ctx->state->logits.data();
3374
  }
3375
 
 
3376
  float * whisper_get_logits_from_state(struct whisper_state * state) {
3377
  return state->logits.data();
3378
  }
 
4085
 
4086
  result_all.clear();
4087
 
4088
+ if (n_samples > 0) {
4089
+ // compute log mel spectrogram
4090
+ if (params.speed_up) {
4091
+ // TODO: Replace PV with more advanced algorithm
 
 
 
4092
  log("%s: failed to compute log mel spectrogram\n", __func__);
4093
+ return -1;
4094
+ } else {
4095
+ if (whisper_pcm_to_mel_with_state(ctx, state, samples, n_samples, params.n_threads) != 0) {
4096
+ log("%s: failed to compute log mel spectrogram\n", __func__);
4097
+ return -2;
4098
+ }
4099
  }
4100
  }
4101
 
 
4121
  state->t_beg = 0;
4122
  state->t_last = 0;
4123
  state->tid_last = 0;
4124
+ if (n_samples > 0) {
4125
+ state->energy = get_signal_energy(samples, n_samples, 32);
4126
+ }
4127
  }
4128
 
4129
  const int seek_start = params.offset_ms/10;
 
4260
  while (true) {
4261
  if (params.progress_callback) {
4262
  const int progress_cur = (100*(seek - seek_start))/(seek_end - seek_start);
4263
+
4264
  params.progress_callback(
4265
  ctx, ctx->state, progress_cur, params.progress_callback_user_data);
4266
  }
 
4815
  return 0;
4816
  }
4817
 
 
4818
  int whisper_full(
4819
  struct whisper_context * ctx,
4820
  struct whisper_full_params params,
 
4891
  result.t0 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
4892
  result.t1 += 100 * ((i + 1) * n_samples_per_processor) / WHISPER_SAMPLE_RATE + offset_t;
4893
 
 
4894
  // make sure that segments are not overlapping
4895
  if (!ctx->state->result_all.empty()) {
4896
  result.t0 = std::max(result.t0, ctx->state->result_all.back().t1);