ankandrew committed on
Commit
5d44395
·
verified ·
1 Parent(s): 2061420

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -14
app.py CHANGED
@@ -2,11 +2,33 @@ import gradio as gr
2
  import torch
3
  import numpy as np
4
  import librosa
5
-
6
  from torchmetrics.functional.audio.nisqa import non_intrusive_speech_quality_assessment as tm_nisqa
7
 
8
  SR = 16000
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def predict_nisqa(audio):
11
  if isinstance(audio, tuple):
12
  _sr, y = audio
@@ -15,31 +37,56 @@ def predict_nisqa(audio):
15
  y, _ = librosa.load(audio, sr=SR, mono=True)
16
 
17
  wav = torch.tensor(y, dtype=torch.float32)
 
18
 
19
- out = tm_nisqa(wav, SR).detach().cpu().numpy().tolist()
20
- mos, noisiness, discontinuity, coloration, loudness = out
 
 
 
 
 
21
 
22
  table = {
23
- "Metric": ["MOS (overall)", "Noisiness", "Discontinuity", "Coloration", "Loudness"],
24
- "Score": [round(mos, 3), round(noisiness, 3), round(discontinuity, 3),
25
- round(coloration, 3), round(loudness, 3)]
 
26
  }
27
- return table
 
 
28
 
29
  with gr.Blocks(title="NISQA Speech Quality (MOS) Demo") as demo:
30
  gr.Markdown(
31
  """
32
- # 🎧 NISQA Speech Quality (MOS)
33
- Upload or record speech and get **MOS + quality dimensions**.
34
- Uses NISQA v2.0 via TorchMetrics.
 
35
  """
36
  )
37
- with gr.Row():
38
- audio = gr.Audio(sources=['upload', 'microphone'], type="filepath", label="Input audio (wav/mp3/m4a...)")
39
  btn = gr.Button("Predict")
40
- out = gr.Dataframe(headers=["Metric", "Score"], label="Results", interactive=False)
41
 
42
- btn.click(fn=predict_nisqa, inputs=audio, outputs=out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  if __name__ == "__main__":
45
  demo.launch()
 
2
  import torch
3
  import numpy as np
4
  import librosa
 
5
  from torchmetrics.functional.audio.nisqa import non_intrusive_speech_quality_assessment as tm_nisqa
6
 
7
  SR = 16000
8
 
9
def label_mos(x: float):
    """Map a MOS score onto an ITU-T P.800 ACR-inspired category name."""
    # Each entry is (exclusive upper bound, category); first match wins.
    buckets = (
        (1.5, "Bad"),
        (2.5, "Poor"),
        (3.5, "Fair"),
        (4.3, "Good"),
    )
    for upper_bound, category in buckets:
        if x < upper_bound:
            return category
    return "Excellent"
16
+
17
def label_dim(x: float):
    """Map a NISQA dimension score onto a severity label (higher score = fewer issues)."""
    # Each entry is (exclusive upper bound, severity); first match wins.
    buckets = (
        (1.5, "Severe"),
        (2.5, "High"),
        (3.5, "Moderate"),
        (4.3, "Low"),
    )
    for upper_bound, severity in buckets:
        if x < upper_bound:
            return severity
    return "Negligible"
23
+
24
def explain_dim(name: str):
    """Return the one-line description for a NISQA quality dimension.

    Raises KeyError when *name* is not one of the four known dimensions.
    """
    descriptions = {
        "Noisiness": "How noisy it sounds (higher = less noise).",
        "Discontinuity": "Dropouts/glitches (higher = fewer glitches).",
        "Coloration": "Tone/timbre coloration (higher = more natural).",
        "Loudness": "Perceived loudness appropriateness (higher = more appropriate)."
    }
    return descriptions[name]
31
+
32
  def predict_nisqa(audio):
33
  if isinstance(audio, tuple):
34
  _sr, y = audio
 
37
  y, _ = librosa.load(audio, sr=SR, mono=True)
38
 
39
  wav = torch.tensor(y, dtype=torch.float32)
40
+ mos, noisiness, discontinuity, coloration, loudness = tm_nisqa(wav, SR).detach().cpu().numpy().tolist()
41
 
42
+ metrics = [
43
+ ("MOS (overall)", mos, label_mos(mos), "Higher = better perceived quality."),
44
+ ("Noisiness", noisiness, label_dim(noisiness), explain_dim("Noisiness")),
45
+ ("Discontinuity", discontinuity, label_dim(discontinuity), explain_dim("Discontinuity")),
46
+ ("Coloration", coloration, label_dim(coloration), explain_dim("Coloration")),
47
+ ("Loudness", loudness, label_dim(loudness), explain_dim("Loudness")),
48
+ ]
49
 
50
  table = {
51
+ "Metric": [m[0] for m in metrics],
52
+ "Score": [round(m[1], 3) for m in metrics],
53
+ "Label": [m[2] for m in metrics],
54
+ "Notes": [m[3] for m in metrics],
55
  }
56
+ bars = {m[0]: float(m[1]) for m in metrics}
57
+
58
+ return table, bars
59
 
60
with gr.Blocks(title="NISQA Speech Quality (MOS) Demo") as demo:
    gr.Markdown(
        """
        # 🎧 NISQA Speech Quality (MOS)
        Upload or record speech and get **MOS + quality dimensions**.
        **Scale:** 1–5 where higher = better.
        **Dimensions:** higher = fewer issues in that aspect.
        """
    )
    audio = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Input audio")
    btn = gr.Button("Predict")

    out_table = gr.Dataframe(headers=["Metric", "Score", "Label", "Notes"], interactive=False)

    bars = gr.BarPlot(
        x="Metric", y="Score",
        y_lim=(0, 5),
        tooltip=["Score"],
        # NOTE(review): the previous width=0.6 looked like a relative bar width,
        # but BarPlot's width is a pixel size — dropped; confirm desired sizing.
        interactive=False,
        label="Scores (0–5, higher = better)"
    )

    def _predict_for_ui(audio_path):
        """Run NISQA and shape the outputs for the UI components.

        Passes the results table through unchanged and converts the
        {metric: score} dict into the DataFrame that gr.BarPlot expects.
        """
        import pandas as pd
        table, bar_scores = predict_nisqa(audio_path)
        bars_df = pd.DataFrame(
            {"Metric": list(bar_scores.keys()), "Score": list(bar_scores.values())}
        )
        return table, bars_df

    # Single event wiring. The previous postprocess=False + .then() chain fed the
    # raw score dict straight into BarPlot (which expects a DataFrame) and used
    # the BarPlot component as an event *input*, which Gradio does not support.
    # Doing the dict -> DataFrame conversion inside one handler fixes both.
    btn.click(fn=_predict_for_ui, inputs=audio, outputs=[out_table, bars])

if __name__ == "__main__":
    demo.launch()