# nisqa-v2.0 / app.py
# Author: ankandrew (commit 35d2caa, "Create app.py")
import gradio as gr
import torch
import numpy as np
import librosa
from torchmetrics.functional.audio.nisqa import non_intrusive_speech_quality_assessment as tm_nisqa
SR = 16000
def predict_nisqa(audio):
    """Score speech quality with NISQA v2.0 (via TorchMetrics).

    Parameters
    ----------
    audio : str | tuple[int, np.ndarray]
        Either a filepath (Gradio ``Audio(type="filepath")``) or a
        ``(sample_rate, samples)`` tuple from a numpy-typed Audio component.

    Returns
    -------
    dict
        Column-oriented table ``{"Metric": [...], "Score": [...]}`` with the
        overall MOS plus the four NISQA quality dimensions, rounded to 3 dp.
    """
    if isinstance(audio, tuple):
        orig_sr, samples = audio
        y = np.asarray(samples)
        # Gradio numpy audio arrives as integer PCM (typically int16);
        # scale to the [-1, 1] float range librosa/NISQA expect instead of
        # feeding raw +/-32768 sample values to the model.
        if np.issubdtype(y.dtype, np.integer):
            y = y.astype(np.float32) / np.iinfo(y.dtype).max
        else:
            y = y.astype(np.float32)
        # Down-mix stereo (n_samples, n_channels) to mono, matching the
        # mono=True behavior of the filepath branch below.
        if y.ndim > 1:
            y = y.mean(axis=1)
        y = librosa.resample(y, orig_sr=orig_sr, target_sr=SR)
    else:
        y, _ = librosa.load(audio, sr=SR, mono=True)
    wav = torch.tensor(y, dtype=torch.float32)
    # TorchMetrics NISQA returns a 5-vector:
    # [MOS, noisiness, discontinuity, coloration, loudness].
    scores = tm_nisqa(wav, SR).detach().cpu().numpy().tolist()
    mos, noisiness, discontinuity, coloration, loudness = scores
    return {
        "Metric": ["MOS (overall)", "Noisiness", "Discontinuity", "Coloration", "Loudness"],
        "Score": [round(mos, 3), round(noisiness, 3), round(discontinuity, 3),
                  round(coloration, 3), round(loudness, 3)],
    }
# --- Gradio UI ----------------------------------------------------------
# Layout: markdown header, then a row with the audio input and predict
# button, and a two-column results table fed by predict_nisqa.
# NOTE(review): original indentation was stripped by the scrape; this
# reconstruction places the results table below the Row — confirm against
# the deployed Space layout.
with gr.Blocks(title="NISQA Speech Quality (MOS) Demo") as demo:
    gr.Markdown(
        """
# 🎧 NISQA Speech Quality (MOS)
Upload or record speech and get **MOS + quality dimensions**.
Uses NISQA v2.0 via TorchMetrics.
"""
    )
    with gr.Row():
        # type="filepath" means predict_nisqa receives a path string,
        # so its tuple branch is a fallback for numpy-typed inputs.
        audio = gr.Audio(sources=["mic", "upload"], type="filepath", label="Input audio (wav/mp3/m4a...)")
        btn = gr.Button("Predict")
    out = gr.Dataframe(headers=["Metric", "Score"], label="Results", interactive=False)
    # Wire the button: predict_nisqa(filepath) -> column dict for the table.
    btn.click(fn=predict_nisqa, inputs=audio, outputs=out)

if __name__ == "__main__":
    demo.launch()