// MediaRecorder.tsx: audio/video recording UI for the Omnilingual ASR transcription demo
import React, { useState, useRef, useEffect } from 'react';
import { useTranscriptionStore } from '../stores/transcriptionStore';
import { useAudioAnalyzer } from '../hooks/useAudioAnalyzer';
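// Assumed contracts for the imported modules (inferred from usage below, not
// confirmed against the actual files):
//   useTranscriptionStore(): { recordingType: 'audio' | 'video' | null;
//                              setRecordedBlob: (blob: Blob) => void }
//   useAudioAnalyzer(fftSize): { audioData: Uint8Array;
//                                connectToStream: (s: MediaStream) => void;
//                                disconnect: () => void }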
interface MediaRecorderProps {
  onComplete: () => void;
  onCancel: () => void;
}
const MediaRecorder: React.FC<MediaRecorderProps> = ({ onComplete, onCancel }) => {
  const { recordingType, setRecordedBlob } = useTranscriptionStore();
  const [isRecording, setIsRecording] = useState(false);
  const [recordingTime, setRecordingTime] = useState(0);
  const [stream, setStream] = useState<MediaStream | null>(null);
  const [error, setError] = useState<string | null>(null);
  const [permissionState, setPermissionState] = useState<'prompt' | 'granted' | 'denied'>('prompt');
  const [currentMicrophone, setCurrentMicrophone] = useState<string | null>(null);
  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
  const videoRef = useRef<HTMLVideoElement>(null);
  const chunksRef = useRef<Blob[]>([]);
  const timerRef = useRef<number | null>(null);
  const isVideo = recordingType === 'video';

  // Audio analyzer for real-time waveform
  const { audioData, connectToStream, disconnect } = useAudioAnalyzer(256);
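  // The 256 here is assumed to be the AnalyserNode fftSize, which would make
  // audioData a 128-bin (fftSize / 2) frequency array with values in 0-255.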
  // Get microphone device info
  const getMicrophoneInfo = async (mediaStream: MediaStream) => {
    try {
      // Get all available audio input devices. Device labels are only
      // populated once the user has granted media permission; before that,
      // enumerateDevices() returns empty label strings.
      const devices = await navigator.mediaDevices.enumerateDevices();
      const audioInputDevices = devices.filter(device => device.kind === 'audioinput');

      // Get the audio track from the current stream
      const audioTrack = mediaStream.getAudioTracks()[0];
      if (audioTrack) {
        // Get the device settings
        const settings = audioTrack.getSettings();
        const deviceId = settings.deviceId;

        // Find the matching device in our list
        const currentDevice = audioInputDevices.find(device => device.deviceId === deviceId);
        if (currentDevice && currentDevice.label) {
          setCurrentMicrophone(currentDevice.label);
        } else {
          // Fall back to the device ID if the label is not available
          setCurrentMicrophone(`Microphone (${deviceId?.substring(0, 8)}...)`);
        }
      }
    } catch (err) {
      console.error('Error getting microphone info:', err);
      setCurrentMicrophone('Unknown microphone');
    }
  };
  // Request permissions and set up the media stream
  const requestPermissions = async () => {
    try {
      setError(null);
      const constraints: MediaStreamConstraints = {
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          autoGainControl: true,
        },
        video: isVideo ? {
          width: { ideal: 1280 },
          height: { ideal: 720 },
          facingMode: 'user'
        } : false
      };
      const mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
      setStream(mediaStream);
      setPermissionState('granted');

      // Get microphone device information
      await getMicrophoneInfo(mediaStream);
      // Show the video preview if recording video. Under React 18 batching the
      // preview element may not be mounted yet at this point (it renders only
      // once permissionState is 'granted'), so an effect below re-attaches the
      // stream once the element exists.
      if (isVideo && videoRef.current) {
        videoRef.current.srcObject = mediaStream;
        videoRef.current.play().catch(() => {
          // Autoplay rejection is unlikely for a muted element; ignore it here.
        });
      }
      // Connect the audio analyzer for waveform visualization
      connectToStream(mediaStream);
    } catch (err) {
      console.error('Error accessing media devices:', err);
      setPermissionState('denied');
      if (err instanceof DOMException) {
        switch (err.name) {
          case 'NotAllowedError':
            setError(`Permission denied. Please allow access to your microphone${isVideo ? ' and camera' : ''}.`);
            break;
          case 'NotFoundError':
            setError(`No ${isVideo ? 'camera or ' : ''}microphone found.`);
            break;
          case 'NotReadableError':
            setError('Media device is already in use by another application.');
            break;
          default:
            setError(`Failed to access media devices: ${err.message}`);
        }
      } else {
        setError('An unexpected error occurred while accessing media devices.');
      }
    }
  };
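  // (Optional) permissionState could also be seeded from the Permissions API
  // where supported, instead of always starting at 'prompt'. A hedged sketch;
  // 'microphone' is not a recognized PermissionName in every browser:
  //
  //   navigator.permissions
  //     ?.query({ name: 'microphone' as PermissionName })
  //     .then(status => {
  //       if (status.state === 'granted') setPermissionState('granted');
  //     })
  //     .catch(() => { /* fall back to the explicit prompt flow */ });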
  // Start recording
  const startRecording = () => {
    if (!stream) return;
    try {
      chunksRef.current = [];

      // Try different MIME types in order of preference; the trailing empty
      // string lets the browser pick its default container and codec.
      const mimeTypes = isVideo
        ? ['video/webm;codecs=vp9,opus', 'video/webm;codecs=vp8,opus', 'video/webm']
        : ['audio/webm;codecs=opus', 'audio/webm', 'audio/mp4', ''];
      let selectedMimeType = '';
      for (const mimeType of mimeTypes) {
        // window.MediaRecorder is used because this component's name shadows
        // the global MediaRecorder constructor in this module's scope.
        if (mimeType === '' || window.MediaRecorder.isTypeSupported(mimeType)) {
          selectedMimeType = mimeType;
          break;
        }
      }
      const options: MediaRecorderOptions = selectedMimeType ? { mimeType: selectedMimeType } : {};
      const mediaRecorder = new window.MediaRecorder(stream, options);
      mediaRecorderRef.current = mediaRecorder;

      mediaRecorder.ondataavailable = (event) => {
        if (event.data.size > 0) {
          chunksRef.current.push(event.data);
        }
      };

      mediaRecorder.onstop = () => {
        // Use the recorder's actual MIME type so the blob matches what was
        // recorded (e.g. audio/mp4 where webm is unsupported), falling back
        // to webm if the recorder does not report one.
        const blob = new Blob(chunksRef.current, {
          type: mediaRecorder.mimeType || (isVideo ? 'video/webm' : 'audio/webm')
        });
        setRecordedBlob(blob);
        onComplete();
      };

      mediaRecorder.start();
      setIsRecording(true);
      setRecordingTime(0);

      // Start the timer; window.setInterval returns a number, matching the
      // ref's type even under Node typings.
      timerRef.current = window.setInterval(() => {
        setRecordingTime(prev => prev + 1);
      }, 1000);
    } catch (err) {
      console.error('Error starting recording:', err);
      setError('Failed to start recording: ' + (err instanceof Error ? err.message : 'Unknown error'));
    }
  };
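  // Note: start() without a timeslice delivers a single chunk when the
  // recorder stops. For streaming transcription one could instead call
  // mediaRecorder.start(1000) to get a dataavailable event roughly every
  // second; whether the backend accepts partial uploads is an assumption.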
  // Stop recording
  const stopRecording = () => {
    if (mediaRecorderRef.current && isRecording) {
      mediaRecorderRef.current.stop();
      setIsRecording(false);
      if (timerRef.current) {
        clearInterval(timerRef.current);
        timerRef.current = null;
      }
    }
  };

  // Format recording time
  const formatTime = (seconds: number) => {
    const mins = Math.floor(seconds / 60);
    const secs = seconds % 60;
    return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
  };
  // Clean up on unmount and whenever the stream changes
  useEffect(() => {
    return () => {
      if (stream) {
        stream.getTracks().forEach(track => track.stop());
      }
      if (timerRef.current) {
        clearInterval(timerRef.current);
      }
    };
  }, [stream]);
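  // Attach the stream to the video preview once the element has mounted; the
  // inline attach in requestPermissions can run before the preview renders,
  // since the element only appears after permission is granted.
  useEffect(() => {
    if (isVideo && stream && videoRef.current) {
      videoRef.current.srcObject = stream;
      videoRef.current.play().catch(() => {
        // Autoplay rejection is unlikely for a muted element; ignore it here.
      });
    }
  }, [isVideo, stream]);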
  // Clean up the stream when recording stops externally
  useEffect(() => {
    if (!recordingType && stream) {
      stream.getTracks().forEach(track => track.stop());
      setStream(null);
      setCurrentMicrophone(null); // Clear microphone info
      disconnect(); // Also disconnect the audio analyzer
    }
  }, [recordingType, stream, disconnect]);
  // Auto-request permissions when the component mounts (run once by design,
  // hence the empty dependency array)
  useEffect(() => {
    if (permissionState === 'prompt') {
      requestPermissions();
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);
  return (
    <div className="flex flex-col items-center justify-center min-h-[400px] bg-gray-900 rounded-lg border-2 border-dashed border-gray-600 p-8">
      {/* Header */}
      <div className="mb-6 text-center">
        <h3 className="text-xl font-semibold text-white mb-2">
          Record {isVideo ? 'Video' : 'Audio'}
        </h3>
        <p className="text-gray-400 text-sm">
          {permissionState === 'prompt' && 'Requesting permissions...'}
          {permissionState === 'denied' && 'Permission required to record'}
          {permissionState === 'granted' && !isRecording && 'Ready to record'}
          {isRecording && `Recording... ${formatTime(recordingTime)}`}
        </p>

        {/* Microphone Device Info */}
        {permissionState === 'granted' && currentMicrophone && (
          <div className="mt-2 flex items-center justify-center gap-2 text-xs text-gray-300">
            <svg className="w-4 h-4 text-blue-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M19 11a7 7 0 01-7 7m0 0a7 7 0 01-7-7m7 7v4m0 0H8m4 0h4m-4-8a3 3 0 01-3-3V5a3 3 0 116 0v6a3 3 0 01-3 3z" />
            </svg>
            <span className="truncate max-w-xs" title={currentMicrophone}>
              {currentMicrophone}
            </span>
          </div>
        )}
      </div>
      {/* Video Preview (only for video recording) */}
      {isVideo && permissionState === 'granted' && (
        <div className="mb-6">
          <video
            ref={videoRef}
            className="w-80 h-60 bg-black rounded-lg object-cover"
            muted
            playsInline
          />
        </div>
      )}
      {/* Audio Visualization */}
      {permissionState === 'granted' && (
        <div className="mb-6 flex items-center justify-center">
          <div className="w-80 h-20 bg-gray-800 rounded-lg flex items-center justify-center">
            <div className="flex items-center space-x-1">
              {/* Real-time audio visualization bars */}
              {Array.from({ length: 32 }, (_, i) => {
                // Map the bars across the lower 60% of the frequency spectrum,
                // which covers voice fundamentals and some harmonics
                const voiceRangeEnd = Math.floor(audioData.length * 0.6);
                const dataIndex = Math.floor((i / 32) * voiceRangeEnd);
                const amplitude = audioData[dataIndex] || 0;

                // Log-scale the 0-255 amplitude onto 0-1 to avoid saturation:
                // log10(1 + 9x) maps 0 to 0 and 1 to 1 on a logarithmic curve
                const normalizedAmplitude = amplitude / 255;
                const logScaled = Math.log10(1 + normalizedAmplitude * 9);
                const height = Math.max(4, logScaled * 60); // Scale to 4-60px

                return (
                  <div
                    key={i}
                    className="w-1 bg-blue-500 rounded-full transition-all duration-75"
                    style={{
                      height: `${height}px`
                    }}
                  />
                );
              })}
            </div>
          </div>
        </div>
      )}
      {/* Error Display */}
      {error && (
        <div className="mb-4 p-3 bg-red-900/20 border border-red-500 rounded-lg">
          <p className="text-red-300 text-sm">{error}</p>
        </div>
      )}
      {/* Controls */}
      <div className="flex gap-4">
        {permissionState === 'denied' && (
          <button
            onClick={requestPermissions}
            className="px-6 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg transition-colors"
          >
            Request Permission
          </button>
        )}
        {permissionState === 'granted' && !isRecording && (
          <button
            onClick={startRecording}
            disabled={!stream}
            className="px-6 py-2 bg-red-600 hover:bg-red-700 disabled:bg-gray-600 text-white rounded-lg transition-colors flex items-center gap-2"
          >
            <div className="w-4 h-4 bg-white rounded-full"></div>
            Start Recording
          </button>
        )}
        {isRecording && (
          <button
            onClick={stopRecording}
            className="px-6 py-2 bg-gray-600 hover:bg-gray-700 text-white rounded-lg transition-colors flex items-center gap-2"
          >
            <div className="w-4 h-4 bg-white"></div>
            Stop Recording
          </button>
        )}
        <button
          onClick={onCancel}
          disabled={isRecording}
          className="px-6 py-2 bg-gray-700 hover:bg-gray-600 disabled:bg-gray-800 text-white rounded-lg transition-colors"
        >
          Cancel
        </button>
      </div>
      {/* Tips */}
      <div className="mt-6 text-center">
        <p className="text-gray-400 text-xs max-w-md">
          Speak clearly and minimize background noise for best transcription results.
        </p>
      </div>
    </div>
  );
};
export default MediaRecorder;
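// Example usage (illustrative; the parent component and its state handling
// are assumptions, not part of this file):
//
//   <MediaRecorder
//     onComplete={() => setStep('review')}
//     onCancel={() => setStep('idle')}
//   />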