import {useRef, useEffect, useCallback} from "react";
import {AlignedSegment} from "../services/transcriptionApi";
import {useTranscriptionStore} from "../stores/transcriptionStore";
import {useMediaTimeSync} from "../hooks/useMediaTimeSync";
import {SUPPORTED_AUDIO_FORMATS, CODEC_INFO} from "../utils/mediaTypes";
import MediaPlayer from "./MediaPlayer";
// CanvasTimeline is currently disabled (see the commented-out block below),
// so its import is kept commented out to avoid an unused-import warning.
// import CanvasTimeline from "./CanvasTimeline";
import MinimapTimeline from "./MinimapTimeline";
import TranscriptionControls from "./TranscriptionControls";
import FullTranscription from "./FullTranscription";
import ErrorBoundary from "./ErrorBoundary";
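
/**
 * Right-hand pane of the demo: media player, transcription controls, the
 * full transcript, timelines, and the loading / empty states. Keeps the
 * media playhead and the selected transcript segment in sync through the
 * transcription store. AlignedSegment is assumed to carry `start` and `end`
 * times in seconds (see handleTimeUpdate below).
 */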
export default function TranscriptionPlayer() {
  // Get state from store
  const {
    file,
    mediaUrl,
    transcription,
    isLoading,
    isProcessingVideo,
    selectedSegmentIndex,
  } = useTranscriptionStore();
  // Get actions from store
  const {
    handleTimeUpdate: updateTimeInStore,
    setSelectedSegmentIndex,
    setMediaRefs,
  } = useTranscriptionStore();
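  // Refs to the underlying media elements; MediaPlayer is expected to attach
  // whichever one matches the file type, so reads below fall back across both
  // via `audioRef.current || videoRef.current`.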
const audioRef = useRef<HTMLAudioElement>(null);
const videoRef = useRef<HTMLVideoElement>(null);
const canvasTimelineRef = useRef<HTMLDivElement>(null);
// Set media refs in store for centralized seeking
useEffect(() => {
setMediaRefs(audioRef, videoRef);
}, [setMediaRefs]);
const handleTimeUpdate = useCallback(() => {
const mediaElement = audioRef.current || videoRef.current;
if (mediaElement && transcription?.aligned_segments) {
const mediaCurrentTime = mediaElement.currentTime;
      // Find the segment whose [start, end] range contains the current time
const activeIndex = transcription.aligned_segments.findIndex(
(segment) =>
mediaCurrentTime >= segment.start && mediaCurrentTime <= segment.end
);
      // No exact match: the playhead sits in a gap between segments, so fall
      // back to the nearest segment boundary within a 0.5 s tolerance
let finalActiveIndex: number | null = activeIndex;
if (activeIndex === -1) {
let closestIndex = -1;
let minDistance = Infinity;
transcription.aligned_segments.forEach((segment, index) => {
const distance = Math.min(
Math.abs(mediaCurrentTime - segment.start),
Math.abs(mediaCurrentTime - segment.end)
);
if (distance < minDistance && distance < 0.5) {
// 0.5 second tolerance
minDistance = distance;
closestIndex = index;
}
});
finalActiveIndex = closestIndex >= 0 ? closestIndex : null;
}
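      // At this point finalActiveIndex is the segment under (or nearest to)
      // the playhead, or null when nothing is within tolerance.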
updateTimeInStore();
// Auto-select the active segment only if:
// 1. We found an active segment
// 2. Either no segment is selected, or the active segment changed
if (
finalActiveIndex !== null &&
selectedSegmentIndex !== finalActiveIndex
) {
setSelectedSegmentIndex(finalActiveIndex);
}
}
}, [
transcription,
updateTimeInStore,
selectedSegmentIndex,
setSelectedSegmentIndex,
]);
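  // Seek handler for timeline segments; currently referenced only by the
  // disabled CanvasTimeline block in the JSX below.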
const handleSeekToSegment = (segment: AlignedSegment) => {
const mediaElement = audioRef.current || videoRef.current;
if (mediaElement) {
mediaElement.currentTime = segment.start;
// Immediately update the store to sync the progress indicator
handleTimeUpdate();
}
};
// Use media time sync hook for continuous time updates during playback
useMediaTimeSync({
audioRef,
videoRef,
onTimeUpdate: handleTimeUpdate,
transcription,
});
  // Revoke the previous object URL whenever mediaUrl changes, and on unmount
useEffect(() => {
return () => {
if (mediaUrl) {
URL.revokeObjectURL(mediaUrl);
}
};
}, [mediaUrl]);
return (
<div className="flex-1 min-w-0 flex flex-col bg-black">
{/* Media Player */}
{file && (
<ErrorBoundary componentName="MediaPlayer">
<MediaPlayer
audioRef={audioRef}
videoRef={videoRef}
onTimeUpdate={handleTimeUpdate}
/>
</ErrorBoundary>
)}
{/* Transcription Controls */}
<ErrorBoundary componentName="TranscriptionControls">
<TranscriptionControls />
</ErrorBoundary>
{/* Full Transcription */}
<ErrorBoundary componentName="FullTranscription">
<FullTranscription />
</ErrorBoundary>
{/* Transcription Timeline */}
{transcription && (
<>
{/* Minimap Timeline */}
<ErrorBoundary componentName="MinimapTimeline">
<MinimapTimeline
audioRef={audioRef}
videoRef={videoRef}
canvasTimelineRef={canvasTimelineRef}
/>
</ErrorBoundary>
{/* Canvas Timeline */}
{/* <ErrorBoundary componentName="CanvasTimeline">
<CanvasTimeline
audioRef={audioRef}
videoRef={videoRef}
onSeekToSegment={handleSeekToSegment}
onTimeUpdate={handleTimeUpdate}
ref={canvasTimelineRef}
/>
</ErrorBoundary> */}
</>
)}
{/* Transcription Loading State */}
{file && !transcription && (isLoading || isProcessingVideo) && (
<div className="flex-1 flex items-center justify-center bg-gray-900 border-t border-gray-700">
<div className="text-center text-white">
<div className="mb-4">
<div className="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-500 mx-auto"></div>
</div>
<div className="text-2xl md:text-3xl mb-3 font-semibold">
{file?.type.startsWith("video/")
? "Processing Video..."
: "Transcribing Audio..."}
</div>
<div className="text-base md:text-lg text-gray-400 max-w-md mx-auto">
{file?.type.startsWith("video/")
? "Server is extracting audio and generating transcription"
: "Converting speech to text"}
{/* : "Converting speech to text with timestamps"} */}
</div>
</div>
</div>
)}
{/* No File State */}
{!file && (
<div className="flex-1 flex items-center justify-center">
<div className="text-center text-gray-400">
<div className="text-6xl mb-4">🎵</div>
<div className="text-xl mb-2">Upload Audio</div>
<div className="text-sm mb-4">
Choose an audio file or drag and drop or record audio from the
panel on the left anywhere to get started with transcription
</div>
{/* Supported File Types */}
<div className="text-xs text-gray-500 max-w-md mx-auto">
{/* Audio formats section */}
<div className="text-center mb-3">
<div className="font-medium text-gray-400 mb-1">
Audio Formats
</div>
<div className="text-xs text-gray-500">
{SUPPORTED_AUDIO_FORMATS.join(" • ")}
</div>
</div>
{/* Codec info */}
<div className="text-center">
<div className="text-xs text-gray-400 opacity-75">
Recommended: {CODEC_INFO.audio.common.slice(0, 2).join(", ")}{" "}
codecs
</div>
</div>
</div>
</div>
</div>
)}
</div>
);
}