import React, { useRef, useEffect, useState, useCallback } from 'react';
import { ArrowDownTrayIcon } from '@heroicons/react/24/outline';
import { useTranscriptionStore } from '../stores/transcriptionStore';

interface MinimapTimelineProps {
  audioRef: React.RefObject<HTMLAudioElement>;
  videoRef: React.RefObject<HTMLVideoElement>;
  canvasTimelineRef: React.RefObject<HTMLDivElement>; // Container that scrolls
}
export default function MinimapTimeline({
  audioRef,
  videoRef,
  canvasTimelineRef
}: MinimapTimelineProps) {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);
  const [isDragging, setIsDragging] = useState(false);
  const [dragStartX, setDragStartX] = useState(0);
  const [dragStartScrollLeft, setDragStartScrollLeft] = useState(0);
  const [waveformData, setWaveformData] = useState<number[]>([]);
  const [viewport, setViewport] = useState({ start: 0, end: 30, visible: false });

  const {
    transcription,
    preprocessedAudio,
    currentTime,
  } = useTranscriptionStore();

  // Constants
  const MINIMAP_HEIGHT = 80;
  const PIXELS_PER_SECOND = 300; // Match the CanvasTimeline scaling
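  // Example scale: a 60 s file renders as a 60 × 300 = 18,000 px canvas in the
  // detailed timeline, while this minimap always compresses the full duration
  // into the container width.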
  // Get media duration (prefer the live media elements, fall back to the transcript)
  const getMediaDuration = useCallback(() => {
    const audioElement = audioRef.current;
    const videoElement = videoRef.current;
    // Number.isFinite rejects both NaN (metadata not yet loaded) and the
    // Infinity duration reported for live streams.
    if (audioElement && Number.isFinite(audioElement.duration)) {
      return audioElement.duration;
    }
    if (videoElement && Number.isFinite(videoElement.duration)) {
      return videoElement.duration;
    }
    return transcription?.total_duration || 0;
  }, [audioRef, videoRef, transcription]);
  const mediaDuration = getMediaDuration();

  // Canvas width based on container
  const [canvasWidth, setCanvasWidth] = useState(800);

  // Update canvas width on resize
  useEffect(() => {
    const updateCanvasWidth = () => {
      if (containerRef.current) {
        setCanvasWidth(containerRef.current.clientWidth);
      }
    };
    updateCanvasWidth();
    window.addEventListener('resize', updateCanvasWidth);
    return () => window.removeEventListener('resize', updateCanvasWidth);
  }, []);
  // Track Canvas Timeline scroll position and calculate viewport
  const updateViewportFromScroll = useCallback(() => {
    const canvasContainer = canvasTimelineRef.current;
    if (!canvasContainer || mediaDuration === 0) return;

    const scrollLeft = canvasContainer.scrollLeft;
    const containerWidth = canvasContainer.clientWidth;
    const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
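    // Visible time is proportional to scroll position; startTime algebraically
    // reduces to scrollLeft / PIXELS_PER_SECOND (the duration terms cancel).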
    // Calculate what time range is currently visible
    const startTime = (scrollLeft / totalCanvasWidth) * mediaDuration;
    const endTime = ((scrollLeft + containerWidth) / totalCanvasWidth) * mediaDuration;

    setViewport({
      start: Math.max(0, startTime),
      end: Math.min(mediaDuration, endTime),
      visible: true
    });
  }, [canvasTimelineRef, mediaDuration]);
  // Listen for scroll events on the Canvas Timeline container
  useEffect(() => {
    const canvasContainer = canvasTimelineRef.current;
    if (!canvasContainer) return;

    const handleScroll = () => {
      updateViewportFromScroll();
    };
    const handleLoadOrResize = () => {
      // Update viewport when container size changes
      updateViewportFromScroll();
    };

    // Initial viewport calculation
    updateViewportFromScroll();

    canvasContainer.addEventListener('scroll', handleScroll);
    window.addEventListener('resize', handleLoadOrResize);
    return () => {
      canvasContainer.removeEventListener('scroll', handleScroll);
      window.removeEventListener('resize', handleLoadOrResize);
    };
  }, [updateViewportFromScroll]);
  // Generate waveform data from preprocessed audio
  const generateWaveformFromPreprocessedAudio = useCallback(async () => {
    if (!preprocessedAudio?.data) {
      console.log('No preprocessed audio data available');
      return;
    }
    try {
      console.log('Generating waveform from preprocessed audio data');

      // Decode base64 audio data
      const audioBytes = atob(preprocessedAudio.data);
      const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
      const audioUint8Array = new Uint8Array(audioArrayBuffer);
      for (let i = 0; i < audioBytes.length; i++) {
        audioUint8Array[i] = audioBytes.charCodeAt(i);
      }
      // Create audio context and decode the WAV data
      const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
      const audioBuffer = await audioContext.decodeAudioData(audioArrayBuffer);
      audioContext.close(); // release the audio device handle once decoding is done

      // Extract audio data
      const channelData = audioBuffer.getChannelData(0);
      const samples = Math.min(800, canvasWidth); // Limit samples for performance
      const blockSize = Math.floor(channelData.length / samples);
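      // Illustrative numbers: 60 s of 16 kHz mono audio is 960,000 samples, so
      // with 800 bars each block averages 1,200 samples into a single amplitude.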
      const waveform: number[] = [];
      for (let i = 0; i < samples; i++) {
        const start = i * blockSize;
        const end = Math.min(start + blockSize, channelData.length);
        let sum = 0;
        for (let j = start; j < end; j++) {
          sum += Math.abs(channelData[j]);
        }
        waveform.push(sum / (end - start));
      }

      // Normalize to the clip's own peak so quiet recordings still fill the lane
      const max = Math.max(...waveform);
      const normalizedWaveform = max > 0 ? waveform.map(val => val / max) : waveform;
      setWaveformData(normalizedWaveform);
      console.log(`Generated waveform with ${normalizedWaveform.length} samples from preprocessed audio`);
    } catch (error) {
      console.error('Error generating waveform from preprocessed audio:', error);
      // Fallback to segment-based visualization
      generateFallbackWaveform();
    }
    // generateFallbackWaveform is declared below; listing it in the deps array
    // would read the binding before its initialization, so it is omitted here.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [preprocessedAudio, canvasWidth]);
  // Fallback waveform generation from segment data
  const generateFallbackWaveform = useCallback(() => {
    if (!transcription?.aligned_segments || mediaDuration === 0) return;

    console.log('Using fallback waveform generation from segments');
    const segments = transcription.aligned_segments;
    // Math.floor keeps the count integral; a fractional length would make
    // `new Array(samples)` throw a RangeError.
    const samples = Math.floor(Math.min(400, canvasWidth / 2));
    const bars = new Array(samples).fill(0);
    // Create waveform based on speech activity in segments
    segments.forEach(segment => {
      const startIndex = Math.floor((segment.start / mediaDuration) * samples);
      const endIndex = Math.ceil((segment.end / mediaDuration) * samples);
      for (let i = startIndex; i < Math.min(endIndex, samples); i++) {
        // Use segment text length and duration to estimate intensity
        const intensity = Math.min(1.0, segment.text.length / 50 + 0.3);
        bars[i] = Math.max(bars[i], intensity * (0.7 + Math.random() * 0.3));
      }
    });

    setWaveformData(bars);
    console.log(`Generated fallback waveform with ${bars.length} samples`);
  }, [transcription, mediaDuration, canvasWidth]);
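  // The fallback trades fidelity for availability: bars appear only under
  // transcribed segments, approximating where speech occurs rather than true amplitude.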
  // Generate waveform when preprocessed audio becomes available
  useEffect(() => {
    if (preprocessedAudio?.data) {
      generateWaveformFromPreprocessedAudio();
    } else if (transcription?.aligned_segments) {
      // Use fallback if we have segments but no preprocessed audio
      generateFallbackWaveform();
    }
  }, [preprocessedAudio, transcription, generateWaveformFromPreprocessedAudio, generateFallbackWaveform]);
  // Draw the minimap
  const draw = useCallback(() => {
    const canvas = canvasRef.current;
    if (!canvas || mediaDuration === 0) return;
    const ctx = canvas.getContext('2d');
    if (!ctx) return;

    const { width, height } = canvas;

    // Clear canvas
    ctx.clearRect(0, 0, width, height);

    // Draw background
    ctx.fillStyle = '#1a1a1a';
    ctx.fillRect(0, 0, width, height);

    // Draw waveform bars, vertically centered with 20 px of headroom
    if (waveformData.length > 0) {
      ctx.fillStyle = '#4a5568';
      const barWidth = width / waveformData.length;
      waveformData.forEach((amplitude, index) => {
        const barHeight = amplitude * (height - 20);
        const x = index * barWidth;
        const y = (height - barHeight) / 2;
        ctx.fillRect(x, y, Math.max(1, barWidth - 1), barHeight);
      });
    }

    // Draw segments as colored bars along the bottom edge
    if (transcription?.aligned_segments) {
      transcription.aligned_segments.forEach((segment, index) => {
        const startX = (segment.start / mediaDuration) * width;
        const endX = (segment.end / mediaDuration) * width;
        const segmentWidth = endX - startX;
        // Alternate colors for segments
        ctx.fillStyle = index % 2 === 0 ? '#3182ce' : '#38a169';
        ctx.fillRect(startX, height - 4, segmentWidth, 4);
      });
    }

    // Draw current time indicator
    const currentTimeX = (currentTime / mediaDuration) * width;
    ctx.strokeStyle = '#f56565';
    ctx.lineWidth = 2;
    ctx.beginPath();
    ctx.moveTo(currentTimeX, 0);
    ctx.lineTo(currentTimeX, height);
    ctx.stroke();
    // Draw viewport region (what's visible in Canvas Timeline)
    if (viewport.visible) {
      const viewportStartX = (viewport.start / mediaDuration) * width;
      const viewportEndX = (viewport.end / mediaDuration) * width;

      // Draw viewport selection area (visible region highlight)
      ctx.fillStyle = 'rgba(66, 153, 225, 0.3)';
      ctx.fillRect(viewportStartX, 0, viewportEndX - viewportStartX, height);

      // Draw left boundary line (start of visible area)
      ctx.strokeStyle = '#4299e1';
      ctx.lineWidth = 3;
      ctx.beginPath();
      ctx.moveTo(viewportStartX, 0);
      ctx.lineTo(viewportStartX, height);
      ctx.stroke();

      // Draw right boundary line (end of visible area)
      ctx.beginPath();
      ctx.moveTo(viewportEndX, 0);
      ctx.lineTo(viewportEndX, height);
      ctx.stroke();

      // Draw border around visible area
      ctx.strokeStyle = '#4299e1';
      ctx.lineWidth = 1;
      ctx.strokeRect(viewportStartX, 0, viewportEndX - viewportStartX, height);
    }
  }, [waveformData, transcription, currentTime, viewport, mediaDuration]);
  // Update canvas size and redraw
  useEffect(() => {
    const canvas = canvasRef.current;
    if (canvas) {
      canvas.width = canvasWidth;
      canvas.height = MINIMAP_HEIGHT;
      draw();
    }
  }, [canvasWidth, draw]);

  // Redraw when dependencies change
  useEffect(() => {
    draw();
  }, [draw]);
  // Utility function to get time from X coordinate
  const getTimeFromX = useCallback((x: number) => {
    return (x / canvasWidth) * mediaDuration;
  }, [canvasWidth, mediaDuration]);
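  // Inverse of the drawing transform: e.g. x = 400 on an 800 px minimap of a
  // 120 s file resolves to 60 s.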
  // Check if clicking inside the viewport region
  const isClickingViewport = useCallback((x: number) => {
    if (!viewport.visible) return false;
    const viewportStartX = (viewport.start / mediaDuration) * canvasWidth;
    const viewportEndX = (viewport.end / mediaDuration) * canvasWidth;
    return x >= viewportStartX && x <= viewportEndX;
  }, [viewport, mediaDuration, canvasWidth]);
  // Scroll Canvas Timeline to show specific time
  const scrollToTime = useCallback((time: number) => {
    const canvasContainer = canvasTimelineRef.current;
    if (!canvasContainer) return;
    const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
    const targetScrollLeft = Math.max(0, (time / mediaDuration) * totalCanvasWidth);
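    // (time / mediaDuration) × totalCanvasWidth reduces to time × PIXELS_PER_SECOND;
    // the Math.max guard only protects against negative times.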
    canvasContainer.scrollLeft = targetScrollLeft;
  }, [canvasTimelineRef, mediaDuration]);
  // Mouse event handlers
  const handleMouseDown = useCallback((e: React.MouseEvent) => {
    const rect = canvasRef.current?.getBoundingClientRect();
    if (!rect) return;
    const x = e.clientX - rect.left;

    if (isClickingViewport(x)) {
      // Start dragging the viewport
      setIsDragging(true);
      setDragStartX(x);
      const canvasContainer = canvasTimelineRef.current;
      if (canvasContainer) {
        setDragStartScrollLeft(canvasContainer.scrollLeft);
      }
    } else {
      // Click outside viewport - jump to that position
      const clickTime = getTimeFromX(x);
      scrollToTime(clickTime);
    }
  }, [isClickingViewport, canvasTimelineRef, getTimeFromX, scrollToTime]);
  const handleMouseMove = useCallback((e: React.MouseEvent) => {
    if (!isDragging) return;
    const rect = canvasRef.current?.getBoundingClientRect();
    if (!rect) return;
    const x = e.clientX - rect.left;
    const deltaX = x - dragStartX;

    const canvasContainer = canvasTimelineRef.current;
    if (!canvasContainer) return;

    // Convert deltaX to scroll delta: one minimap pixel spans
    // totalCanvasWidth / canvasWidth pixels of the detailed timeline
    const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
    const scrollDelta = (deltaX / canvasWidth) * totalCanvasWidth;
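    // Clamp to the scrollable range so the drag cannot overshoot either end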
    const newScrollLeft = Math.max(0, Math.min(
      dragStartScrollLeft + scrollDelta,
      canvasContainer.scrollWidth - canvasContainer.clientWidth
    ));
    canvasContainer.scrollLeft = newScrollLeft;
  }, [isDragging, dragStartX, dragStartScrollLeft, canvasTimelineRef, mediaDuration, canvasWidth]);
  const handleMouseUp = useCallback(() => {
    setIsDragging(false);
  }, []);
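  // During a drag the pointer can leave the small canvas, so the move/up
  // listeners go on document rather than on the canvas itself.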
  // Add global mouse event listeners when dragging
  useEffect(() => {
    if (isDragging) {
      const handleGlobalMouseMove = (e: MouseEvent) => {
        // The cast is safe here: handleMouseMove only reads clientX, which
        // native and synthetic mouse events share.
        handleMouseMove(e as any);
      };
      const handleGlobalMouseUp = () => {
        handleMouseUp();
      };
      document.addEventListener('mousemove', handleGlobalMouseMove);
      document.addEventListener('mouseup', handleGlobalMouseUp);
      return () => {
        document.removeEventListener('mousemove', handleGlobalMouseMove);
        document.removeEventListener('mouseup', handleGlobalMouseUp);
      };
    }
  }, [isDragging, handleMouseMove, handleMouseUp]);
  // Change cursor based on hover position
  const handleMouseHover = useCallback((e: React.MouseEvent) => {
    if (isDragging) return;
    const rect = canvasRef.current?.getBoundingClientRect();
    if (!rect) return;
    const x = e.clientX - rect.left;
    const canvas = canvasRef.current;
    if (!canvas) return;

    if (isClickingViewport(x)) {
      canvas.style.cursor = 'move';
    } else {
      canvas.style.cursor = 'pointer';
    }
  }, [isDragging, isClickingViewport]);
  // Download preprocessed audio as WAV file
  const downloadPreprocessedAudio = useCallback(() => {
    if (!preprocessedAudio?.data) {
      console.error('No preprocessed audio data available');
      return;
    }
    try {
      // Decode base64 audio data
      const audioBytes = atob(preprocessedAudio.data);
      const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
      const audioUint8Array = new Uint8Array(audioArrayBuffer);
      for (let i = 0; i < audioBytes.length; i++) {
        audioUint8Array[i] = audioBytes.charCodeAt(i);
      }

      // Create blob and download
      const blob = new Blob([audioUint8Array], { type: 'audio/wav' });
      const url = URL.createObjectURL(blob);

      // Get original filename without extension
      const { file } = useTranscriptionStore.getState();
      const originalName = file?.name?.replace(/\.[^/.]+$/, '') || 'audio';
      const filename = `${originalName}_preprocessed_16khz_mono_normalized.wav`;

      // Create download link
      const link = document.createElement('a');
      link.href = url;
      link.download = filename;
      document.body.appendChild(link);
      link.click();
      document.body.removeChild(link);

      // Clean up URL
      URL.revokeObjectURL(url);
      console.log(`Downloaded preprocessed audio: ${filename}`);
    } catch (error) {
      console.error('Error downloading preprocessed audio:', error);
    }
  }, [preprocessedAudio]);
  if (!transcription || mediaDuration === 0) {
    return null;
  }
  return (
    <div className="bg-gray-800 border-b border-gray-700">
      <div className="px-4 py-2">
        <div className="flex justify-between items-center text-xs text-gray-400 mb-1">
          <div className="flex items-center gap-2">
            <span>
              Overview - Full Timeline ({Math.round(mediaDuration)}s)
              {preprocessedAudio ? ' • Preprocessed Waveform' : ' • Segment-Based View'}
            </span>
            {preprocessedAudio && (
              <div className="tooltip tooltip-bottom" data-tip="Download preprocessed audio as WAV file (16kHz, mono, normalized). This is the exact audio data processed by the AI transcription model after conversion and standardization from the original file.">
                <button
                  onClick={downloadPreprocessedAudio}
                  className="flex items-center gap-1 px-1.5 py-0.5 text-xs bg-gray-600 hover:bg-gray-500 rounded transition-colors text-white"
                >
                  <ArrowDownTrayIcon className="w-3 h-3" />
                  .wav
                </button>
              </div>
            )}
          </div>
          {viewport.visible && (
            <span>
              Visible: {viewport.start.toFixed(1)}s - {viewport.end.toFixed(1)}s
              ({Math.round(viewport.end - viewport.start)}s view)
            </span>
          )}
        </div>
        <div
          ref={containerRef}
          className="relative"
          style={{ height: MINIMAP_HEIGHT }}
        >
          <canvas
            ref={canvasRef}
            onMouseDown={handleMouseDown}
            onMouseMove={handleMouseHover}
            className="block w-full h-full"
            style={{
              width: '100%',
              height: MINIMAP_HEIGHT,
            }}
          />
        </div>
      </div>
    </div>
  );
}
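
// Example wiring (hypothetical parent; the ref names below are illustrative,
// not taken from this repo):
//
//   const audioRef = useRef<HTMLAudioElement>(null);
//   const videoRef = useRef<HTMLVideoElement>(null);
//   const timelineScrollRef = useRef<HTMLDivElement>(null); // scrollable CanvasTimeline container
//
//   <audio ref={audioRef} src={mediaUrl} />
//   <MinimapTimeline
//     audioRef={audioRef}
//     videoRef={videoRef}
//     canvasTimelineRef={timelineScrollRef}
//   />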