// Omnilingual ASR transcription demo: MinimapTimeline component (overview of the full timeline)
import React, { useRef, useEffect, useState, useCallback } from 'react';
import { ArrowDownTrayIcon } from '@heroicons/react/24/outline';
import { useTranscriptionStore } from '../stores/transcriptionStore';
interface MinimapTimelineProps {
audioRef: React.RefObject<HTMLAudioElement>;
videoRef: React.RefObject<HTMLVideoElement>;
canvasTimelineRef: React.RefObject<HTMLDivElement>; // Container that scrolls
}
export default function MinimapTimeline({
audioRef,
videoRef,
canvasTimelineRef
}: MinimapTimelineProps) {
const canvasRef = useRef<HTMLCanvasElement>(null);
const containerRef = useRef<HTMLDivElement>(null);
const [isDragging, setIsDragging] = useState(false);
const [dragStartX, setDragStartX] = useState(0);
const [dragStartScrollLeft, setDragStartScrollLeft] = useState(0);
const [waveformData, setWaveformData] = useState<number[]>([]);
const [viewport, setViewport] = useState({ start: 0, end: 30, visible: false });
const {
transcription,
preprocessedAudio,
currentTime,
} = useTranscriptionStore();
// Constants
const MINIMAP_HEIGHT = 80;
const PIXELS_PER_SECOND = 300; // Match the CanvasTimeline scaling
// Get media duration
const getMediaDuration = useCallback(() => {
const audioElement = audioRef.current;
const videoElement = videoRef.current;
if (audioElement && !isNaN(audioElement.duration)) {
return audioElement.duration;
}
if (videoElement && !isNaN(videoElement.duration)) {
return videoElement.duration;
}
return transcription?.total_duration || 0;
}, [audioRef, videoRef, transcription]);
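// Recomputed on every render: a media element's duration is NaN until its metadata loads,
// so we fall back to the transcription's reported total_duration in the meantime.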
const mediaDuration = getMediaDuration();
// Canvas width based on container
const [canvasWidth, setCanvasWidth] = useState(800);
// Update canvas width on resize
useEffect(() => {
const updateCanvasWidth = () => {
if (containerRef.current) {
setCanvasWidth(containerRef.current.clientWidth);
}
};
updateCanvasWidth();
window.addEventListener('resize', updateCanvasWidth);
return () => window.removeEventListener('resize', updateCanvasWidth);
}, []);
// Track Canvas Timeline scroll position and calculate viewport
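// Maps the Canvas Timeline's scrollLeft/clientWidth into a [start, end] time window,
// assuming the timeline is laid out at PIXELS_PER_SECOND pixels per second.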
const updateViewportFromScroll = useCallback(() => {
const canvasContainer = canvasTimelineRef.current;
if (!canvasContainer || mediaDuration === 0) return;
const scrollLeft = canvasContainer.scrollLeft;
const containerWidth = canvasContainer.clientWidth;
const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
// Calculate what time range is currently visible
const startTime = (scrollLeft / totalCanvasWidth) * mediaDuration;
const endTime = ((scrollLeft + containerWidth) / totalCanvasWidth) * mediaDuration;
setViewport({
start: Math.max(0, startTime),
end: Math.min(mediaDuration, endTime),
visible: true
});
}, [canvasTimelineRef, mediaDuration]);
// Listen for scroll events on the Canvas Timeline container
useEffect(() => {
const canvasContainer = canvasTimelineRef.current;
if (!canvasContainer) return;
const handleScroll = () => {
updateViewportFromScroll();
};
const handleLoadOrResize = () => {
// Update viewport when container size changes
updateViewportFromScroll();
};
// Initial viewport calculation
updateViewportFromScroll();
canvasContainer.addEventListener('scroll', handleScroll);
window.addEventListener('resize', handleLoadOrResize);
return () => {
canvasContainer.removeEventListener('scroll', handleScroll);
window.removeEventListener('resize', handleLoadOrResize);
};
}, [updateViewportFromScroll]);
// Generate waveform data from preprocessed audio
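// preprocessedAudio.data is treated as a base64-encoded WAV payload; it is decoded with the
// Web Audio API and reduced to one averaged amplitude value per minimap bar.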
const generateWaveformFromPreprocessedAudio = useCallback(async () => {
if (!preprocessedAudio?.data) {
console.log('No preprocessed audio data available');
return;
}
try {
console.log('Generating waveform from preprocessed audio data');
// Decode base64 audio data
const audioBytes = atob(preprocessedAudio.data);
const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
const audioUint8Array = new Uint8Array(audioArrayBuffer);
for (let i = 0; i < audioBytes.length; i++) {
audioUint8Array[i] = audioBytes.charCodeAt(i);
}
// Create audio context and decode the WAV data
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
const audioBuffer = await audioContext.decodeAudioData(audioArrayBuffer);
// Close the temporary context once decoding is done so repeated regenerations don't leak audio resources
audioContext.close();
// Extract audio data
const channelData = audioBuffer.getChannelData(0);
const samples = Math.min(800, canvasWidth); // Limit samples for performance
const blockSize = Math.floor(channelData.length / samples);
const waveform: number[] = [];
for (let i = 0; i < samples; i++) {
const start = i * blockSize;
const end = Math.min(start + blockSize, channelData.length);
let sum = 0;
for (let j = start; j < end; j++) {
sum += Math.abs(channelData[j]);
}
waveform.push(end > start ? sum / (end - start) : 0); // Guard against zero-length blocks on very short audio
}
// Normalize waveform
const max = Math.max(...waveform);
const normalizedWaveform = max > 0 ? waveform.map(val => val / max) : waveform;
setWaveformData(normalizedWaveform);
console.log(`Generated waveform with ${normalizedWaveform.length} samples from preprocessed audio`);
} catch (error) {
console.error('Error generating waveform from preprocessed audio:', error);
// Fallback to segment-based visualization
generateFallbackWaveform();
}
}, [preprocessedAudio, canvasWidth]);
// Fallback waveform generation from segment data
const generateFallbackWaveform = useCallback(() => {
if (!transcription?.aligned_segments || mediaDuration === 0) return;
console.log('Using fallback waveform generation from segments');
const segments = transcription.aligned_segments;
const samples = Math.floor(Math.min(400, canvasWidth / 2)); // Array length must be an integer
const bars = new Array(samples).fill(0);
// Create waveform based on speech activity in segments
segments.forEach(segment => {
const startIndex = Math.floor((segment.start / mediaDuration) * samples);
const endIndex = Math.ceil((segment.end / mediaDuration) * samples);
for (let i = startIndex; i < Math.min(endIndex, samples); i++) {
// Use segment text length and duration to estimate intensity
const intensity = Math.min(1.0, segment.text.length / 50 + 0.3);
bars[i] = Math.max(bars[i], intensity * (0.7 + Math.random() * 0.3));
}
});
setWaveformData(bars);
console.log(`Generated fallback waveform with ${bars.length} samples`);
}, [transcription, mediaDuration, canvasWidth]);
// Generate waveform when preprocessed audio becomes available
useEffect(() => {
if (preprocessedAudio?.data) {
generateWaveformFromPreprocessedAudio();
} else if (transcription?.aligned_segments) {
// Use fallback if we have segments but no preprocessed audio
generateFallbackWaveform();
}
}, [preprocessedAudio, transcription, generateWaveformFromPreprocessedAudio, generateFallbackWaveform]);
// Draw the minimap
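// Layers, back to front: dark background, waveform bars, per-segment strips along the bottom
// edge, the red playhead, then the highlighted viewport of the Canvas Timeline.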
const draw = useCallback(() => {
const canvas = canvasRef.current;
if (!canvas || mediaDuration === 0) return;
const ctx = canvas.getContext('2d');
if (!ctx) return;
const { width, height } = canvas;
// Clear canvas
ctx.clearRect(0, 0, width, height);
// Draw background
ctx.fillStyle = '#1a1a1a';
ctx.fillRect(0, 0, width, height);
// Draw waveform
if (waveformData.length > 0) {
ctx.fillStyle = '#4a5568';
const barWidth = width / waveformData.length;
waveformData.forEach((amplitude, index) => {
const barHeight = amplitude * (height - 20);
const x = index * barWidth;
const y = (height - barHeight) / 2;
ctx.fillRect(x, y, Math.max(1, barWidth - 1), barHeight);
});
}
// Draw segments as colored bars
if (transcription?.aligned_segments) {
transcription.aligned_segments.forEach((segment, index) => {
const startX = (segment.start / mediaDuration) * width;
const endX = (segment.end / mediaDuration) * width;
const segmentWidth = endX - startX;
// Alternate colors for segments
ctx.fillStyle = index % 2 === 0 ? '#3182ce' : '#38a169';
ctx.fillRect(startX, height - 4, segmentWidth, 4);
});
}
// Draw current time indicator
const currentTimeX = (currentTime / mediaDuration) * width;
ctx.strokeStyle = '#f56565';
ctx.lineWidth = 2;
ctx.beginPath();
ctx.moveTo(currentTimeX, 0);
ctx.lineTo(currentTimeX, height);
ctx.stroke();
// Draw viewport region (what's visible in Canvas Timeline)
if (viewport.visible) {
const viewportStartX = (viewport.start / mediaDuration) * width;
const viewportEndX = (viewport.end / mediaDuration) * width;
// Draw viewport selection area (visible region highlight)
ctx.fillStyle = 'rgba(66, 153, 225, 0.3)';
ctx.fillRect(viewportStartX, 0, viewportEndX - viewportStartX, height);
// Draw left boundary line (start of visible area)
ctx.strokeStyle = '#4299e1';
ctx.lineWidth = 3;
ctx.beginPath();
ctx.moveTo(viewportStartX, 0);
ctx.lineTo(viewportStartX, height);
ctx.stroke();
// Draw right boundary line (end of visible area)
ctx.beginPath();
ctx.moveTo(viewportEndX, 0);
ctx.lineTo(viewportEndX, height);
ctx.stroke();
// Draw border around visible area
ctx.strokeStyle = '#4299e1';
ctx.lineWidth = 1;
ctx.strokeRect(viewportStartX, 0, viewportEndX - viewportStartX, height);
}
}, [waveformData, transcription, currentTime, viewport, mediaDuration]);
// Update canvas size and redraw
useEffect(() => {
const canvas = canvasRef.current;
if (canvas) {
canvas.width = canvasWidth;
canvas.height = MINIMAP_HEIGHT;
draw();
}
}, [canvasWidth, draw]);
// Redraw when dependencies change
useEffect(() => {
draw();
}, [draw]);
// Utility function to get time from X coordinate
const getTimeFromX = useCallback((x: number) => {
return (x / canvasWidth) * mediaDuration;
}, [canvasWidth, mediaDuration]);
// Check if clicking inside the viewport region
const isClickingViewport = useCallback((x: number) => {
if (!viewport.visible) return false;
const viewportStartX = (viewport.start / mediaDuration) * canvasWidth;
const viewportEndX = (viewport.end / mediaDuration) * canvasWidth;
return x >= viewportStartX && x <= viewportEndX;
}, [viewport, mediaDuration, canvasWidth]);
// Scroll Canvas Timeline to show specific time
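// The target time ends up at the left edge of the Canvas Timeline's visible area.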
const scrollToTime = useCallback((time: number) => {
const canvasContainer = canvasTimelineRef.current;
if (!canvasContainer) return;
const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
const targetScrollLeft = Math.max(0, (time / mediaDuration) * totalCanvasWidth);
canvasContainer.scrollLeft = targetScrollLeft;
}, [canvasTimelineRef, mediaDuration]);
// Mouse event handlers
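// Interaction model: pressing inside the highlighted viewport starts a drag that pans the
// Canvas Timeline; pressing anywhere else jumps the timeline straight to that position.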
const handleMouseDown = useCallback((e: React.MouseEvent) => {
const rect = canvasRef.current?.getBoundingClientRect();
if (!rect) return;
const x = e.clientX - rect.left;
if (isClickingViewport(x)) {
// Start dragging the viewport
setIsDragging(true);
setDragStartX(x);
const canvasContainer = canvasTimelineRef.current;
if (canvasContainer) {
setDragStartScrollLeft(canvasContainer.scrollLeft);
}
} else {
// Click outside viewport - jump to that position
const clickTime = getTimeFromX(x);
scrollToTime(clickTime);
}
}, [isClickingViewport, canvasTimelineRef, getTimeFromX, scrollToTime]);
const handleMouseMove = useCallback((e: React.MouseEvent | MouseEvent) => {
if (!isDragging) return;
const rect = canvasRef.current?.getBoundingClientRect();
if (!rect) return;
const x = e.clientX - rect.left;
const deltaX = x - dragStartX;
const canvasContainer = canvasTimelineRef.current;
if (!canvasContainer) return;
// Convert deltaX to scroll delta
const totalCanvasWidth = mediaDuration * PIXELS_PER_SECOND;
const scrollDelta = (deltaX / canvasWidth) * totalCanvasWidth;
const newScrollLeft = Math.max(0, Math.min(
dragStartScrollLeft + scrollDelta,
canvasContainer.scrollWidth - canvasContainer.clientWidth
));
canvasContainer.scrollLeft = newScrollLeft;
}, [isDragging, dragStartX, dragStartScrollLeft, canvasTimelineRef, mediaDuration, canvasWidth]);
const handleMouseUp = useCallback(() => {
setIsDragging(false);
}, []);
// Add global mouse event listeners when dragging
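// Listening on document keeps the drag tracking even when the pointer leaves the minimap canvas.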
useEffect(() => {
if (isDragging) {
const handleGlobalMouseMove = (e: MouseEvent) => {
// handleMouseMove also accepts the native MouseEvent, so no unsafe cast is needed
handleMouseMove(e);
};
const handleGlobalMouseUp = () => {
handleMouseUp();
};
document.addEventListener('mousemove', handleGlobalMouseMove);
document.addEventListener('mouseup', handleGlobalMouseUp);
return () => {
document.removeEventListener('mousemove', handleGlobalMouseMove);
document.removeEventListener('mouseup', handleGlobalMouseUp);
};
}
}, [isDragging, handleMouseMove, handleMouseUp]);
// Change cursor based on hover position
const handleMouseHover = useCallback((e: React.MouseEvent) => {
if (isDragging) return;
const rect = canvasRef.current?.getBoundingClientRect();
if (!rect) return;
const x = e.clientX - rect.left;
const canvas = canvasRef.current;
if (!canvas) return;
if (isClickingViewport(x)) {
canvas.style.cursor = 'move';
} else {
canvas.style.cursor = 'pointer';
}
}, [isDragging, isClickingViewport]);
// Download preprocessed audio as WAV file
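// Reuses the same base64 payload as the waveform generation and triggers the download
// through a temporary object URL and anchor element.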
const downloadPreprocessedAudio = useCallback(() => {
if (!preprocessedAudio?.data) {
console.error('No preprocessed audio data available');
return;
}
try {
// Decode base64 audio data
const audioBytes = atob(preprocessedAudio.data);
const audioArrayBuffer = new ArrayBuffer(audioBytes.length);
const audioUint8Array = new Uint8Array(audioArrayBuffer);
for (let i = 0; i < audioBytes.length; i++) {
audioUint8Array[i] = audioBytes.charCodeAt(i);
}
// Create blob and download
const blob = new Blob([audioUint8Array], { type: 'audio/wav' });
const url = URL.createObjectURL(blob);
// Get original filename without extension
const { file } = useTranscriptionStore.getState();
const originalName = file?.name?.replace(/\.[^/.]+$/, '') || 'audio';
const filename = `${originalName}_preprocessed_16khz_mono_normalized.wav`;
// Create download link
const link = document.createElement('a');
link.href = url;
link.download = filename;
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
// Clean up URL
URL.revokeObjectURL(url);
console.log(`Downloaded preprocessed audio: ${filename}`);
} catch (error) {
console.error('Error downloading preprocessed audio:', error);
}
}, [preprocessedAudio]);
if (!transcription || mediaDuration === 0) {
return null;
}
return (
<div className="bg-gray-800 border-b border-gray-700">
<div className="px-4 py-2">
<div className="flex justify-between items-center text-xs text-gray-400 mb-1">
<div className="flex items-center gap-2">
<span>
Overview - Full Timeline ({Math.round(mediaDuration)}s)
{preprocessedAudio ? ' • Preprocessed Waveform' : ' • Segment-Based View'}
</span>
{preprocessedAudio && (
<div className="tooltip tooltip-bottom" data-tip="Download preprocessed audio as WAV file (16kHz, mono, layer-normalized). This is the exact audio data processed by the AI transcription model after conversion and standardization from the original file.">
<button
onClick={downloadPreprocessedAudio}
className="flex items-center gap-1 px-1.5 py-0.5 text-xs bg-gray-600 hover:bg-gray-500 rounded transition-colors text-white"
>
<ArrowDownTrayIcon className="w-3 h-3" />
.wav
</button>
</div>
)}
</div>
{viewport.visible && (
<span>
Visible: {viewport.start.toFixed(1)}s - {viewport.end.toFixed(1)}s
({Math.round(viewport.end - viewport.start)}s view)
</span>
)}
</div>
<div
ref={containerRef}
className="relative"
style={{ height: MINIMAP_HEIGHT }}
>
<canvas
ref={canvasRef}
onMouseDown={handleMouseDown}
onMouseMove={handleMouseHover}
className="block w-full h-full"
style={{
width: '100%',
height: MINIMAP_HEIGHT,
}}
/>
</div>
</div>
</div>
);
}