import {create} from "zustand";
import {devtools} from "zustand/middleware";
import debounce from "debounce";
import {
  TranscriptionResponse,
  PreprocessedAudio,
  transcribeAudio,
  AlignedSegment,
  ServerStatus,
  getServerStatus,
  HealthResponse,
  getServerHealth,
} from "../services/transcriptionApi";
import {generateSRT, downloadVideoWithSubtitles} from "../utils/subtitleUtils";
import {
  trackTranscriptionStart,
  trackTranscriptionComplete,
  trackTranscriptionError,
  trackFileUpload,
  trackLanguageChange,
  trackDownloadVideoWithSubtitles,
} from "../analytics/gaEvents";

// Helper function to find the active segment based on current time
const findActiveSegmentIndex = (
  segments: AlignedSegment[],
  currentTime: number
): number | null => {
  for (let i = 0; i < segments.length; i++) {
    const segment = segments[i];
    if (currentTime >= segment.start && currentTime <= segment.end) {
      return i;
    }
  }
  return null;
};
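
// Illustrative example (hypothetical segment values; only the timing fields matter here):
//   with segments[1] spanning 3.0–5.0s, findActiveSegmentIndex(segments, 3.4) returns 1;
//   it returns null when currentTime falls in a gap between segments.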

// Types for our store state
interface TranscriptionState {
  // File and media state
  file: File | null;
  mediaUrl: string | null;
  // Recording state
  isRecording: boolean;
  recordingType: "audio" | "video" | null;
  recordedBlob: Blob | null;
  // Media refs for seeking (set by components)
  audioRef: React.RefObject<HTMLAudioElement> | null;
  videoRef: React.RefObject<HTMLVideoElement> | null;
  // Transcription state
  transcription: TranscriptionResponse | null;
  preprocessedAudio: PreprocessedAudio | null;
  currentTime: number;
  activeSegmentIndex: number | null;
  currentSegments: AlignedSegment[] | null;
  // Edit state
  selectedSegmentIndex: number | null;
  // Viewport state for minimap
  viewportStart: number;
  viewportEnd: number;
  // History state for undo/redo
  history: AlignedSegment[][];
  historyIndex: number;
  // Loading and error state
  isLoading: boolean;
  isProcessingVideo: boolean;
  isDownloadingVideo: boolean;
  error: string | null;
  // Language selection
  selectedLanguage: string | null;
  selectedScript: string | null;
  // Server status
  serverStatus: ServerStatus | null;
  serverHealth: HealthResponse | null;
  isPollingStatus: boolean;
  statusPollingInterval: number | null;
  // Modal state
  showWelcomeModal: boolean;
  // Computed properties
  isVideoFile: boolean;
  hasFile: boolean;
  hasTranscription: boolean;
  // Actions
  setFile: (file: File | null) => void;
  setTranscription: (transcription: TranscriptionResponse | null) => void;
  // Recording actions
  startRecording: (type: "audio" | "video") => void;
  stopRecording: () => void;
  setRecordedBlob: (blob: Blob | null) => void;
  setCurrentTime: (time: number) => void;
  setActiveSegmentIndex: (index: number | null) => void;
  setIsLoading: (loading: boolean) => void;
  setIsProcessingVideo: (processing: boolean) => void;
  setIsDownloadingVideo: (downloading: boolean) => void;
  setError: (error: string | null) => void;
  setSelectedLanguage: (language: string | null) => void;
  setSelectedScript: (script: string | null) => void;
  setSelectedLanguageAndScript: (
    language: string | null,
    script: string | null
  ) => void;
  // Modal actions
  setShowWelcomeModal: (show: boolean) => void;
  // Media control actions
  setMediaRefs: (
    audioRef: React.RefObject<HTMLAudioElement>,
    videoRef: React.RefObject<HTMLVideoElement>
  ) => void;
  seekToTime: (time: number) => void;
  // Server status actions
  setServerStatus: (status: ServerStatus | null) => void;
  setServerHealth: (health: HealthResponse | null) => void;
  fetchServerStatus: () => Promise<void>;
  fetchServerHealth: () => Promise<void>;
  startStatusPolling: () => void;
  stopStatusPolling: () => void;
  // Edit actions
  setSelectedSegmentIndex: (index: number | null) => void;
  updateSegmentTiming: (
    index: number,
    start: number,
    end: number,
    deferSorting?: boolean
  ) => void;
  updateSegmentText: (index: number, text: string) => void;
  deleteSegment: (index: number) => void;
  mergeSegmentsByProximity: (maxDurationSeconds: number) => void;
  finalizeSegmentPositioning: () => void;
  // Viewport actions
  setViewport: (start: number, end: number) => void;
  initializeViewport: (duration: number) => void;
  // History actions
  initializeHistory: () => void;
  undo: () => void;
  redo: () => void;
  canUndo: boolean;
  canRedo: boolean;
  // Helper functions
  _recordHistoryImmediate: (segments: AlignedSegment[]) => void;
  _recordHistoryDebounced: (segments: AlignedSegment[]) => void;
  // Complex actions
  handleFileSelect: (file: File) => void;
  handleTranscribe: () => Promise<void>;
  handleTimeUpdate: () => void;
  handleDownloadVideoWithSubtitles: () => Promise<void>;
  reset: () => void;
}

// Initial state
const initialState = {
  file: null,
  mediaUrl: null,
  audioRef: null,
  videoRef: null,
  // Recording state
  isRecording: false,
  recordingType: null,
  recordedBlob: null,
  transcription: null,
  preprocessedAudio: null,
  currentTime: 0,
  activeSegmentIndex: null,
  selectedSegmentIndex: null,
  history: [],
  historyIndex: -1,
  isLoading: false,
  isProcessingVideo: false,
  isDownloadingVideo: false,
  error: null,
  selectedLanguage: null,
  selectedScript: null,
  currentSegments: null,
  viewportStart: 0,
  viewportEnd: 30, // Default to first 30 seconds
  showWelcomeModal: true, // Show modal on app load
};

export const useTranscriptionStore = create<TranscriptionState>()(
  devtools(
    (set, get) => ({
      ...initialState,
      // Server status state
      serverStatus: null,
      serverHealth: null,
      isPollingStatus: false,
      statusPollingInterval: null,
      // Computed properties - these will be updated when relevant state changes
      isVideoFile: false,
      hasFile: false,
      hasTranscription: false,
      canUndo: false,
      canRedo: false,
      // Simple setters
      setFile: (file) => {
        const {mediaUrl, showWelcomeModal} = get();
        // Clean up previous media URL
        if (mediaUrl) {
          URL.revokeObjectURL(mediaUrl);
        }
        set({
          ...initialState,
          // Override only file-specific properties
          file,
          mediaUrl: file ? URL.createObjectURL(file) : null,
          isVideoFile: file?.type.startsWith("video/") ?? false,
          hasFile: !!file,
          hasTranscription: false,
          // Preserve the modal state - don't reset it
          showWelcomeModal,
        });
      },
      setTranscription: (transcription) => {
        set({
          transcription,
          preprocessedAudio: transcription?.preprocessed_audio || null,
          hasTranscription: !!transcription,
          currentSegments: transcription?.aligned_segments || null,
        });
        // Initialize history when transcription is first set
        if (transcription && transcription.aligned_segments) {
          const segments = [...transcription.aligned_segments];
          set({
            history: [segments],
            historyIndex: 0,
            canUndo: false,
            canRedo: false,
          });
        }
      },
      handleTimeUpdate: () => {
        const {audioRef, videoRef, transcription} = get();
        const mediaElement = audioRef?.current || videoRef?.current;
        if (mediaElement && transcription) {
          const currentTime = mediaElement.currentTime;
          const activeIndex = findActiveSegmentIndex(
            transcription.aligned_segments,
            currentTime
          );
          set({
            currentTime,
            activeSegmentIndex: activeIndex,
          });
        }
      },
      setCurrentTime: (currentTime) => set({currentTime}),
      setActiveSegmentIndex: (activeSegmentIndex) => set({activeSegmentIndex}),
      setIsLoading: (isLoading) => set({isLoading}),
      setIsProcessingVideo: (isProcessingVideo) => set({isProcessingVideo}),
      setIsDownloadingVideo: (isDownloadingVideo) => set({isDownloadingVideo}),
      setError: (error) => set({error}),
      setSelectedLanguage: (selectedLanguage) => {
        // Track language selection
        if (selectedLanguage) {
          trackLanguageChange(selectedLanguage);
        }
        set({selectedLanguage});
      },
      setSelectedScript: (selectedScript) => set({selectedScript}),
      setSelectedLanguageAndScript: (selectedLanguage, selectedScript) => {
        // Track language selection
        if (selectedLanguage) {
          trackLanguageChange(selectedLanguage);
        }
        set({selectedLanguage, selectedScript});
      },
      // Modal actions
      setShowWelcomeModal: (showWelcomeModal) => set({showWelcomeModal}),
      // Media control actions
      setMediaRefs: (audioRef, videoRef) => set({audioRef, videoRef}),
      seekToTime: (time) => {
        const {audioRef, videoRef} = get();
        const mediaElement = audioRef?.current || videoRef?.current;
        if (mediaElement) {
          const seekTime = Math.max(
            0,
            Math.min(time, mediaElement.duration || Infinity)
          );
          mediaElement.currentTime = seekTime;
          // Immediately update current time to trigger auto-scroll
          set({currentTime: seekTime});
        }
      },
      // Server status actions
      setServerStatus: (serverStatus) => set({serverStatus}),
      setServerHealth: (serverHealth) => set({serverHealth}),
      fetchServerStatus: async () => {
        try {
          const status = await getServerStatus();
          set({serverStatus: status});
        } catch (error) {
          console.error("Failed to fetch server status:", error);
        }
      },
      fetchServerHealth: async () => {
        try {
          const health = await getServerHealth();
          set({serverHealth: health});
        } catch (error) {
          console.error("Failed to fetch server health:", error);
        }
      },
      startStatusPolling: () => {
        const {isPollingStatus, statusPollingInterval} = get();
        if (isPollingStatus) {
          return; // Already polling
        }
        // Clear any existing interval
        if (statusPollingInterval) {
          clearInterval(statusPollingInterval);
        }
        const {fetchServerStatus} = get();
        // Fetch immediately
        fetchServerStatus();
        // Then poll every 2 seconds
        const interval = setInterval(() => {
          fetchServerStatus();
        }, 2000);
        set({
          isPollingStatus: true,
          statusPollingInterval: interval,
        });
      },
      stopStatusPolling: () => {
        const {statusPollingInterval} = get();
        if (statusPollingInterval) {
          clearInterval(statusPollingInterval);
        }
        set({
          isPollingStatus: false,
          statusPollingInterval: null,
        });
      },
      // Helper function to record history immediately (for instant actions like delete)
      _recordHistoryImmediate: (segments: AlignedSegment[]) => {
        const {history, historyIndex} = get();
        // Remove any history after current index (when we're not at the end)
        const newHistory = history.slice(0, historyIndex + 1);
        // Add new state to history
        newHistory.push([...segments]);
        // Limit history size to prevent memory issues (keep last 50 states)
        const maxHistorySize = 50;
        const newIndex = newHistory.length - 1;
        if (newHistory.length > maxHistorySize) {
          newHistory.shift();
          const adjustedIndex = newIndex - 1;
          set({
            history: newHistory,
            historyIndex: adjustedIndex,
            canUndo: adjustedIndex > 0,
            canRedo: false, // Always false when adding new history
          });
        } else {
          set({
            history: newHistory,
            historyIndex: newIndex,
            canUndo: newIndex > 0,
            canRedo: false, // Always false when adding new history
          });
        }
      },
      // Debounced history recording method
      _recordHistoryDebounced: debounce((segments: AlignedSegment[]) => {
        const {_recordHistoryImmediate} = get();
        _recordHistoryImmediate(segments);
      }, 500),
      // Edit mode actions
      // Initialize history for undo/redo (called automatically when transcription is set)
      initializeHistory: () => {
        const {transcription, history} = get();
        if (!transcription || history.length > 0) return;
        const segments = [...transcription.aligned_segments];
        set({
          history: [segments],
          historyIndex: 0,
          canUndo: false,
          canRedo: false,
        });
      },
      setSelectedSegmentIndex: (selectedSegmentIndex) => {
        set({selectedSegmentIndex});
      },
      updateSegmentTiming: (
        index: number,
        start: number,
        end: number,
        deferSorting: boolean = false
      ) => {
        const {
          currentSegments,
          transcription,
          selectedSegmentIndex,
          _recordHistoryDebounced,
        } = get();
        if (
          !currentSegments ||
          !transcription ||
          index < 0 ||
          index >= currentSegments.length
        )
          return;
        const updatedSegments = [...currentSegments];
        updatedSegments[index] = {
          ...updatedSegments[index],
          start,
          end,
          duration: end - start,
        };
        // If deferSorting is true (during drag operations), just update without re-sorting
        if (deferSorting) {
          // Update both transcription and current segments without re-sorting
          const updatedTranscription = {
            ...transcription,
            aligned_segments: updatedSegments,
          };
          set({
            transcription: updatedTranscription,
            currentSegments: updatedSegments,
          });
          // Don't record history during intermediate drag updates
          return;
        }
        // Normal operation: re-sort segments by start time to maintain chronological order
        const sortedSegments = [...updatedSegments].sort(
          (a, b) => a.start - b.start
        );
        // Find the new index of the moved segment after sorting
        const movedSegment = updatedSegments[index];
        const newIndex = sortedSegments.findIndex(
          (seg) =>
            seg.start === movedSegment.start &&
            seg.end === movedSegment.end &&
            seg.text === movedSegment.text
        );
        // Update selected segment index if it was the one being moved
        let newSelectedIndex = selectedSegmentIndex;
        if (selectedSegmentIndex === index) {
          newSelectedIndex = newIndex;
        } else if (selectedSegmentIndex !== null) {
          // Find where the currently selected segment ended up after sorting
          const selectedSegment = updatedSegments[selectedSegmentIndex];
          newSelectedIndex = sortedSegments.findIndex(
            (seg) =>
              seg.start === selectedSegment.start &&
              seg.end === selectedSegment.end &&
              seg.text === selectedSegment.text
          );
        }
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: sortedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: sortedSegments,
          selectedSegmentIndex: newSelectedIndex,
        });
        // Record history with debounce for drag operations
        _recordHistoryDebounced(sortedSegments);
      },
      // New method to finalize segment positioning after drag operations
      finalizeSegmentPositioning: () => {
        const {currentSegments, transcription, selectedSegmentIndex} = get();
        if (!currentSegments || !transcription) return;
        // Re-sort segments by start time
        const sortedSegments = [...currentSegments].sort(
          (a, b) => a.start - b.start
        );
        // Update selected segment index to reflect new position
        let newSelectedIndex = selectedSegmentIndex;
        if (selectedSegmentIndex !== null) {
          const selectedSegment = currentSegments[selectedSegmentIndex];
          newSelectedIndex = sortedSegments.findIndex(
            (seg) =>
              seg.start === selectedSegment.start &&
              seg.end === selectedSegment.end &&
              seg.text === selectedSegment.text
          );
        }
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: sortedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: sortedSegments,
          selectedSegmentIndex: newSelectedIndex,
        });
      },
      updateSegmentText: (index: number, text: string) => {
        const {currentSegments, transcription, _recordHistoryDebounced} = get();
        if (
          !currentSegments ||
          !transcription ||
          index < 0 ||
          index >= currentSegments.length
        )
          return;
        const updatedSegments = [...currentSegments];
        updatedSegments[index] = {
          ...updatedSegments[index],
          text,
        };
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: updatedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: updatedSegments,
        });
        // Record history with debounce for text changes
        _recordHistoryDebounced(updatedSegments);
      },
      deleteSegment: (index: number) => {
        const {
          currentSegments,
          transcription,
          selectedSegmentIndex,
          _recordHistoryImmediate,
        } = get();
        if (
          !currentSegments ||
          !transcription ||
          index < 0 ||
          index >= currentSegments.length
        )
          return;
        const updatedSegments = currentSegments.filter(
          (_: AlignedSegment, i: number) => i !== index
        );
        // Adjust selected segment index if necessary
        let newSelectedIndex = selectedSegmentIndex;
        if (selectedSegmentIndex === index) {
          newSelectedIndex = null; // Clear selection if we deleted the selected segment
        } else if (
          selectedSegmentIndex !== null &&
          selectedSegmentIndex > index
        ) {
          newSelectedIndex = selectedSegmentIndex - 1; // Adjust index if selected segment was after deleted one
        }
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: updatedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: updatedSegments,
          selectedSegmentIndex: newSelectedIndex,
        });
        // Record history immediately for deletions since they're instant actions
        _recordHistoryImmediate(updatedSegments);
      },
      mergeSegmentsByProximity: (maxDurationSeconds: number) => {
        const {
          transcription,
          currentSegments,
          history,
          _recordHistoryDebounced,
        } = get();
        if (!transcription) return;
        console.log(`Merge threshold changed to: ${maxDurationSeconds}s`);
        // Always use current segments - this is the source of truth for user's changes
        if (!currentSegments) {
          console.warn("No currentSegments available for merging");
          return;
        }
        // const originalSegments = history.length > 0 ? [...history[0]] : [...transcription.aligned_segments];
        // // If threshold is 0, reset to original segments (clear all merge history)
        // if (maxDurationSeconds === 0) {
        //   console.log(`Resetting to original ${originalSegments.length} segments`);
        //   // Strip merge history from original segments
        //   const cleanedOriginals = originalSegments.map(segment => ({
        //     ...segment,
        //     mergedFrom: undefined,
        //     mergeThreshold: undefined,
        //   }));
        //   const updatedTranscription = {
        //     ...transcription,
        //     aligned_segments: cleanedOriginals,
        //   };
        //   set({
        //     transcription: updatedTranscription,
        //     currentSegments: cleanedOriginals,
        //     selectedSegmentIndex: null,
        //   });
        //   return;
        // }
        // Step 1: Intelligently split segments that were merged at higher thresholds
        const splitSegmentsRecursively = (
          segment: AlignedSegment
        ): AlignedSegment[] => {
          // If this segment has no merge history or was merged at/below current threshold, keep it
          if (
            !segment.mergedFrom ||
            !segment.mergeThreshold ||
            segment.mergeThreshold <= maxDurationSeconds
          ) {
            return [segment];
          }
          // This segment was merged above the current threshold, split it
          console.log(
            `Splitting segment merged at ${segment.mergeThreshold}s (current threshold: ${maxDurationSeconds}s)`
          );
          // Calculate the relative positions of constituents within the current segment's timing
          const currentStart = segment.start;
          const currentEnd = segment.end;
          const currentDuration = currentEnd - currentStart;
          // Get the original combined duration when constituents were first merged
          const constituents = segment.mergedFrom;
          const originalTotalDuration = constituents.reduce(
            (sum, c) => sum + c.duration,
            0
          );
          // Position each constituent relative to current segment position
          let cumulativeTime = 0;
          const repositionedConstituents = constituents
            .map((constituent) => {
              const relativeStart = cumulativeTime / originalTotalDuration;
              const relativeEnd =
                (cumulativeTime + constituent.duration) / originalTotalDuration;
              const newStart = currentStart + relativeStart * currentDuration;
              const newEnd = currentStart + relativeEnd * currentDuration;
              cumulativeTime += constituent.duration;
              const repositioned: AlignedSegment = {
                ...constituent,
                start: newStart,
                end: newEnd,
                duration: newEnd - newStart,
              };
              // Recursively split this constituent if needed
              return splitSegmentsRecursively(repositioned);
            })
            .flat();
          return repositionedConstituents;
        };
        let segments = currentSegments.flatMap((segment) =>
          splitSegmentsRecursively(segment)
        );
        console.log(`After splitting: ${segments.length} segments`);
        // Step 2: Merge segments that can be merged at the current threshold
        const originalCount = segments.length;
        let merged = true;
        while (merged && segments.length > 1) {
          merged = false;
          let closestDistance = Infinity;
          let closestPair = -1;
          // Find the closest pair of adjacent segments that can be merged
          for (let i = 0; i < segments.length - 1; i++) {
            const segment1 = segments[i];
            const segment2 = segments[i + 1];
            const gap = segment2.start - segment1.end;
            // Calculate what the duration would be if we merged these segments
            const mergedDuration = segment2.end - segment1.start;
            // Only consider this pair if the merged duration wouldn't exceed the threshold
            if (mergedDuration <= maxDurationSeconds && gap < closestDistance) {
              closestDistance = gap;
              closestPair = i;
            }
          }
          // Merge the closest pair if found
          if (closestPair !== -1) {
            const segment1 = segments[closestPair];
            const segment2 = segments[closestPair + 1];
            // Collect all constituent segments (handle nested merges)
            const getAllConstituents = (
              segment: AlignedSegment
            ): AlignedSegment[] => {
              if (segment.mergedFrom) {
                return segment.mergedFrom.flatMap(getAllConstituents);
              }
              // Return atomic segment without merge history
              return [
                {
                  start: segment.start,
                  end: segment.end,
                  duration: segment.duration,
                  text: segment.text,
                  chunk_index: segment.chunk_index,
                  speech_segment_index: segment.speech_segment_index,
                },
              ];
            };
            const constituents1 = getAllConstituents(segment1);
            const constituents2 = getAllConstituents(segment2);
            const allConstituents = [...constituents1, ...constituents2];
            const mergedSegment: AlignedSegment = {
              start: segment1.start,
              end: segment2.end,
              duration: segment2.end - segment1.start,
              text: `${segment1.text} ${segment2.text}`,
              chunk_index: segment1.chunk_index,
              speech_segment_index: segment1.speech_segment_index,
              mergedFrom: allConstituents,
              mergeThreshold: maxDurationSeconds,
            };
            segments = [
              ...segments.slice(0, closestPair),
              mergedSegment,
              ...segments.slice(closestPair + 2),
            ];
            merged = true;
            console.log(
              `Merged segments: "${segment1.text}" + "${segment2.text}"`
            );
          }
        }
        console.log(
          `Final result: ${originalCount} → ${segments.length} segments`
        );
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: segments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: segments,
          selectedSegmentIndex: null,
        });
        // Record history with debounce for merge slider changes
        _recordHistoryDebounced(segments);
      },
      // Viewport actions
      setViewport: (start: number, end: number) => {
        set({
          viewportStart: start,
          viewportEnd: end,
        });
      },
      initializeViewport: (duration: number) => {
        const FIXED_VIEWPORT_DURATION = 30; // Fixed viewport window is always 30 seconds
        const viewportDuration = Math.min(FIXED_VIEWPORT_DURATION, duration);
        set({
          viewportStart: 0,
          viewportEnd: viewportDuration,
        });
      },
      // History actions
      undo: () => {
        const {history, historyIndex, transcription} = get();
        if (historyIndex > 0) {
          const newIndex = historyIndex - 1;
          const segments = history[newIndex];
          // Update both transcription and current segments
          const updatedTranscription = {
            ...transcription!,
            aligned_segments: [...segments],
          };
          set({
            transcription: updatedTranscription,
            currentSegments: [...segments],
            historyIndex: newIndex,
            selectedSegmentIndex: null,
            canUndo: newIndex > 0,
            canRedo: newIndex < history.length - 1,
          });
        }
      },
      redo: () => {
        const {history, historyIndex, transcription} = get();
        if (historyIndex < history.length - 1) {
          const newIndex = historyIndex + 1;
          const segments = history[newIndex];
          // Update both transcription and current segments
          const updatedTranscription = {
            ...transcription!,
            aligned_segments: [...segments],
          };
          set({
            transcription: updatedTranscription,
            currentSegments: [...segments],
            historyIndex: newIndex,
            selectedSegmentIndex: null,
            canUndo: newIndex > 0,
            canRedo: newIndex < history.length - 1,
          });
        }
      },
      // Complex actions
      handleFileSelect: (selectedFile: File) => {
        // Reject video files - only allow audio
        if (selectedFile.type.startsWith("video/")) {
          set({
            error:
              "Video files are not supported. Please upload an audio file only.",
          });
          return;
        }
        // Reject non-audio files
        if (!selectedFile.type.startsWith("audio/")) {
          set({
            error: "Invalid file type. Please upload an audio file.",
          });
          return;
        }
        const {mediaUrl} = get();
        // Clean up previous media URL
        if (mediaUrl) {
          URL.revokeObjectURL(mediaUrl);
        }
        // Create new object URL for media playback
        const url = URL.createObjectURL(selectedFile);
        // Track file upload
        const fileType = "audio";
        const fileSizeMB =
          Math.round((selectedFile.size / (1024 * 1024)) * 10) / 10; // Round to 1 decimal
        trackFileUpload(fileType, fileSizeMB);
        set({
          file: selectedFile,
          mediaUrl: url,
          transcription: null,
          currentTime: 0,
          activeSegmentIndex: null,
          error: null,
          isVideoFile: false,
          hasFile: true,
          hasTranscription: false,
          currentSegments: null,
        });
      },
      handleTranscribe: async () => {
        const {file, selectedLanguage, selectedScript, setTranscription} =
          get();
        if (!file) return;
        set({isLoading: true, error: null});
        // Track transcription start
        if (selectedLanguage) {
          trackTranscriptionStart(selectedLanguage);
        }
        const startTime = Date.now();
        try {
          const result = await transcribeAudio(
            file,
            selectedLanguage,
            selectedScript,
            (isProcessing) => set({isProcessingVideo: isProcessing})
          );
          // Track transcription completion
          if (selectedLanguage) {
            const duration = Math.round((Date.now() - startTime) / 1000); // Duration in seconds
            trackTranscriptionComplete(selectedLanguage, duration);
          }
          // Use setTranscription to properly initialize history
          setTranscription(result);
        } catch (err) {
          console.error("Transcription error:", err);
          // Track transcription error
          if (selectedLanguage) {
            const errorMessage =
              err instanceof Error ? err.message : "Unknown error";
            trackTranscriptionError(selectedLanguage, errorMessage);
          }
          set({
            error:
              err instanceof Error ? err.message : "An unknown error occurred",
          });
        } finally {
          set({isLoading: false, isProcessingVideo: false});
        }
      },
      handleDownloadVideoWithSubtitles: async () => {
        const {
          file,
          transcription,
          selectedLanguage,
          setIsDownloadingVideo,
          setError,
        } = get();
        if (!file || !transcription) return;
        setIsDownloadingVideo(true);
        try {
          const srtContent = generateSRT(transcription.aligned_segments);
          const filename = file.name.replace(
            /\.[^/.]+$/,
            "_with_subtitles.mp4"
          );
          // Pass the selected language or fallback to 'eng'
          const language = selectedLanguage || "eng";
          await downloadVideoWithSubtitles(
            file,
            srtContent,
            filename,
            language,
            "srt",
            "mp4"
          );
          // Track video with subtitles download
          if (selectedLanguage) {
            trackDownloadVideoWithSubtitles(selectedLanguage);
          }
        } catch (err) {
          console.error("Error creating video with subtitles:", err);
          setError("Failed to create video with subtitles");
        } finally {
          setIsDownloadingVideo(false);
        }
      },
      // Recording actions
      startRecording: (type: "audio" | "video") => {
        set({
          isRecording: true,
          recordingType: type,
          recordedBlob: null,
        });
      },
      stopRecording: () => {
        set({
          isRecording: false,
          recordingType: null,
        });
      },
      setRecordedBlob: (blob: Blob | null) => {
        if (blob) {
          // Store the current recording type before it gets cleared
          const currentRecordingType = get().recordingType;
          // Convert blob to ArrayBuffer first, then back to Blob to ensure data persistence
          blob
            .arrayBuffer()
            .then((arrayBuffer) => {
              const {mediaUrl: currentUrl, showWelcomeModal} = get();
              // Clean up previous media URL
              if (currentUrl) {
                URL.revokeObjectURL(currentUrl);
              }
              // Recordings use the WebM container for both audio and video
              const extension = "webm";
              const mimeType =
                currentRecordingType === "video" ? "video/webm" : "audio/webm";
              // Create a new blob from the ArrayBuffer to ensure data persistence
              const persistentBlob = new Blob([arrayBuffer], {type: mimeType});
              // Convert to File object
              const file = new File(
                [persistentBlob],
                `recorded_${currentRecordingType}.${extension}`,
                {
                  type: mimeType,
                  lastModified: Date.now(),
                }
              );
              // Create URL from the persistent blob
              const url = URL.createObjectURL(persistentBlob);
              // Get duration from the blob by creating a temporary media element
              const tempElement =
                currentRecordingType === "video"
                  ? document.createElement("video")
                  : document.createElement("audio");
              // Track file upload
              const fileType =
                currentRecordingType === "video" ? "video" : "audio";
              const fileSizeMB =
                Math.round((persistentBlob.size / (1024 * 1024)) * 10) / 10;
              trackFileUpload(fileType, fileSizeMB);
              // Set all the state - preserve existing media refs!
              const {audioRef, videoRef} = get();
              set({
                ...initialState,
                audioRef, // Preserve existing audioRef
                videoRef, // Preserve existing videoRef
                recordedBlob: persistentBlob, // Store the persistent blob
                file: file,
                mediaUrl: url,
                isRecording: false,
                recordingType: null,
                isVideoFile: currentRecordingType === "video",
                hasFile: true,
                hasTranscription: false,
                showWelcomeModal,
              });
            })
            .catch((error) => {
              console.error("Failed to create persistent blob:", error);
              set({error: "Failed to process recorded media"});
            });
        } else {
          set({recordedBlob: blob});
        }
      },
      reset: () => {
        const {mediaUrl} = get();
        // Clean up media URL
        if (mediaUrl) {
          URL.revokeObjectURL(mediaUrl);
        }
        set({
          ...initialState,
          isVideoFile: false,
          hasFile: false,
          hasTranscription: false,
          currentSegments: null,
        });
      },
    }),
    {
      name: "transcription-store", // Name for devtools
    }
  )
);

// Export the store hook directly - components should use useTranscriptionStore()
// and destructure what they need directly from the store
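
// Illustrative usage sketch (hypothetical component, not part of this module;
// it relies only on state and actions defined above):
//
//   const {hasFile, isLoading, handleFileSelect, handleTranscribe} =
//     useTranscriptionStore();
//
//   <input
//     type="file"
//     accept="audio/*"
//     onChange={(e) => e.target.files?.[0] && handleFileSelect(e.target.files[0])}
//   />
//   <button disabled={!hasFile || isLoading} onClick={handleTranscribe}>
//     Transcribe
//   </button>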