import {create} from "zustand";
import {devtools} from "zustand/middleware";
import debounce from "debounce";
import {
  TranscriptionResponse,
  PreprocessedAudio,
  transcribeAudio,
  AlignedSegment,
  ServerStatus,
  getServerStatus,
  HealthResponse,
  getServerHealth,
} from "../services/transcriptionApi";
import {generateSRT, downloadVideoWithSubtitles} from "../utils/subtitleUtils";
import {
  trackTranscriptionStart,
  trackTranscriptionComplete,
  trackTranscriptionError,
  trackFileUpload,
  trackLanguageChange,
  trackDownloadVideoWithSubtitles,
} from "../analytics/gaEvents";

// Helper function to find the active segment based on current time
const findActiveSegmentIndex = (
  segments: AlignedSegment[],
  currentTime: number
): number | null => {
  for (let i = 0; i < segments.length; i++) {
    const segment = segments[i];
    if (currentTime >= segment.start && currentTime <= segment.end) {
      return i;
    }
  }
  return null;
};
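
// Illustrative example (hypothetical segment values; only the timing fields matter here):
//   with segments[1] spanning 3.0–5.0s, findActiveSegmentIndex(segments, 3.4) returns 1;
//   it returns null when currentTime falls in a gap between segments.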

// Types for our store state
interface TranscriptionState {
  // File and media state
  file: File | null;
  mediaUrl: string | null;
  // Recording state
  isRecording: boolean;
  recordingType: "audio" | "video" | null;
  recordedBlob: Blob | null;
  // Media refs for seeking (set by components)
  audioRef: React.RefObject<HTMLAudioElement> | null;
  videoRef: React.RefObject<HTMLVideoElement> | null;
  // Transcription state
  transcription: TranscriptionResponse | null;
  preprocessedAudio: PreprocessedAudio | null;
  currentTime: number;
  activeSegmentIndex: number | null;
  currentSegments: AlignedSegment[] | null;
  // Edit state
  selectedSegmentIndex: number | null;
  // Viewport state for minimap
  viewportStart: number;
  viewportEnd: number;
  // History state for undo/redo
  history: AlignedSegment[][];
  historyIndex: number;
  // Loading and error state
  isLoading: boolean;
  isProcessingVideo: boolean;
  isDownloadingVideo: boolean;
  error: string | null;
  // Language selection
  selectedLanguage: string | null;
  selectedScript: string | null;
  // Server status
  serverStatus: ServerStatus | null;
  serverHealth: HealthResponse | null;
  isPollingStatus: boolean;
  statusPollingInterval: number | null;
  // Modal state
  showWelcomeModal: boolean;
  // Computed properties
  isVideoFile: boolean;
  hasFile: boolean;
  hasTranscription: boolean;
  // Actions
  setFile: (file: File | null) => void;
  setTranscription: (transcription: TranscriptionResponse | null) => void;
  // Recording actions
  startRecording: (type: "audio" | "video") => void;
  stopRecording: () => void;
  setRecordedBlob: (blob: Blob | null) => void;
  setCurrentTime: (time: number) => void;
  setActiveSegmentIndex: (index: number | null) => void;
  setIsLoading: (loading: boolean) => void;
  setIsProcessingVideo: (processing: boolean) => void;
  setIsDownloadingVideo: (downloading: boolean) => void;
  setError: (error: string | null) => void;
  setSelectedLanguage: (language: string | null) => void;
  setSelectedScript: (script: string | null) => void;
  setSelectedLanguageAndScript: (
    language: string | null,
    script: string | null
  ) => void;
  // Modal actions
  setShowWelcomeModal: (show: boolean) => void;
  // Media control actions
  setMediaRefs: (
    audioRef: React.RefObject<HTMLAudioElement>,
    videoRef: React.RefObject<HTMLVideoElement>
  ) => void;
  seekToTime: (time: number) => void;
  // Server status actions
  setServerStatus: (status: ServerStatus | null) => void;
  setServerHealth: (health: HealthResponse | null) => void;
  fetchServerStatus: () => Promise<void>;
  fetchServerHealth: () => Promise<void>;
  startStatusPolling: () => void;
  stopStatusPolling: () => void;
  // Edit actions
  setSelectedSegmentIndex: (index: number | null) => void;
  updateSegmentTiming: (
    index: number,
    start: number,
    end: number,
    deferSorting?: boolean
  ) => void;
  updateSegmentText: (index: number, text: string) => void;
  deleteSegment: (index: number) => void;
  mergeSegmentsByProximity: (maxDurationSeconds: number) => void;
  finalizeSegmentPositioning: () => void;
  // Viewport actions
  setViewport: (start: number, end: number) => void;
  initializeViewport: (duration: number) => void;
  // History actions
  initializeHistory: () => void;
  undo: () => void;
  redo: () => void;
  canUndo: boolean;
  canRedo: boolean;
  // Helper functions
  _recordHistoryImmediate: (segments: AlignedSegment[]) => void;
  _recordHistoryDebounced: (segments: AlignedSegment[]) => void;
  // Complex actions
  handleFileSelect: (file: File) => void;
  handleTranscribe: () => Promise<void>;
  handleTimeUpdate: () => void;
  handleDownloadVideoWithSubtitles: () => Promise<void>;
  reset: () => void;
}

// Initial state
const initialState = {
  file: null,
  mediaUrl: null,
  audioRef: null,
  videoRef: null,
  // Recording state
  isRecording: false,
  recordingType: null,
  recordedBlob: null,
  transcription: null,
  preprocessedAudio: null,
  currentTime: 0,
  activeSegmentIndex: null,
  selectedSegmentIndex: null,
  history: [],
  historyIndex: -1,
  isLoading: false,
  isProcessingVideo: false,
  isDownloadingVideo: false,
  error: null,
  selectedLanguage: null,
  selectedScript: null,
  currentSegments: null,
  viewportStart: 0,
  viewportEnd: 30, // Default to first 30 seconds
  showWelcomeModal: true, // Show modal on app load
};

export const useTranscriptionStore = create<TranscriptionState>()(
  devtools(
    (set, get) => ({
      ...initialState,
      // Server status state
      serverStatus: null,
      serverHealth: null,
      isPollingStatus: false,
      statusPollingInterval: null,
      // Computed properties - these will be updated when relevant state changes
      isVideoFile: false,
      hasFile: false,
      hasTranscription: false,
      canUndo: false,
      canRedo: false,
      // Simple setters
      setFile: (file) => {
        const {mediaUrl, showWelcomeModal} = get();
        // Clean up previous media URL
        if (mediaUrl) {
          URL.revokeObjectURL(mediaUrl);
        }
        set({
          ...initialState,
          // Override only file-specific properties
          file,
          mediaUrl: file ? URL.createObjectURL(file) : null,
          isVideoFile: file?.type.startsWith("video/") ?? false,
          hasFile: !!file,
          hasTranscription: false,
          // Preserve the modal state - don't reset it
          showWelcomeModal,
        });
      },
      setTranscription: (transcription) => {
        set({
          transcription,
          preprocessedAudio: transcription?.preprocessed_audio || null,
          hasTranscription: !!transcription,
          currentSegments: transcription?.aligned_segments || null,
        });
        // Initialize history when transcription is first set
        if (transcription && transcription.aligned_segments) {
          const segments = [...transcription.aligned_segments];
          set({
            history: [segments],
            historyIndex: 0,
            canUndo: false,
            canRedo: false,
          });
        }
      },
      handleTimeUpdate: () => {
        const {audioRef, videoRef, transcription} = get();
        const mediaElement = audioRef?.current || videoRef?.current;
        if (mediaElement && transcription) {
          const currentTime = mediaElement.currentTime;
          const activeIndex = findActiveSegmentIndex(
            transcription.aligned_segments,
            currentTime
          );
          set({
            currentTime,
            activeSegmentIndex: activeIndex,
          });
        }
      },
      setCurrentTime: (currentTime) => set({currentTime}),
      setActiveSegmentIndex: (activeSegmentIndex) => set({activeSegmentIndex}),
      setIsLoading: (isLoading) => set({isLoading}),
      setIsProcessingVideo: (isProcessingVideo) => set({isProcessingVideo}),
      setIsDownloadingVideo: (isDownloadingVideo) => set({isDownloadingVideo}),
      setError: (error) => set({error}),
      setSelectedLanguage: (selectedLanguage) => {
        // Track language selection
        if (selectedLanguage) {
          trackLanguageChange(selectedLanguage);
        }
        set({selectedLanguage});
      },
      setSelectedScript: (selectedScript) => set({selectedScript}),
      setSelectedLanguageAndScript: (selectedLanguage, selectedScript) => {
        // Track language selection
        if (selectedLanguage) {
          trackLanguageChange(selectedLanguage);
        }
        set({selectedLanguage, selectedScript});
      },
      // Modal actions
      setShowWelcomeModal: (showWelcomeModal) => set({showWelcomeModal}),
      // Media control actions
      setMediaRefs: (audioRef, videoRef) => set({audioRef, videoRef}),
      seekToTime: (time) => {
        const {audioRef, videoRef} = get();
        const mediaElement = audioRef?.current || videoRef?.current;
        if (mediaElement) {
          const seekTime = Math.max(
            0,
            Math.min(time, mediaElement.duration || Infinity)
          );
          mediaElement.currentTime = seekTime;
          // Immediately update current time to trigger auto-scroll
          set({currentTime: seekTime});
        }
      },
      // Server status actions
      setServerStatus: (serverStatus) => set({serverStatus}),
      setServerHealth: (serverHealth) => set({serverHealth}),
      fetchServerStatus: async () => {
        try {
          const status = await getServerStatus();
          set({serverStatus: status});
        } catch (error) {
          console.error("Failed to fetch server status:", error);
        }
      },
      fetchServerHealth: async () => {
        try {
          const health = await getServerHealth();
          set({serverHealth: health});
        } catch (error) {
          console.error("Failed to fetch server health:", error);
        }
      },
      startStatusPolling: () => {
        const {isPollingStatus, statusPollingInterval} = get();
        if (isPollingStatus) {
          return; // Already polling
        }
        // Clear any existing interval
        if (statusPollingInterval) {
          clearInterval(statusPollingInterval);
        }
        const {fetchServerStatus} = get();
        // Fetch immediately
        fetchServerStatus();
        // Then poll every 2 seconds
        const interval = setInterval(() => {
          fetchServerStatus();
        }, 2000);
        set({
          isPollingStatus: true,
          statusPollingInterval: interval,
        });
      },
      stopStatusPolling: () => {
        const {statusPollingInterval} = get();
        if (statusPollingInterval) {
          clearInterval(statusPollingInterval);
        }
        set({
          isPollingStatus: false,
          statusPollingInterval: null,
        });
      },
      // Helper function to record history immediately (for instant actions like delete)
      _recordHistoryImmediate: (segments: AlignedSegment[]) => {
        const {history, historyIndex} = get();
        // Remove any history after current index (when we're not at the end)
        const newHistory = history.slice(0, historyIndex + 1);
        // Add new state to history
        newHistory.push([...segments]);
        // Limit history size to prevent memory issues (keep last 50 states)
        const maxHistorySize = 50;
        const newIndex = newHistory.length - 1;
        if (newHistory.length > maxHistorySize) {
          newHistory.shift();
          const adjustedIndex = newIndex - 1;
          set({
            history: newHistory,
            historyIndex: adjustedIndex,
            canUndo: adjustedIndex > 0,
            canRedo: false, // Always false when adding new history
          });
        } else {
          set({
            history: newHistory,
            historyIndex: newIndex,
            canUndo: newIndex > 0,
            canRedo: false, // Always false when adding new history
          });
        }
      },
      // Debounced history recording method
      _recordHistoryDebounced: debounce((segments: AlignedSegment[]) => {
        const {_recordHistoryImmediate} = get();
        _recordHistoryImmediate(segments);
      }, 500),
      // Edit mode actions
      // Initialize history for undo/redo (called automatically when transcription is set)
      initializeHistory: () => {
        const {transcription, history} = get();
        if (!transcription || history.length > 0) return;
        const segments = [...transcription.aligned_segments];
        set({
          history: [segments],
          historyIndex: 0,
          canUndo: false,
          canRedo: false,
        });
      },
      setSelectedSegmentIndex: (selectedSegmentIndex) => {
        set({selectedSegmentIndex});
      },
      updateSegmentTiming: (
        index: number,
        start: number,
        end: number,
        deferSorting: boolean = false
      ) => {
        const {
          currentSegments,
          transcription,
          selectedSegmentIndex,
          _recordHistoryDebounced,
        } = get();
        if (
          !currentSegments ||
          !transcription ||
          index < 0 ||
          index >= currentSegments.length
        )
          return;
        const updatedSegments = [...currentSegments];
        updatedSegments[index] = {
          ...updatedSegments[index],
          start,
          end,
          duration: end - start,
        };
        // If deferSorting is true (during drag operations), just update without re-sorting
        if (deferSorting) {
          // Update both transcription and current segments without re-sorting
          const updatedTranscription = {
            ...transcription,
            aligned_segments: updatedSegments,
          };
          set({
            transcription: updatedTranscription,
            currentSegments: updatedSegments,
          });
          // Don't record history during intermediate drag updates
          return;
        }
        // Normal operation: re-sort segments by start time to maintain chronological order
        const sortedSegments = [...updatedSegments].sort(
          (a, b) => a.start - b.start
        );
        // Find the new index of the moved segment after sorting
        const movedSegment = updatedSegments[index];
        const newIndex = sortedSegments.findIndex(
          (seg) =>
            seg.start === movedSegment.start &&
            seg.end === movedSegment.end &&
            seg.text === movedSegment.text
        );
        // Update selected segment index if it was the one being moved
        let newSelectedIndex = selectedSegmentIndex;
        if (selectedSegmentIndex === index) {
          newSelectedIndex = newIndex;
        } else if (selectedSegmentIndex !== null) {
          // Find where the currently selected segment ended up after sorting
          const selectedSegment = updatedSegments[selectedSegmentIndex];
          newSelectedIndex = sortedSegments.findIndex(
            (seg) =>
              seg.start === selectedSegment.start &&
              seg.end === selectedSegment.end &&
              seg.text === selectedSegment.text
          );
        }
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: sortedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: sortedSegments,
          selectedSegmentIndex: newSelectedIndex,
        });
        // Record history with debounce for drag operations
        _recordHistoryDebounced(sortedSegments);
      },
      // New method to finalize segment positioning after drag operations
      finalizeSegmentPositioning: () => {
        const {currentSegments, transcription, selectedSegmentIndex} = get();
        if (!currentSegments || !transcription) return;
        // Re-sort segments by start time
        const sortedSegments = [...currentSegments].sort(
          (a, b) => a.start - b.start
        );
        // Update selected segment index to reflect new position
        let newSelectedIndex = selectedSegmentIndex;
        if (selectedSegmentIndex !== null) {
          const selectedSegment = currentSegments[selectedSegmentIndex];
          newSelectedIndex = sortedSegments.findIndex(
            (seg) =>
              seg.start === selectedSegment.start &&
              seg.end === selectedSegment.end &&
              seg.text === selectedSegment.text
          );
        }
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: sortedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: sortedSegments,
          selectedSegmentIndex: newSelectedIndex,
        });
      },
      updateSegmentText: (index: number, text: string) => {
        const {currentSegments, transcription, _recordHistoryDebounced} = get();
        if (
          !currentSegments ||
          !transcription ||
          index < 0 ||
          index >= currentSegments.length
        )
          return;
        const updatedSegments = [...currentSegments];
        updatedSegments[index] = {
          ...updatedSegments[index],
          text,
        };
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: updatedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: updatedSegments,
        });
        // Record history with debounce for text changes
        _recordHistoryDebounced(updatedSegments);
      },
      deleteSegment: (index: number) => {
        const {
          currentSegments,
          transcription,
          selectedSegmentIndex,
          _recordHistoryImmediate,
        } = get();
        if (
          !currentSegments ||
          !transcription ||
          index < 0 ||
          index >= currentSegments.length
        )
          return;
        const updatedSegments = currentSegments.filter(
          (_: AlignedSegment, i: number) => i !== index
        );
        // Adjust selected segment index if necessary
        let newSelectedIndex = selectedSegmentIndex;
        if (selectedSegmentIndex === index) {
          newSelectedIndex = null; // Clear selection if we deleted the selected segment
        } else if (
          selectedSegmentIndex !== null &&
          selectedSegmentIndex > index
        ) {
          newSelectedIndex = selectedSegmentIndex - 1; // Adjust index if selected segment was after deleted one
        }
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: updatedSegments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: updatedSegments,
          selectedSegmentIndex: newSelectedIndex,
        });
        // Record history immediately for deletions since they're instant actions
        _recordHistoryImmediate(updatedSegments);
      },
      mergeSegmentsByProximity: (maxDurationSeconds: number) => {
        const {
          transcription,
          currentSegments,
          history,
          _recordHistoryDebounced,
        } = get();
        if (!transcription) return;
        console.log(`Merge threshold changed to: ${maxDurationSeconds}s`);
        // Always use current segments - this is the source of truth for user's changes
        if (!currentSegments) {
          console.warn("No currentSegments available for merging");
          return;
        }
        // const originalSegments = history.length > 0 ? [...history[0]] : [...transcription.aligned_segments];
        // // If threshold is 0, reset to original segments (clear all merge history)
        // if (maxDurationSeconds === 0) {
        //   console.log(`Resetting to original ${originalSegments.length} segments`);
        //   // Strip merge history from original segments
        //   const cleanedOriginals = originalSegments.map(segment => ({
        //     ...segment,
        //     mergedFrom: undefined,
        //     mergeThreshold: undefined,
        //   }));
        //   const updatedTranscription = {
        //     ...transcription,
        //     aligned_segments: cleanedOriginals,
        //   };
        //   set({
        //     transcription: updatedTranscription,
        //     currentSegments: cleanedOriginals,
        //     selectedSegmentIndex: null,
        //   });
        //   return;
        // }
        // Step 1: Intelligently split segments that were merged at higher thresholds
        const splitSegmentsRecursively = (
          segment: AlignedSegment
        ): AlignedSegment[] => {
          // If this segment has no merge history or was merged at/below current threshold, keep it
          if (
            !segment.mergedFrom ||
            !segment.mergeThreshold ||
            segment.mergeThreshold <= maxDurationSeconds
          ) {
            return [segment];
          }
          // This segment was merged above the current threshold, split it
          console.log(
            `Splitting segment merged at ${segment.mergeThreshold}s (current threshold: ${maxDurationSeconds}s)`
          );
          // Calculate the relative positions of constituents within the current segment's timing
          const currentStart = segment.start;
          const currentEnd = segment.end;
          const currentDuration = currentEnd - currentStart;
          // Get the original combined duration when constituents were first merged
          const constituents = segment.mergedFrom;
          const originalTotalDuration = constituents.reduce(
            (sum, c) => sum + c.duration,
            0
          );
          // Position each constituent relative to current segment position
          let cumulativeTime = 0;
          const repositionedConstituents = constituents
            .map((constituent) => {
              const relativeStart = cumulativeTime / originalTotalDuration;
              const relativeEnd =
                (cumulativeTime + constituent.duration) / originalTotalDuration;
              const newStart = currentStart + relativeStart * currentDuration;
              const newEnd = currentStart + relativeEnd * currentDuration;
              cumulativeTime += constituent.duration;
              const repositioned: AlignedSegment = {
                ...constituent,
                start: newStart,
                end: newEnd,
                duration: newEnd - newStart,
              };
              // Recursively split this constituent if needed
              return splitSegmentsRecursively(repositioned);
            })
            .flat();
          return repositionedConstituents;
        };
        let segments = currentSegments.flatMap((segment) =>
          splitSegmentsRecursively(segment)
        );
        console.log(`After splitting: ${segments.length} segments`);
        // Step 2: Merge segments that can be merged at the current threshold
        const originalCount = segments.length;
        let merged = true;
        while (merged && segments.length > 1) {
          merged = false;
          let closestDistance = Infinity;
          let closestPair = -1;
          // Find the closest pair of adjacent segments that can be merged
          for (let i = 0; i < segments.length - 1; i++) {
            const segment1 = segments[i];
            const segment2 = segments[i + 1];
            const gap = segment2.start - segment1.end;
            // Calculate what the duration would be if we merged these segments
            const mergedDuration = segment2.end - segment1.start;
            // Only consider this pair if the merged duration wouldn't exceed the threshold
            if (mergedDuration <= maxDurationSeconds && gap < closestDistance) {
              closestDistance = gap;
              closestPair = i;
            }
          }
          // Merge the closest pair if found
          if (closestPair !== -1) {
            const segment1 = segments[closestPair];
            const segment2 = segments[closestPair + 1];
            // Collect all constituent segments (handle nested merges)
            const getAllConstituents = (
              segment: AlignedSegment
            ): AlignedSegment[] => {
              if (segment.mergedFrom) {
                return segment.mergedFrom.flatMap(getAllConstituents);
              }
              // Return atomic segment without merge history
              return [
                {
                  start: segment.start,
                  end: segment.end,
                  duration: segment.duration,
                  text: segment.text,
                  chunk_index: segment.chunk_index,
                  speech_segment_index: segment.speech_segment_index,
                },
              ];
            };
            const constituents1 = getAllConstituents(segment1);
            const constituents2 = getAllConstituents(segment2);
            const allConstituents = [...constituents1, ...constituents2];
            const mergedSegment: AlignedSegment = {
              start: segment1.start,
              end: segment2.end,
              duration: segment2.end - segment1.start,
              text: `${segment1.text} ${segment2.text}`,
              chunk_index: segment1.chunk_index,
              speech_segment_index: segment1.speech_segment_index,
              mergedFrom: allConstituents,
              mergeThreshold: maxDurationSeconds,
            };
            segments = [
              ...segments.slice(0, closestPair),
              mergedSegment,
              ...segments.slice(closestPair + 2),
            ];
            merged = true;
            console.log(
              `Merged segments: "${segment1.text}" + "${segment2.text}"`
            );
          }
        }
        console.log(
          `Final result: ${originalCount} → ${segments.length} segments`
        );
        // Update both transcription and current segments
        const updatedTranscription = {
          ...transcription,
          aligned_segments: segments,
        };
        set({
          transcription: updatedTranscription,
          currentSegments: segments,
          selectedSegmentIndex: null,
        });
        // Record history with debounce for merge slider changes
        _recordHistoryDebounced(segments);
      },
      // Viewport actions
      setViewport: (start: number, end: number) => {
        set({
          viewportStart: start,
          viewportEnd: end,
        });
      },
      initializeViewport: (duration: number) => {
        const FIXED_VIEWPORT_DURATION = 30; // Fixed viewport window is always 30 seconds
        const viewportDuration = Math.min(FIXED_VIEWPORT_DURATION, duration);
        set({
          viewportStart: 0,
          viewportEnd: viewportDuration,
        });
      },
      // History actions
      undo: () => {
        const {history, historyIndex, transcription} = get();
        if (historyIndex > 0) {
          const newIndex = historyIndex - 1;
          const segments = history[newIndex];
          // Update both transcription and current segments
          const updatedTranscription = {
            ...transcription!,
            aligned_segments: [...segments],
          };
          set({
            transcription: updatedTranscription,
            currentSegments: [...segments],
            historyIndex: newIndex,
            selectedSegmentIndex: null,
            canUndo: newIndex > 0,
            canRedo: newIndex < history.length - 1,
          });
        }
      },
      redo: () => {
        const {history, historyIndex, transcription} = get();
        if (historyIndex < history.length - 1) {
          const newIndex = historyIndex + 1;
          const segments = history[newIndex];
          // Update both transcription and current segments
          const updatedTranscription = {
            ...transcription!,
            aligned_segments: [...segments],
          };
          set({
            transcription: updatedTranscription,
            currentSegments: [...segments],
            historyIndex: newIndex,
            selectedSegmentIndex: null,
            canUndo: newIndex > 0,
            canRedo: newIndex < history.length - 1,
          });
        }
      },
      // Complex actions
      handleFileSelect: (selectedFile: File) => {
        // Reject video files - only allow audio
        if (selectedFile.type.startsWith("video/")) {
          set({
            error:
              "Video files are not supported. Please upload an audio file only.",
          });
          return;
        }
        // Reject non-audio files
        if (!selectedFile.type.startsWith("audio/")) {
          set({
            error: "Invalid file type. Please upload an audio file.",
          });
          return;
        }
        const {mediaUrl} = get();
        // Clean up previous media URL
        if (mediaUrl) {
          URL.revokeObjectURL(mediaUrl);
        }
        // Create new object URL for media playback
        const url = URL.createObjectURL(selectedFile);
        // Track file upload
        const fileType = "audio";
        const fileSizeMB =
          Math.round((selectedFile.size / (1024 * 1024)) * 10) / 10; // Round to 1 decimal
        trackFileUpload(fileType, fileSizeMB);
        set({
          file: selectedFile,
          mediaUrl: url,
          transcription: null,
          currentTime: 0,
          activeSegmentIndex: null,
          error: null,
          isVideoFile: false,
          hasFile: true,
          hasTranscription: false,
          currentSegments: null,
        });
      },
      handleTranscribe: async () => {
        const {file, selectedLanguage, selectedScript, setTranscription} =
          get();
        if (!file) return;
        set({isLoading: true, error: null});
        // Track transcription start
        if (selectedLanguage) {
          trackTranscriptionStart(selectedLanguage);
        }
        const startTime = Date.now();
        try {
          const result = await transcribeAudio(
            file,
            selectedLanguage,
            selectedScript,
            (isProcessing) => set({isProcessingVideo: isProcessing})
          );
          // Track transcription completion
          if (selectedLanguage) {
            const duration = Math.round((Date.now() - startTime) / 1000); // Duration in seconds
            trackTranscriptionComplete(selectedLanguage, duration);
          }
          // Use setTranscription to properly initialize history
          setTranscription(result);
        } catch (err) {
          console.error("Transcription error:", err);
          // Track transcription error
          if (selectedLanguage) {
            const errorMessage =
              err instanceof Error ? err.message : "Unknown error";
            trackTranscriptionError(selectedLanguage, errorMessage);
          }
          set({
            error:
              err instanceof Error ? err.message : "An unknown error occurred",
          });
        } finally {
          set({isLoading: false, isProcessingVideo: false});
        }
      },
      handleDownloadVideoWithSubtitles: async () => {
        const {
          file,
          transcription,
          selectedLanguage,
          setIsDownloadingVideo,
          setError,
        } = get();
        if (!file || !transcription) return;
        setIsDownloadingVideo(true);
        try {
          const srtContent = generateSRT(transcription.aligned_segments);
          const filename = file.name.replace(
            /\.[^/.]+$/,
            "_with_subtitles.mp4"
          );
          // Pass the selected language or fallback to 'eng'
          const language = selectedLanguage || "eng";
          await downloadVideoWithSubtitles(
            file,
            srtContent,
            filename,
            language,
            "srt",
            "mp4"
          );
          // Track video with subtitles download
          if (selectedLanguage) {
            trackDownloadVideoWithSubtitles(selectedLanguage);
          }
        } catch (err) {
          console.error("Error creating video with subtitles:", err);
          setError("Failed to create video with subtitles");
        } finally {
          setIsDownloadingVideo(false);
        }
      },
      // Recording actions
      startRecording: (type: "audio" | "video") => {
        set({
          isRecording: true,
          recordingType: type,
          recordedBlob: null,
        });
      },
      stopRecording: () => {
        set({
          isRecording: false,
          recordingType: null,
        });
      },
      setRecordedBlob: (blob: Blob | null) => {
        if (blob) {
          // Store the current recording type before it gets cleared
          const currentRecordingType = get().recordingType;
          // Convert blob to ArrayBuffer first, then back to Blob to ensure data persistence
          blob
            .arrayBuffer()
            .then((arrayBuffer) => {
              const {mediaUrl: currentUrl, showWelcomeModal} = get();
              // Clean up previous media URL
              if (currentUrl) {
                URL.revokeObjectURL(currentUrl);
              }
              // Recordings use the WebM container for both audio and video
              const extension = "webm";
              const mimeType =
                currentRecordingType === "video" ? "video/webm" : "audio/webm";
              // Create a new blob from the ArrayBuffer to ensure data persistence
              const persistentBlob = new Blob([arrayBuffer], {type: mimeType});
              // Convert to File object
              const file = new File(
                [persistentBlob],
                `recorded_${currentRecordingType}.${extension}`,
                {
                  type: mimeType,
                  lastModified: Date.now(),
                }
              );
              // Create URL from the persistent blob
              const url = URL.createObjectURL(persistentBlob);
              // Get duration from the blob by creating a temporary media element
              const tempElement =
                currentRecordingType === "video"
                  ? document.createElement("video")
                  : document.createElement("audio");
              // Track file upload
              const fileType =
                currentRecordingType === "video" ? "video" : "audio";
              const fileSizeMB =
                Math.round((persistentBlob.size / (1024 * 1024)) * 10) / 10;
              trackFileUpload(fileType, fileSizeMB);
              // Set all the state - preserve existing media refs!
              const {audioRef, videoRef} = get();
              set({
                ...initialState,
                audioRef, // Preserve existing audioRef
                videoRef, // Preserve existing videoRef
                recordedBlob: persistentBlob, // Store the persistent blob
                file: file,
                mediaUrl: url,
                isRecording: false,
                recordingType: null,
                isVideoFile: currentRecordingType === "video",
                hasFile: true,
                hasTranscription: false,
                showWelcomeModal,
              });
            })
            .catch((error) => {
              console.error("Failed to create persistent blob:", error);
              set({error: "Failed to process recorded media"});
            });
        } else {
          set({recordedBlob: blob});
        }
      },
      reset: () => {
        const {mediaUrl} = get();
        // Clean up media URL
        if (mediaUrl) {
          URL.revokeObjectURL(mediaUrl);
        }
        set({
          ...initialState,
          isVideoFile: false,
          hasFile: false,
          hasTranscription: false,
          currentSegments: null,
        });
      },
    }),
    {
      name: "transcription-store", // Name for devtools
    }
  )
);

// Export the store hook directly - components should use useTranscriptionStore()
// and destructure what they need directly from the store
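
// Illustrative usage sketch (hypothetical component, not part of this module;
// it relies only on state and actions defined above):
//
//   const {hasFile, isLoading, handleFileSelect, handleTranscribe} =
//     useTranscriptionStore();
//
//   <input
//     type="file"
//     accept="audio/*"
//     onChange={(e) => e.target.files?.[0] && handleFileSelect(e.target.files[0])}
//   />
//   <button disabled={!hasFile || isLoading} onClick={handleTranscribe}>
//     Transcribe
//   </button>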