// omniasr-transcriptions / frontend/src/stores/transcriptionStore.ts
// Omnilingual ASR transcription demo (commit ae238b3, author jeanma)
import {create} from "zustand";
import {devtools} from "zustand/middleware";
import debounce from "debounce";
import {
TranscriptionResponse,
PreprocessedAudio,
transcribeAudio,
AlignedSegment,
ServerStatus,
getServerStatus,
HealthResponse,
getServerHealth,
} from "../services/transcriptionApi";
import {generateSRT, downloadVideoWithSubtitles} from "../utils/subtitleUtils";
import {
trackTranscriptionStart,
trackTranscriptionComplete,
trackTranscriptionError,
trackFileUpload,
trackLanguageChange,
trackDownloadVideoWithSubtitles,
} from "../analytics/gaEvents";
// Helper function to find the active segment based on current time.
// Returns the index of the first segment whose [start, end] window contains
// currentTime (inclusive on both ends), or null when no segment matches.
const findActiveSegmentIndex = (
  segments: AlignedSegment[],
  currentTime: number
): number | null => {
  const index = segments.findIndex(
    (segment) => currentTime >= segment.start && currentTime <= segment.end
  );
  return index === -1 ? null : index;
};
// Types for our store state
/**
 * Full shape of the transcription store: file/media state, transcription and
 * segment-editing state, undo/redo history, server-status polling, and every
 * action that mutates the store.
 */
interface TranscriptionState {
  // File and media state
  file: File | null;
  // Object URL for the current file/recording; revoked whenever it is replaced
  mediaUrl: string | null;
  // Recording state
  isRecording: boolean;
  recordingType: "audio" | "video" | null;
  recordedBlob: Blob | null;
  // Media refs for seeking (set by components)
  audioRef: React.RefObject<HTMLAudioElement> | null;
  videoRef: React.RefObject<HTMLVideoElement> | null;
  // Transcription state
  transcription: TranscriptionResponse | null;
  preprocessedAudio: PreprocessedAudio | null;
  currentTime: number;
  activeSegmentIndex: number | null;
  // Working copy of the segments; the edit actions keep this in sync with
  // transcription.aligned_segments
  currentSegments: AlignedSegment[] | null;
  // Edit state
  selectedSegmentIndex: number | null;
  // Viewport state for minimap (seconds)
  viewportStart: number;
  viewportEnd: number;
  // History state for undo/redo: snapshots of the segment list; historyIndex
  // points at the current snapshot (-1 = no history recorded yet)
  history: AlignedSegment[][];
  historyIndex: number;
  // Loading and error state
  isLoading: boolean;
  isProcessingVideo: boolean;
  isDownloadingVideo: boolean;
  error: string | null;
  // Language selection
  selectedLanguage: string | null;
  selectedScript: string | null;
  // Server status
  serverStatus: ServerStatus | null;
  serverHealth: HealthResponse | null;
  isPollingStatus: boolean;
  statusPollingInterval: number | null;
  // Modal state
  showWelcomeModal: boolean;
  // Computed properties (stored as plain fields and updated by the actions,
  // not derived selectors)
  isVideoFile: boolean;
  hasFile: boolean;
  hasTranscription: boolean;
  // Actions
  setFile: (file: File | null) => void;
  setTranscription: (transcription: TranscriptionResponse | null) => void;
  // Recording actions
  startRecording: (type: "audio" | "video") => void;
  stopRecording: () => void;
  setRecordedBlob: (blob: Blob | null) => void;
  setCurrentTime: (time: number) => void;
  setActiveSegmentIndex: (index: number | null) => void;
  setIsLoading: (loading: boolean) => void;
  setIsProcessingVideo: (processing: boolean) => void;
  setIsDownloadingVideo: (downloading: boolean) => void;
  setError: (error: string | null) => void;
  setSelectedLanguage: (language: string | null) => void;
  setSelectedScript: (script: string | null) => void;
  setSelectedLanguageAndScript: (
    language: string | null,
    script: string | null
  ) => void;
  // Modal actions
  setShowWelcomeModal: (show: boolean) => void;
  // Media control actions
  setMediaRefs: (
    audioRef: React.RefObject<HTMLAudioElement>,
    videoRef: React.RefObject<HTMLVideoElement>
  ) => void;
  seekToTime: (time: number) => void;
  // Server status actions
  setServerStatus: (status: ServerStatus | null) => void;
  setServerHealth: (health: HealthResponse | null) => void;
  fetchServerStatus: () => Promise<void>;
  fetchServerHealth: () => Promise<void>;
  startStatusPolling: () => void;
  stopStatusPolling: () => void;
  // Edit actions
  setSelectedSegmentIndex: (index: number | null) => void;
  updateSegmentTiming: (
    index: number,
    start: number,
    end: number,
    deferSorting?: boolean
  ) => void;
  updateSegmentText: (index: number, text: string) => void;
  deleteSegment: (index: number) => void;
  mergeSegmentsByProximity: (maxDurationSeconds: number) => void;
  finalizeSegmentPositioning: () => void;
  // Viewport actions
  setViewport: (start: number, end: number) => void;
  initializeViewport: (duration: number) => void;
  // History actions
  undo: () => void;
  redo: () => void;
  canUndo: boolean;
  canRedo: boolean;
  // Helper functions (internal; prefixed with _ by store convention)
  _recordHistoryImmediate: (segments: AlignedSegment[]) => void;
  _recordHistoryDebounced: (segments: AlignedSegment[]) => void;
  // Complex actions
  handleFileSelect: (file: File) => void;
  handleTranscribe: () => Promise<void>;
  handleTimeUpdate: () => void;
  handleDownloadVideoWithSubtitles: () => Promise<void>;
  reset: () => void;
}
// Initial state
// Baseline values that reset(), setFile() and setRecordedBlob() spread back
// into the store. NOTE(review): the server-status fields and computed flags
// (isVideoFile / hasFile / hasTranscription / canUndo / canRedo) are absent
// here and initialized inline in the store instead, so a `...initialState`
// spread does not touch them — presumably intentional; confirm.
const initialState = {
  file: null,
  mediaUrl: null,
  audioRef: null,
  videoRef: null,
  // Recording state
  isRecording: false,
  recordingType: null,
  recordedBlob: null,
  transcription: null,
  preprocessedAudio: null,
  currentTime: 0,
  activeSegmentIndex: null,
  selectedSegmentIndex: null,
  history: [],
  historyIndex: -1, // -1 = no history snapshot recorded yet
  isLoading: false,
  isProcessingVideo: false,
  isDownloadingVideo: false,
  error: null,
  selectedLanguage: null,
  selectedScript: null,
  currentSegments: null,
  viewportStart: 0,
  viewportEnd: 30, // Default to first 30 seconds
  showWelcomeModal: true, // Show modal on app load
};
export const useTranscriptionStore = create<TranscriptionState>()(
devtools(
(set, get) => ({
...initialState,
// Server status state (kept out of initialState so `...initialState` spreads
// in reset()/setFile() leave polling state untouched)
serverStatus: null,
serverHealth: null,
isPollingStatus: false,
statusPollingInterval: null,
// Computed properties - these will be updated when relevant state changes
// (plain state fields kept in sync by the actions, not derived selectors)
isVideoFile: false,
hasFile: false,
hasTranscription: false,
canUndo: false,
canRedo: false,
// Simple setters
// Replace the current file: release the previous object URL, reset the store
// to its initial shape, and re-derive the file-related flags. Only the
// welcome-modal visibility is carried over.
setFile: (file) => {
  const {mediaUrl: previousUrl, showWelcomeModal} = get();
  if (previousUrl) {
    URL.revokeObjectURL(previousUrl);
  }
  set({
    ...initialState,
    // Override only file-specific properties
    file,
    mediaUrl: file ? URL.createObjectURL(file) : null,
    isVideoFile: file ? file.type.startsWith("video/") : false,
    hasFile: !!file,
    hasTranscription: false,
    // Preserve the modal state - don't reset it
    showWelcomeModal,
  });
},
// Store a new transcription result (or clear it with null), and seed the
// undo/redo history with the fresh segment list.
setTranscription: (transcription) => {
  const segments = transcription?.aligned_segments || null;
  set({
    transcription,
    preprocessedAudio: transcription?.preprocessed_audio || null,
    hasTranscription: !!transcription,
    currentSegments: segments,
  });
  if (transcription && segments) {
    // First history entry is the untouched segment list.
    set({
      history: [[...segments]],
      historyIndex: 0,
      canUndo: false,
      canRedo: false,
    });
  }
},
// Mirror the mounted media element's playback position into the store and
// recompute which segment is currently active.
handleTimeUpdate: () => {
  const {audioRef, videoRef, transcription} = get();
  const media = audioRef?.current || videoRef?.current;
  if (!media || !transcription) {
    return;
  }
  const time = media.currentTime;
  set({
    currentTime: time,
    activeSegmentIndex: findActiveSegmentIndex(
      transcription.aligned_segments,
      time
    ),
  });
},
setCurrentTime: (time) => set({currentTime: time}),
setActiveSegmentIndex: (index) => set({activeSegmentIndex: index}),
setIsLoading: (loading) => set({isLoading: loading}),
setIsProcessingVideo: (processing) => set({isProcessingVideo: processing}),
setIsDownloadingVideo: (downloading) =>
  set({isDownloadingVideo: downloading}),
setError: (error) => set({error}),
// Record the language choice for analytics before storing it.
setSelectedLanguage: (language) => {
  if (language) {
    trackLanguageChange(language);
  }
  set({selectedLanguage: language});
},
setSelectedScript: (script) => set({selectedScript: script}),
// Atomically set language and script (single analytics event for language).
setSelectedLanguageAndScript: (language, script) => {
  if (language) {
    trackLanguageChange(language);
  }
  set({selectedLanguage: language, selectedScript: script});
},
// Modal actions
setShowWelcomeModal: (show) => set({showWelcomeModal: show}),
// Media control actions
setMediaRefs: (audioRef, videoRef) => set({audioRef, videoRef}),
// Seek the mounted media element, clamping the target into [0, duration].
seekToTime: (time) => {
  const {audioRef, videoRef} = get();
  const media = audioRef?.current || videoRef?.current;
  if (!media) {
    return;
  }
  // duration may be NaN/0 before metadata loads; fall back to no upper bound
  const upperBound = media.duration || Infinity;
  const clamped = Math.min(Math.max(time, 0), upperBound);
  media.currentTime = clamped;
  // Push the new time immediately so auto-scroll reacts without waiting for
  // the element's next timeupdate event.
  set({currentTime: clamped});
},
// Server status actions
setServerStatus: (serverStatus) => set({serverStatus}),
setServerHealth: (serverHealth) => set({serverHealth}),
// One-shot status fetch; failures are logged, never thrown.
fetchServerStatus: async () => {
  try {
    set({serverStatus: await getServerStatus()});
  } catch (error) {
    console.error("Failed to fetch server status:", error);
  }
},
// One-shot health fetch; failures are logged, never thrown.
fetchServerHealth: async () => {
  try {
    set({serverHealth: await getServerHealth()});
  } catch (error) {
    console.error("Failed to fetch server health:", error);
  }
},
// Begin polling server status every 2 seconds. No-op when already polling;
// any stale interval is cleared first.
startStatusPolling: () => {
  const {isPollingStatus, statusPollingInterval, fetchServerStatus} = get();
  if (isPollingStatus) {
    return;
  }
  if (statusPollingInterval) {
    clearInterval(statusPollingInterval);
  }
  // Fire one request right away, then on a fixed cadence.
  fetchServerStatus();
  const interval = setInterval(() => {
    fetchServerStatus();
  }, 2000);
  set({isPollingStatus: true, statusPollingInterval: interval});
},
// Stop polling and drop the interval handle.
stopStatusPolling: () => {
  const {statusPollingInterval} = get();
  if (statusPollingInterval) {
    clearInterval(statusPollingInterval);
  }
  set({isPollingStatus: false, statusPollingInterval: null});
},
// Helper function to record history immediately (for instant actions like delete)
// Truncates any redo tail, appends a snapshot of `segments`, and caps the
// history at 50 entries (oldest dropped first).
_recordHistoryImmediate: (segments: AlignedSegment[]) => {
  const {history, historyIndex} = get();
  // Remove any history after current index (when we're not at the end)
  const newHistory = history.slice(0, historyIndex + 1);
  // Add new state to history
  newHistory.push([...segments]);
  // Limit history size to prevent memory issues (keep last 50 states)
  const maxHistorySize = 50;
  const newIndex = newHistory.length - 1;
  if (newHistory.length > maxHistorySize) {
    newHistory.shift();
    // Index shifts down by one because the oldest entry was dropped
    const adjustedIndex = newIndex - 1;
    set({
      history: newHistory,
      historyIndex: adjustedIndex,
      canUndo: adjustedIndex > 0,
      canRedo: false, // Always false when adding new history
    });
  } else {
    set({
      history: newHistory,
      historyIndex: newIndex,
      canUndo: newIndex > 0,
      canRedo: false, // Always false when adding new history
    });
  }
},
// Debounced history recording method
// Coalesces rapid successive edits (drag frames, typing, slider moves) into
// one history entry recorded 500ms after the last change.
_recordHistoryDebounced: debounce((segments: AlignedSegment[]) => {
  const {_recordHistoryImmediate} = get();
  _recordHistoryImmediate(segments);
}, 500),
// Edit mode actions
// Initialize history for undo/redo (called automatically when transcription is set)
// NOTE(review): this action is not declared on the TranscriptionState
// interface and duplicates the seeding already done in setTranscription —
// confirm it is still needed.
initializeHistory: () => {
  const {transcription, history} = get();
  if (!transcription || history.length > 0) return;
  const segments = [...transcription.aligned_segments];
  set({
    history: [segments],
    historyIndex: 0,
    canUndo: false,
    canRedo: false,
  });
},
setSelectedSegmentIndex: (selectedSegmentIndex) => {
  set({selectedSegmentIndex});
},
// Move one segment to a new [start, end] window. Unless deferSorting is set
// (used for intermediate drag frames), segments are re-sorted by start time
// and the selected-segment index is remapped to its post-sort position.
updateSegmentTiming: (
  index: number,
  start: number,
  end: number,
  deferSorting: boolean = false
) => {
  const {
    currentSegments,
    transcription,
    selectedSegmentIndex,
    _recordHistoryDebounced,
  } = get();
  if (
    !currentSegments ||
    !transcription ||
    index < 0 ||
    index >= currentSegments.length
  )
    return;
  const updatedSegments = [...currentSegments];
  updatedSegments[index] = {
    ...updatedSegments[index],
    start,
    end,
    duration: end - start,
  };
  // If deferSorting is true (during drag operations), just update without re-sorting
  if (deferSorting) {
    // Update both transcription and current segments without re-sorting
    const updatedTranscription = {
      ...transcription,
      aligned_segments: updatedSegments,
    };
    set({
      transcription: updatedTranscription,
      currentSegments: updatedSegments,
    });
    // Don't record history during intermediate drag updates
    return;
  }
  // Normal operation: re-sort segments by start time to maintain chronological order
  const sortedSegments = [...updatedSegments].sort(
    (a, b) => a.start - b.start
  );
  // Find the new index of the moved segment after sorting.
  // Segments are matched by value (start/end/text) — assumes that triple is
  // unique; TODO confirm duplicate segments cannot occur.
  const movedSegment = updatedSegments[index];
  const newIndex = sortedSegments.findIndex(
    (seg) =>
      seg.start === movedSegment.start &&
      seg.end === movedSegment.end &&
      seg.text === movedSegment.text
  );
  // Update selected segment index if it was the one being moved
  let newSelectedIndex = selectedSegmentIndex;
  if (selectedSegmentIndex === index) {
    newSelectedIndex = newIndex;
  } else if (selectedSegmentIndex !== null) {
    // Find where the currently selected segment ended up after sorting
    const selectedSegment = updatedSegments[selectedSegmentIndex];
    newSelectedIndex = sortedSegments.findIndex(
      (seg) =>
        seg.start === selectedSegment.start &&
        seg.end === selectedSegment.end &&
        seg.text === selectedSegment.text
    );
  }
  // Update both transcription and current segments
  const updatedTranscription = {
    ...transcription,
    aligned_segments: sortedSegments,
  };
  set({
    transcription: updatedTranscription,
    currentSegments: sortedSegments,
    selectedSegmentIndex: newSelectedIndex,
  });
  // Record history with debounce for drag operations
  _recordHistoryDebounced(sortedSegments);
},
// New method to finalize segment positioning after drag operations:
// re-sorts by start time and re-locates the selected segment.
finalizeSegmentPositioning: () => {
  const {currentSegments, transcription, selectedSegmentIndex} = get();
  if (!currentSegments || !transcription) return;
  const sorted = [...currentSegments].sort((a, b) => a.start - b.start);
  // Re-locate the selection by value match (start/end/text) after the sort.
  let selected = selectedSegmentIndex;
  if (selected !== null) {
    const target = currentSegments[selected];
    selected = sorted.findIndex(
      (seg) =>
        seg.start === target.start &&
        seg.end === target.end &&
        seg.text === target.text
    );
  }
  set({
    transcription: {...transcription, aligned_segments: sorted},
    currentSegments: sorted,
    selectedSegmentIndex: selected,
  });
},
// Replace the text of one segment; timing is untouched. History is recorded
// with a debounce so each keystroke does not create an undo step.
updateSegmentText: (index: number, text: string) => {
  const {currentSegments, transcription, _recordHistoryDebounced} = get();
  if (
    !currentSegments ||
    !transcription ||
    index < 0 ||
    index >= currentSegments.length
  ) {
    return;
  }
  const segments = currentSegments.map((segment, i) =>
    i === index ? {...segment, text} : segment
  );
  set({
    transcription: {...transcription, aligned_segments: segments},
    currentSegments: segments,
  });
  _recordHistoryDebounced(segments);
},
// Remove one segment and shift the selection index if it pointed past the
// deleted entry. History is recorded immediately (deletion is instant).
deleteSegment: (index: number) => {
  const {
    currentSegments,
    transcription,
    selectedSegmentIndex,
    _recordHistoryImmediate,
  } = get();
  if (
    !currentSegments ||
    !transcription ||
    index < 0 ||
    index >= currentSegments.length
  ) {
    return;
  }
  const remaining = currentSegments.filter(
    (_: AlignedSegment, i: number) => i !== index
  );
  let selected = selectedSegmentIndex;
  if (selected === index) {
    // The deleted segment was selected: clear the selection.
    selected = null;
  } else if (selected !== null && selected > index) {
    // Selection was after the deleted segment: shift it left by one.
    selected = selected - 1;
  }
  set({
    transcription: {...transcription, aligned_segments: remaining},
    currentSegments: remaining,
    selectedSegmentIndex: selected,
  });
  _recordHistoryImmediate(remaining);
},
// Re-cluster segments so no merged segment exceeds maxDurationSeconds.
// Two passes:
//   1. Recursively split any segment whose recorded mergeThreshold is above
//      the new threshold back into its constituents, repositioned
//      proportionally inside the segment's current timing.
//   2. Greedily merge the adjacent pair with the smallest gap whose combined
//      duration stays within the threshold, repeating until no pair fits.
// Merge provenance (mergedFrom / mergeThreshold) is kept on each merged
// segment so a later, lower threshold can split it again.
mergeSegmentsByProximity: (maxDurationSeconds: number) => {
  const {transcription, currentSegments, _recordHistoryDebounced} = get();
  if (!transcription) return;
  console.log(`Merge threshold changed to: ${maxDurationSeconds}s`);
  // Always use current segments - this is the source of truth for user's changes
  if (!currentSegments) {
    console.warn("No currentSegments available for merging");
    return;
  }
  // Step 1: Intelligently split segments that were merged at higher thresholds
  const splitSegmentsRecursively = (
    segment: AlignedSegment
  ): AlignedSegment[] => {
    // If this segment has no merge history or was merged at/below current threshold, keep it
    if (
      !segment.mergedFrom ||
      !segment.mergeThreshold ||
      segment.mergeThreshold <= maxDurationSeconds
    ) {
      return [segment];
    }
    // This segment was merged above the current threshold, split it
    console.log(
      `Splitting segment merged at ${segment.mergeThreshold}s (current threshold: ${maxDurationSeconds}s)`
    );
    // Calculate the relative positions of constituents within the current segment's timing
    const currentStart = segment.start;
    const currentEnd = segment.end;
    const currentDuration = currentEnd - currentStart;
    // Get the original combined duration when constituents were first merged
    const constituents = segment.mergedFrom;
    const originalTotalDuration = constituents.reduce(
      (sum, c) => sum + c.duration,
      0
    );
    // Position each constituent relative to the current segment position,
    // preserving each constituent's proportional share of the duration.
    let cumulativeTime = 0;
    const repositionedConstituents = constituents.flatMap((constituent) => {
      const relativeStart = cumulativeTime / originalTotalDuration;
      const relativeEnd =
        (cumulativeTime + constituent.duration) / originalTotalDuration;
      const newStart = currentStart + relativeStart * currentDuration;
      const newEnd = currentStart + relativeEnd * currentDuration;
      cumulativeTime += constituent.duration;
      const repositioned: AlignedSegment = {
        ...constituent,
        start: newStart,
        end: newEnd,
        duration: newEnd - newStart,
      };
      // Recursively split this constituent if needed
      return splitSegmentsRecursively(repositioned);
    });
    return repositionedConstituents;
  };
  let segments = currentSegments.flatMap((segment) =>
    splitSegmentsRecursively(segment)
  );
  console.log(`After splitting: ${segments.length} segments`);
  // Step 2: Merge segments that can be merged at the current threshold
  const originalCount = segments.length;
  let merged = true;
  while (merged && segments.length > 1) {
    merged = false;
    let closestDistance = Infinity;
    let closestPair = -1;
    // Find the closest pair of adjacent segments that can be merged
    for (let i = 0; i < segments.length - 1; i++) {
      const segment1 = segments[i];
      const segment2 = segments[i + 1];
      const gap = segment2.start - segment1.end;
      // Calculate what the duration would be if we merged these segments
      const mergedDuration = segment2.end - segment1.start;
      // Only consider this pair if the merged duration wouldn't exceed the threshold
      if (mergedDuration <= maxDurationSeconds && gap < closestDistance) {
        closestDistance = gap;
        closestPair = i;
      }
    }
    // Merge the closest pair if found
    if (closestPair !== -1) {
      const segment1 = segments[closestPair];
      const segment2 = segments[closestPair + 1];
      // Collect all constituent segments (handle nested merges) so the new
      // merged segment records a flat list of atomic constituents.
      const getAllConstituents = (
        segment: AlignedSegment
      ): AlignedSegment[] => {
        if (segment.mergedFrom) {
          return segment.mergedFrom.flatMap(getAllConstituents);
        }
        // Return atomic segment without merge history
        return [
          {
            start: segment.start,
            end: segment.end,
            duration: segment.duration,
            text: segment.text,
            chunk_index: segment.chunk_index,
            speech_segment_index: segment.speech_segment_index,
          },
        ];
      };
      const allConstituents = [
        ...getAllConstituents(segment1),
        ...getAllConstituents(segment2),
      ];
      const mergedSegment: AlignedSegment = {
        start: segment1.start,
        end: segment2.end,
        duration: segment2.end - segment1.start,
        text: `${segment1.text} ${segment2.text}`,
        chunk_index: segment1.chunk_index,
        speech_segment_index: segment1.speech_segment_index,
        mergedFrom: allConstituents,
        mergeThreshold: maxDurationSeconds,
      };
      segments = [
        ...segments.slice(0, closestPair),
        mergedSegment,
        ...segments.slice(closestPair + 2),
      ];
      merged = true;
      console.log(
        `Merged segments: "${segment1.text}" + "${segment2.text}"`
      );
    }
  }
  // Fixed log template: the original interpolated the two counts with no
  // separator between them ("Final result: 2015 segments").
  console.log(
    `Final result: ${originalCount} -> ${segments.length} segments`
  );
  // Update both transcription and current segments
  const updatedTranscription = {
    ...transcription,
    aligned_segments: segments,
  };
  set({
    transcription: updatedTranscription,
    currentSegments: segments,
    selectedSegmentIndex: null,
  });
  // Record history with debounce for merge slider changes
  _recordHistoryDebounced(segments);
},
// Viewport actions
setViewport: (start: number, end: number) => {
  set({
    viewportStart: start,
    viewportEnd: end,
  });
},
// Reset the minimap viewport to the start of the media, capped at 30 seconds
// or the media duration, whichever is shorter.
initializeViewport: (duration: number) => {
  const FIXED_VIEWPORT_DURATION = 30; // Fixed viewport window is always 30 seconds
  const viewportDuration = Math.min(FIXED_VIEWPORT_DURATION, duration);
  set({
    viewportStart: 0,
    viewportEnd: viewportDuration,
  });
},
// History actions
// Step one entry back in the edit history and restore that segment snapshot.
// No-op when already at the oldest entry.
undo: () => {
  const {history, historyIndex, transcription} = get();
  if (historyIndex > 0) {
    const newIndex = historyIndex - 1;
    const segments = history[newIndex];
    // Update both transcription and current segments
    // (transcription! is safe here: a positive historyIndex implies a
    // transcription was set when history was seeded)
    const updatedTranscription = {
      ...transcription!,
      aligned_segments: [...segments],
    };
    set({
      transcription: updatedTranscription,
      currentSegments: [...segments],
      historyIndex: newIndex,
      selectedSegmentIndex: null,
      canUndo: newIndex > 0,
      canRedo: newIndex < history.length - 1,
    });
  }
},
// Step one entry forward in the edit history (mirror of undo).
// No-op when already at the newest entry.
redo: () => {
  const {history, historyIndex, transcription} = get();
  if (historyIndex < history.length - 1) {
    const newIndex = historyIndex + 1;
    const segments = history[newIndex];
    // Update both transcription and current segments
    const updatedTranscription = {
      ...transcription!,
      aligned_segments: [...segments],
    };
    set({
      transcription: updatedTranscription,
      currentSegments: [...segments],
      historyIndex: newIndex,
      selectedSegmentIndex: null,
      canUndo: newIndex > 0,
      canRedo: newIndex < history.length - 1,
    });
  }
},
// Complex actions
// Validate and accept an uploaded file. Only audio is allowed; video and
// other types set an error message instead.
handleFileSelect: (selectedFile: File) => {
  if (selectedFile.type.startsWith("video/")) {
    set({
      error:
        "Video files are not supported. Please upload an audio file only.",
    });
    return;
  }
  if (!selectedFile.type.startsWith("audio/")) {
    set({
      error: "Invalid file type. Please upload an audio file.",
    });
    return;
  }
  const {mediaUrl: previousUrl} = get();
  // Release the previous object URL before creating a new one.
  if (previousUrl) {
    URL.revokeObjectURL(previousUrl);
  }
  const objectUrl = URL.createObjectURL(selectedFile);
  // Report the upload (size rounded to one decimal megabyte).
  const sizeMB = Math.round((selectedFile.size / (1024 * 1024)) * 10) / 10;
  trackFileUpload("audio", sizeMB);
  set({
    file: selectedFile,
    mediaUrl: objectUrl,
    transcription: null,
    currentTime: 0,
    activeSegmentIndex: null,
    error: null,
    isVideoFile: false,
    hasFile: true,
    hasTranscription: false,
    currentSegments: null,
  });
},
// Run the transcription request for the current file, reporting start,
// completion time, and errors to analytics.
handleTranscribe: async () => {
  const {file, selectedLanguage, selectedScript, setTranscription} = get();
  if (!file) return;
  set({isLoading: true, error: null});
  if (selectedLanguage) {
    trackTranscriptionStart(selectedLanguage);
  }
  const startedAt = Date.now();
  try {
    const result = await transcribeAudio(
      file,
      selectedLanguage,
      selectedScript,
      (isProcessing) => set({isProcessingVideo: isProcessing})
    );
    if (selectedLanguage) {
      const elapsedSeconds = Math.round((Date.now() - startedAt) / 1000);
      trackTranscriptionComplete(selectedLanguage, elapsedSeconds);
    }
    // setTranscription also seeds the undo/redo history.
    setTranscription(result);
  } catch (err) {
    console.error("Transcription error:", err);
    if (selectedLanguage) {
      trackTranscriptionError(
        selectedLanguage,
        err instanceof Error ? err.message : "Unknown error"
      );
    }
    set({
      error:
        err instanceof Error ? err.message : "An unknown error occurred",
    });
  } finally {
    set({isLoading: false, isProcessingVideo: false});
  }
},
// Burn the current segments into the file as subtitles and download the
// result as an mp4.
handleDownloadVideoWithSubtitles: async () => {
  const {
    file,
    transcription,
    selectedLanguage,
    setIsDownloadingVideo,
    setError,
  } = get();
  if (!file || !transcription) return;
  setIsDownloadingVideo(true);
  try {
    const srtContent = generateSRT(transcription.aligned_segments);
    const outputName = file.name.replace(/\.[^/.]+$/, "_with_subtitles.mp4");
    await downloadVideoWithSubtitles(
      file,
      srtContent,
      outputName,
      selectedLanguage || "eng", // fall back to 'eng' when none selected
      "srt",
      "mp4"
    );
    if (selectedLanguage) {
      trackDownloadVideoWithSubtitles(selectedLanguage);
    }
  } catch (err) {
    console.error("Error creating video with subtitles:", err);
    setError("Failed to create video with subtitles");
  } finally {
    setIsDownloadingVideo(false);
  }
},
// Recording actions
// Mark a recording session as started; any previous blob is discarded.
startRecording: (type: "audio" | "video") => {
  set({
    isRecording: true,
    recordingType: type,
    recordedBlob: null,
  });
},
stopRecording: () => {
  set({
    isRecording: false,
    recordingType: null,
  });
},
// Accept the finished recording. The blob is round-tripped through an
// ArrayBuffer so the data survives after the recorder stream is torn down,
// then wrapped in a File so the rest of the pipeline treats it like an upload.
setRecordedBlob: (blob: Blob | null) => {
  if (blob) {
    // Capture the recording type before stopRecording() clears it.
    const currentRecordingType = get().recordingType;
    blob
      .arrayBuffer()
      .then((arrayBuffer) => {
        const {mediaUrl: currentUrl, showWelcomeModal} = get();
        // Clean up previous media URL
        if (currentUrl) {
          URL.revokeObjectURL(currentUrl);
        }
        // Recordings are webm containers for both audio and video, so the
        // extension is fixed (the original ternary had identical branches).
        const extension = "webm";
        const mimeType =
          currentRecordingType === "video" ? "video/webm" : "audio/webm";
        // Create a new blob from the ArrayBuffer to ensure data persistence
        const persistentBlob = new Blob([arrayBuffer], {type: mimeType});
        // Convert to File object
        const file = new File(
          [persistentBlob],
          `recorded_${currentRecordingType}.${extension}`,
          {
            type: mimeType,
            lastModified: Date.now(),
          }
        );
        // Create URL from the persistent blob
        const url = URL.createObjectURL(persistentBlob);
        // Track file upload
        const fileType =
          currentRecordingType === "video" ? "video" : "audio";
        const fileSizeMB =
          Math.round((persistentBlob.size / (1024 * 1024)) * 10) / 10;
        trackFileUpload(fileType, fileSizeMB);
        // Set all the state - preserve existing media refs!
        const {audioRef, videoRef} = get();
        set({
          ...initialState,
          audioRef, // Preserve existing audioRef
          videoRef, // Preserve existing videoRef
          recordedBlob: persistentBlob, // Store the persistent blob
          file: file,
          mediaUrl: url,
          isRecording: false,
          recordingType: null,
          isVideoFile: currentRecordingType === "video",
          hasFile: true,
          hasTranscription: false,
          showWelcomeModal,
        });
      })
      .catch((error) => {
        console.error("Failed to create persistent blob:", error);
        set({error: "Failed to process recorded media"});
      });
  } else {
    set({recordedBlob: blob});
  }
},
// Return the store to its pristine state, revoking the media object URL.
reset: () => {
  const {mediaUrl} = get();
  // Clean up media URL
  if (mediaUrl) {
    URL.revokeObjectURL(mediaUrl);
  }
  set({
    ...initialState,
    isVideoFile: false,
    hasFile: false,
    hasTranscription: false,
    currentSegments: null,
  });
},
}),
{
name: "transcription-store", // Name for devtools
}
)
);
// Export the store hook directly - components should use useTranscriptionStore()
// and destructure what they need directly from the store