Spaces:
Running
Running
| import { useState, useRef, useEffect, useCallback } from "react"; | |
| import WebcamCapture from "./WebcamCapture"; | |
| import PromptInput from "./PromptInput"; | |
| import LiveCaption, { type HistoryEntry } from "./LiveCaption"; | |
| import { useVLMContext } from "../context/useVLMContext"; | |
| import { PROMPTS, TIMING } from "../constants"; | |
/**
 * Props accepted by {@link CaptioningView}.
 */
interface CaptioningViewProps {
  /**
   * Ref to the `<video>` element that is handed to the captioning loop.
   * `current` may be `null` while no element is attached.
   */
  videoRef: React.RefObject<HTMLVideoElement | null>;
}
/**
 * Drives a repeating "grab frame -> run inference -> report caption" loop for
 * as long as `isRunning` is true and the VLM context reports `isLoaded`.
 *
 * The loop is restarted only when `isRunning`, `isLoaded`, `runInference` or
 * one of the ref objects changes identity. Callback props are mirrored into
 * refs, so passing new callback functions never restarts the loop.
 *
 * @param videoRef Ref to the video element; re-read on every iteration so a
 *   late-attached or replaced element is picked up.
 * @param isRunning Whether the loop should currently be active.
 * @param promptRef Ref holding the prompt; read right before each inference so
 *   prompt edits apply to the next frame without restarting the loop.
 * @param onCaptionUpdate Receives caption text while the loop is active
 *   (passed to `runInference` as its third argument — presumably invoked with
 *   partial text during generation — and called again with the final result).
 * @param onError Receives the error message when an inference call throws.
 * @param onGenerationComplete Receives the final caption of each successful,
 *   non-aborted inference.
 * @param onStatsUpdate Receives stats objects forwarded from `runInference`.
 */
function useCaptioningLoop(
  videoRef: React.RefObject<HTMLVideoElement | null>,
  isRunning: boolean,
  promptRef: React.RefObject<string>,
  onCaptionUpdate: (caption: string) => void,
  onError: (error: string) => void,
  onGenerationComplete: (caption: string) => void,
  onStatsUpdate: (stats: { tps?: number; ttft?: number }) => void,
) {
  const { isLoaded, runInference } = useVLMContext();

  // Mirror the callbacks into refs so the loop always calls the latest version
  // without listing them as effect dependencies.
  const onCaptionUpdateRef = useRef(onCaptionUpdate);
  const onErrorRef = useRef(onError);
  const onGenerationCompleteRef = useRef(onGenerationComplete);
  const onStatsUpdateRef = useRef(onStatsUpdate);

  useEffect(() => {
    onCaptionUpdateRef.current = onCaptionUpdate;
  }, [onCaptionUpdate]);

  useEffect(() => {
    onErrorRef.current = onError;
  }, [onError]);

  useEffect(() => {
    onGenerationCompleteRef.current = onGenerationComplete;
  }, [onGenerationComplete]);

  useEffect(() => {
    onStatsUpdateRef.current = onStatsUpdate;
  }, [onStatsUpdate]);

  useEffect(() => {
    if (!isRunning || !isLoaded) return;

    // One controller per effect run: the cleanup below aborts exactly the loop
    // this run started, so no shared ref is needed to track it.
    const controller = new AbortController();
    const { signal } = controller;

    const captureLoop = async () => {
      while (!signal.aborted) {
        // Read the ref on every pass instead of once up front, so the loop
        // recovers if the element was not attached yet or has been replaced.
        const video = videoRef.current;

        // readyState >= 2 is HAVE_CURRENT_DATA: a frame is available to read.
        const frameAvailable =
          video !== null &&
          video.readyState >= 2 &&
          !video.paused &&
          video.videoWidth > 0;

        if (video && frameAvailable) {
          try {
            const currentPrompt = promptRef.current ?? "";
            const result = await runInference(
              video,
              currentPrompt,
              // Ignore caption updates from an inference that outlives its
              // loop (pause / restart / unmount); the final result is dropped
              // in that case too, so intermediate text must not leak through.
              (partial: string) => {
                if (!signal.aborted) onCaptionUpdateRef.current(partial);
              },
              (stats) => onStatsUpdateRef.current(stats),
            );
            if (result && !signal.aborted) {
              onCaptionUpdateRef.current(result);
              onGenerationCompleteRef.current(result);
            }
          } catch (error) {
            // Errors raised after an abort are expected noise; stay silent.
            if (!signal.aborted) {
              const message =
                error instanceof Error ? error.message : String(error);
              onErrorRef.current(message);
              console.error("Error processing frame:", error);
            }
          }
        }

        if (signal.aborted) break;

        // Pause between iterations (also throttles retries after an error or
        // while no frame is available).
        await new Promise((resolve) =>
          setTimeout(resolve, TIMING.FRAME_CAPTURE_DELAY),
        );
      }
    };

    // NB: Defer the start by one macrotask so that a cleanup that runs
    // immediately (React strict mode mounts effects twice in development)
    // cancels the loop before it performs any work.
    const startTimer = setTimeout(() => {
      void captureLoop();
    }, 0);

    return () => {
      clearTimeout(startTimer);
      controller.abort();
    };
  }, [isRunning, isLoaded, runInference, promptRef, videoRef]);
}
/** Maximum number of completed captions kept in the history list. */
const MAX_HISTORY_ENTRIES = 50;

/** Formats a date as a 24-hour `HH:MM:SS` clock string (en-US locale). */
function formatClockTime(date: Date): string {
  return date.toLocaleTimeString("en-US", {
    hour12: false,
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
  });
}

/**
 * Full-screen captioning screen: renders the webcam controls, the prompt input
 * and the live caption panel, and owns the state that the captioning loop
 * writes into (caption text, error, history and generation stats).
 *
 * @param videoRef Ref to the video element forwarded to the captioning loop.
 */
export default function CaptioningView({ videoRef }: CaptioningViewProps) {
  const { imageSize, setImageSize } = useVLMContext();

  const [liveCaption, setLiveCaption] = useState<string>("");
  const [loopActive, setLoopActive] = useState<boolean>(true);
  const [prompt, setPrompt] = useState<string>(PROMPTS.default);
  const [errorMessage, setErrorMessage] = useState<string | null>(null);
  const [captionHistory, setCaptionHistory] = useState<HistoryEntry[]>([]);
  const [generationStats, setGenerationStats] = useState<{
    tps?: number;
    ttft?: number;
  }>({});

  // The loop reads the prompt through this ref, so editing the prompt does not
  // restart the loop; the effect keeps the ref in sync with state.
  const promptRef = useRef<string>(prompt);
  useEffect(() => {
    promptRef.current = prompt;
  }, [prompt]);

  // A fresh caption replaces the displayed text and clears any earlier error.
  const showCaption = useCallback((text: string) => {
    setLiveCaption(text);
    setErrorMessage(null);
  }, []);

  // Errors are stored and also surfaced in the caption area.
  const showError = useCallback((message: string) => {
    setErrorMessage(message);
    setLiveCaption(`Error: ${message}`);
  }, []);

  // Prepend each finished caption, keeping only the most recent entries.
  const recordCaption = useCallback((text: string) => {
    const entry: HistoryEntry = {
      timestamp: formatClockTime(new Date()),
      text,
    };
    setCaptionHistory((previous) => [
      entry,
      ...previous.slice(0, MAX_HISTORY_ENTRIES - 1),
    ]);
  }, []);

  // Stats arrive piecemeal; merge them over what is already known.
  const mergeStats = useCallback(
    (incoming: { tps?: number; ttft?: number }) => {
      setGenerationStats((known) => ({ ...known, ...incoming }));
    },
    [],
  );

  useCaptioningLoop(
    videoRef,
    loopActive,
    promptRef,
    showCaption,
    showError,
    recordCaption,
    mergeStats,
  );

  const changePrompt = useCallback((nextPrompt: string) => {
    setPrompt(nextPrompt);
    setErrorMessage(null);
  }, []);

  const toggleLoop = useCallback(() => {
    setLoopActive((wasActive) => !wasActive);
    if (errorMessage) {
      setErrorMessage(null);
    }
  }, [errorMessage]);

  return (
    <div className="absolute inset-0 text-white">
      <div className="relative w-full h-full">
        <WebcamCapture
          isRunning={loopActive}
          onToggleRunning={toggleLoop}
          error={errorMessage}
          imageSize={imageSize}
          onImageSizeChange={setImageSize}
        />

        {/* Bottom-left: prompt editor */}
        <div className="absolute bottom-5 left-5 z-30 w-[540px]">
          <PromptInput onPromptChange={changePrompt} />
        </div>

        {/* Bottom-right: live caption, history and stats */}
        <div className="absolute bottom-5 right-5 z-30 w-[720px]">
          <LiveCaption
            caption={liveCaption}
            isRunning={loopActive}
            error={errorMessage}
            history={captionHistory}
            stats={generationStats}
          />
        </div>
      </div>
    </div>
  );
}