Spaces: Running on Zero
import os
from collections.abc import Iterator
from threading import Thread

import cv2
import gradio as gr
import spaces
import torch
from dotenv import find_dotenv, load_dotenv
from huggingface_hub import login
from loguru import logger
from PIL import Image
from transformers import (
    Gemma3ForConditionalGeneration,
    Gemma3Processor,
    TextIteratorStreamer,
)
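
# Load environment variables (e.g. MODEL_ID) from a local .env file, if present.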
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)
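
# Resolve the checkpoint from MODEL_ID, defaulting to the 4B instruction-tuned
# Gemma 3, and load the processor and model once at startup.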
model_id = os.getenv("MODEL_ID", "google/gemma-3-4b-it")
input_processor = Gemma3Processor.from_pretrained(model_id)
model = Gemma3ForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    attn_implementation="eager",
)
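
# Note: frame extraction below runs on CPU via OpenCV; on ZeroGPU Spaces it is
# typically only the GPU-bound inference functions that carry the @spaces.GPU
# decorator.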
def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, float]]:
    """Sample up to max_images frames evenly from a video, with timestamps in seconds."""
    frames: list[tuple[Image.Image, float]] = []
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        raise ValueError(f"Could not open video file: {video_path}")
    fps = capture.get(cv2.CAP_PROP_FPS)
    total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    if fps <= 0:
        # Guard against containers that report no FPS; without this, the
        # timestamp computation below would divide by zero.
        capture.release()
        raise ValueError(f"Could not read FPS from video file: {video_path}")
    # Spread the sampled frames evenly across the whole clip.
    frame_interval = max(total_frames // max_images, 1)
    for i in range(0, min(total_frames, max_images * frame_interval), frame_interval):
        if len(frames) >= max_images:
            break
        capture.set(cv2.CAP_PROP_POS_FRAMES, i)
        success, image = capture.read()
        if success:
            # OpenCV decodes to BGR; PIL (and the model) expect RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            pil_image = Image.fromarray(image)
            timestamp = round(i / fps, 2)
            frames.append((pil_image, timestamp))
    capture.release()
    return frames
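

# A minimal usage sketch (not part of the original Space): the file name and
# frame budget below are hypothetical.
if __name__ == "__main__":
    for frame, ts in get_frames("example.mp4", max_images=8):
        logger.info(f"Sampled frame at {ts}s, size {frame.size}")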