Update app.py
app.py CHANGED
@@ -16,7 +16,7 @@ moondream = AutoModelForCausalLM.from_pretrained(
     device_map={"": "cuda"},
 )
 moondream.compile()
-
+tokenizer = AutoTokenizer.from_pretrained("moondream/moondream3-preview")
 
 
 """
@@ -50,7 +50,7 @@ model = AutoModelForCausalLM.from_pretrained(
 @spaces.GPU(duration=150)
 def answer_questions(image_tuples, prompt_text):
     # Encode image once
-    image =
+    image = [img[0] for img in image_tuples if img[0] is not None]
     encoded = moondream.encode_image(image)
 
     # Reuse the encoding for multiple queries
@@ -61,30 +61,31 @@ def answer_questions(image_tuples, prompt_text):
     ]
 
     for q in questions:
-
+        result1 = moondream.query(image=encoded, question=q, reasoning=False)
         print(f"Q: {q}")
-        print(f"A: {
+        print(f"A: {result1['answer']}\n")
 
     # Also works with other skills
     caption = moondream.caption(encoded, length="normal")
     objects = moondream.detect(encoded, "poop")
     points = moondream.point(encoded, "grass")
-    print(f"caption: {
+    print(f"caption: {caption}, objects: {objects}, points: {points}")
 
     # Segment an object
-
-    svg_path =
-    bbox =
+    result2 = moondream.segment(image, "cat")
+    svg_path = result2["path"]
+    bbox = result2["bbox"]
 
     print(f"SVG Path: {svg_path[:100]}...")
     print(f"Bounding box: {bbox}")
 
     # With spatial hint (point) to guide segmentation
-
-
+    result3 = model.segment(image, "cat", spatial_refs=[[0.5, 0.3]])
+    print(result3)
     # With spatial hint (bounding box)
-
-
+    result4 = model.segment(image, "cat", spatial_refs=[[0.2, 0.1, 0.8, 0.9]])
+    print(result4)
+
     result = ""
     Q_and_A = ""
     prompts = [p.strip() for p in prompt_text.split('?')]
@@ -106,7 +107,7 @@ def answer_questions(image_tuples, prompt_text):
         Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
 
     result = {'headers': prompts, 'data': answers}
-
+    print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
     return Q_and_A, result
 
 """
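The diff completes a set of truncated example lines inside app.py's commented-out walkthrough of the moondream3-preview API. For readers who want to run the pattern outside the Space, here is a minimal sketch of the encode-once, query-many idiom those lines rely on. It assumes the model's remote code exposes `encode_image` and `query` with the signatures used in the diff, and that `trust_remote_code=True` is needed at load time; the image path and questions are placeholders.

```python
# Minimal sketch (not the Space's app.py): encode an image once, then
# reuse the encoding across several queries.
# Assumption: moondream3-preview's remote code provides encode_image()
# and query() exactly as used in the diff above.
from PIL import Image
from transformers import AutoModelForCausalLM

moondream = AutoModelForCausalLM.from_pretrained(
    "moondream/moondream3-preview",
    trust_remote_code=True,        # custom modeling code lives in the repo
    device_map={"": "cuda"},
)
moondream.compile()

image = Image.open("example.jpg")        # placeholder image
encoded = moondream.encode_image(image)  # pay the vision-encoder cost once

for q in ["What is in this image?", "How many animals are visible?"]:
    result = moondream.query(image=encoded, question=q, reasoning=False)
    print(f"Q: {q}\nA: {result['answer']}\n")
```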
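The segmentation lines imply that `segment` returns a dict with a `"path"` entry (an SVG path string) and a `"bbox"` entry, and that `spatial_refs` accepts normalized coordinates: a two-element `[x, y]` point or a four-element `[x1, y1, x2, y2]` box. The sketch below restates those calls; the return keys and coordinate conventions are inferred from the diff itself rather than from published documentation.

```python
# Continuing the sketch: segmentation, with and without spatial hints.
# Assumption: "path"/"bbox" return keys and normalized spatial_refs
# formats, both inferred from the calls completed in the diff.
seg = moondream.segment(image, "cat")
print(f"SVG Path: {seg['path'][:100]}...")
print(f"Bounding box: {seg['bbox']}")

# Point hint: normalized [x, y]
seg_pt = moondream.segment(image, "cat", spatial_refs=[[0.5, 0.3]])
# Box hint: normalized [x1, y1, x2, y2]
seg_box = moondream.segment(image, "cat", spatial_refs=[[0.2, 0.1, 0.8, 0.9]])
print(seg_pt["bbox"], seg_box["bbox"])
```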
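Finally, the debug print added before the return shows that `answer_questions` hands Gradio two values: the accumulated `Q_and_A` markdown string and a `{'headers': prompts, 'data': answers}` dict, which is the value format `gr.Dataframe` accepts. A hypothetical wiring is sketched below; the component names and layout are illustrative, not copied from the Space's actual interface.

```python
# Hypothetical Gradio wiring for answer_questions' two outputs.
# Component names/layout are illustrative, not the Space's real UI.
import gradio as gr

with gr.Blocks() as demo:
    gallery = gr.Gallery(label="Images")   # yields (image, caption) tuples
    prompt_box = gr.Textbox(label="Questions, separated by '?'")
    qa_md = gr.Markdown()                  # receives the Q_and_A string
    table = gr.Dataframe()                 # accepts {'headers': ..., 'data': ...}
    gr.Button("Ask").click(
        answer_questions, inputs=[gallery, prompt_box], outputs=[qa_md, table]
    )

demo.launch()
```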