Update app.py
app.py CHANGED
@@ -16,7 +16,7 @@ moondream = AutoModelForCausalLM.from_pretrained(
     device_map={"": "cuda"},
 )
 moondream.compile()
-
+tokenizer = AutoTokenizer.from_pretrained("moondream/moondream3-preview")
 
 
 """
@@ -50,7 +50,7 @@ model = AutoModelForCausalLM.from_pretrained(
 @spaces.GPU(duration=150)
 def answer_questions(image_tuples, prompt_text):
     # Encode image once
-    image =
+    image = [img[0] for img in image_tuples if img[0] is not None]
     encoded = moondream.encode_image(image)
 
     # Reuse the encoding for multiple queries
@@ -61,30 +61,31 @@ def answer_questions(image_tuples, prompt_text):
     ]
 
     for q in questions:
-
+        result1 = moondream.query(image=encoded, question=q, reasoning=False)
         print(f"Q: {q}")
-        print(f"A: {
+        print(f"A: {result1['answer']}\n")
 
     # Also works with other skills
     caption = moondream.caption(encoded, length="normal")
     objects = moondream.detect(encoded, "poop")
     points = moondream.point(encoded, "grass")
-    print(f"caption: {
+    print(f"caption: {caption}, objects: {objects}, points: {points}")
 
     # Segment an object
-
-    svg_path =
-    bbox =
+    result2 = moondream.segment(image, "cat")
+    svg_path = result2["path"]
+    bbox = result2["bbox"]
 
     print(f"SVG Path: {svg_path[:100]}...")
     print(f"Bounding box: {bbox}")
 
     # With spatial hint (point) to guide segmentation
-
-
+    result3 = model.segment(image, "cat", spatial_refs=[[0.5, 0.3]])
+    print(result3)
     # With spatial hint (bounding box)
-
-
+    result4 = model.segment(image, "cat", spatial_refs=[[0.2, 0.1, 0.8, 0.9]])
+    print(result4)
+
     result = ""
     Q_and_A = ""
     prompts = [p.strip() for p in prompt_text.split('?')]
@@ -106,7 +107,7 @@ def answer_questions(image_tuples, prompt_text):
         Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
 
     result = {'headers': prompts, 'data': answers}
-
+    print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
     return Q_and_A, result
 
 """
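The diff completes a set of truncated example lines inside app.py's commented-out walkthrough of the moondream3-preview API. For readers who want to run the pattern outside the Space, here is a minimal sketch of the encode-once, query-many idiom those lines rely on. It assumes the model's remote code exposes `encode_image` and `query` with the signatures used in the diff, and that `trust_remote_code=True` is needed at load time; the image path and questions are placeholders.

```python
# Minimal sketch (not the Space's app.py): encode an image once, then
# reuse the encoding across several queries.
# Assumption: moondream3-preview's remote code provides encode_image()
# and query() exactly as used in the diff above.
from PIL import Image
from transformers import AutoModelForCausalLM

moondream = AutoModelForCausalLM.from_pretrained(
    "moondream/moondream3-preview",
    trust_remote_code=True,        # custom modeling code lives in the repo
    device_map={"": "cuda"},
)
moondream.compile()

image = Image.open("example.jpg")        # placeholder image
encoded = moondream.encode_image(image)  # pay the vision-encoder cost once

for q in ["What is in this image?", "How many animals are visible?"]:
    result = moondream.query(image=encoded, question=q, reasoning=False)
    print(f"Q: {q}\nA: {result['answer']}\n")
```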
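The segmentation lines imply that `segment` returns a dict with a `"path"` entry (an SVG path string) and a `"bbox"` entry, and that `spatial_refs` accepts normalized coordinates: a two-element `[x, y]` point or a four-element `[x1, y1, x2, y2]` box. The sketch below restates those calls; the return keys and coordinate conventions are inferred from the diff itself rather than from published documentation.

```python
# Continuing the sketch: segmentation, with and without spatial hints.
# Assumption: "path"/"bbox" return keys and normalized spatial_refs
# formats, both inferred from the calls completed in the diff.
seg = moondream.segment(image, "cat")
print(f"SVG Path: {seg['path'][:100]}...")
print(f"Bounding box: {seg['bbox']}")

# Point hint: normalized [x, y]
seg_pt = moondream.segment(image, "cat", spatial_refs=[[0.5, 0.3]])
# Box hint: normalized [x1, y1, x2, y2]
seg_box = moondream.segment(image, "cat", spatial_refs=[[0.2, 0.1, 0.8, 0.9]])
print(seg_pt["bbox"], seg_box["bbox"])
```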
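Finally, the debug print added before the return shows that `answer_questions` hands Gradio two values: the accumulated `Q_and_A` markdown string and a `{'headers': prompts, 'data': answers}` dict, which is the value format `gr.Dataframe` accepts. A hypothetical wiring is sketched below; the component names and layout are illustrative, not copied from the Space's actual interface.

```python
# Hypothetical Gradio wiring for answer_questions' two outputs.
# Component names/layout are illustrative, not the Space's real UI.
import gradio as gr

with gr.Blocks() as demo:
    gallery = gr.Gallery(label="Images")   # yields (image, caption) tuples
    prompt_box = gr.Textbox(label="Questions, separated by '?'")
    qa_md = gr.Markdown()                  # receives the Q_and_A string
    table = gr.Dataframe()                 # accepts {'headers': ..., 'data': ...}
    gr.Button("Ask").click(
        answer_questions, inputs=[gallery, prompt_box], outputs=[qa_md, table]
    )

demo.launch()
```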