Csplk committed
Commit 3a2d213 · verified · 1 Parent(s): 0741da1

Update app.py

Files changed (1): app.py +14 -13
app.py CHANGED
@@ -16,7 +16,7 @@ moondream = AutoModelForCausalLM.from_pretrained(
     device_map={"": "cuda"},
 )
 moondream.compile()
-
+tokenizer = AutoTokenizer.from_pretrained("moondream/moondream3-preview")
 
 
 """
@@ -50,7 +50,7 @@ model = AutoModelForCausalLM.from_pretrained(
 @spaces.GPU(duration=150)
 def answer_questions(image_tuples, prompt_text):
     # Encode image once
-    image = Image.open("complex_scene.jpg")
+    image = [img[0] for img in image_tuples if img[0] is not None]
     encoded = moondream.encode_image(image)
 
     # Reuse the encoding for multiple queries
@@ -61,30 +61,31 @@ def answer_questions(image_tuples, prompt_text):
     ]
 
     for q in questions:
-        result = moondream.query(image=encoded, question=q, reasoning=False)
+        result1 = moondream.query(image=encoded, question=q, reasoning=False)
         print(f"Q: {q}")
-        print(f"A: {result['answer']}\n")
+        print(f"A: {result1['answer']}\n")
 
     # Also works with other skills
     caption = moondream.caption(encoded, length="normal")
     objects = moondream.detect(encoded, "poop")
     pointe = moondream.point(encoded, "grass")
-    print(f"caption: {e}, objects:{g}, point:{h}")
+    print(f"caption: {caption}, objects:{objects}, point:{pointe}")
 
     # Segment an object
-    result = moondream.segment(image, "cat")
-    svg_path = result["path"]
-    bbox = result["bbox"]
+    result2 = moondream.segment(image, "cat")
+    svg_path = result2["path"]
+    bbox = result2["bbox"]
 
     print(f"SVG Path: {svg_path[:100]}...")
     print(f"Bounding box: {bbox}")
 
     # With spatial hint (point) to guide segmentation
-    result = model.segment(image, "cat", spatial_refs=[[0.5, 0.3]])
-
+    result3 = model.segment(image, "cat", spatial_refs=[[0.5, 0.3]])
+    print(result1)
     # With spatial hint (bounding box)
-    result = model.segment(image, "cat", spatial_refs=[[0.2, 0.1, 0.8, 0.9]])
-
+    result3 = model.segment(image, "cat", spatial_refs=[[0.2, 0.1, 0.8, 0.9]])
+    print(result3)
+
     result = ""
     Q_and_A = ""
     prompts = [p.strip() for p in prompt_text.split('?')]
@@ -106,7 +107,7 @@ def answer_questions(image_tuples, prompt_text):
         Q_and_A += f"**{image_name} A:** \n {answer_text} \n"
 
     result = {'headers': prompts, 'data': answers}
-    #print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
+    print("result\n{}\n\nQ_and_A\n{}\n\n".format(result, Q_and_A))
     return Q_and_A, result
 
 """