vikhyatk committed on
Commit
aba9ee1
·
verified ·
1 Parent(s): 9a0f51c

Upload HfMoondream

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9414434ab3afb560b37bbd5d3972ae944679e7773a60ece538e4231d2cf142f
3
  size 4907406296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd4b3d0d6daae9c4212056cd64f02f408ff083bbb0244114eecd05fcba30037e
3
  size 4907406296
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0433bb359387b93502680ac120913f46e0d6d62940f74ef75759a085edcad86
3
  size 4736548872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf6d17391db58801b61173510ba629875679dbcbe4bfd3cb38ac0958b3c70a0
3
  size 4736548872
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82b15aaadff6efa4013788ccaa321d496993fe41240305c7b8dd8e8cfbc4fa69
3
  size 4502742464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4391c6d6b46ed49aa00afddf1f7df9dd0845cbc681fdaf424e727b01ea2d3e4
3
  size 4502742464
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fde1839f3766d227b30cfa07521cc0126bfacac08e91f135defdc7624405977f
3
  size 4390620392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af14858bdd7cdea5d19d786726e48434b02a3c0c52a771a0f25b6a8ca640187
3
  size 4390620392
moondream.py CHANGED
@@ -51,7 +51,7 @@ ObjectSamplingSettings = TypedDict(
51
  DEFAULT_MAX_TOKENS = 768
52
  DEFAULT_TEMPERATURE = 0.5
53
  DEFAULT_TOP_P = 0.9
54
- DEFAULT_MAX_OBJECTS = 50
55
 
56
 
57
  @dataclass(frozen=True)
@@ -590,10 +590,6 @@ class MoondreamModel(nn.Module):
590
  logits_BV, _ = self._decode_one_tok(next_emb, mask, pos_ids, lora)
591
  logits_BV[:, self.config.tokenizer.answer_id] = float("-inf")
592
 
593
- # Suppress EOS for the first token to ensure at least one answer token
594
- if generated_tokens == 0:
595
- logits_BV[:, eos_id] = float("-inf")
596
-
597
  pos += 1
598
 
599
  if temperature == 0:
@@ -620,7 +616,7 @@ class MoondreamModel(nn.Module):
620
  self,
621
  image: Optional[Union[Image.Image, EncodedImage]] = None,
622
  question: str = None,
623
- reasoning: bool = False,
624
  spatial_refs: Optional[SpatialRefs] = None,
625
  stream: bool = False,
626
  settings: Optional[TextSamplingSettings] = None,
@@ -662,10 +658,7 @@ class MoondreamModel(nn.Module):
662
  spatial_toks.extend([coord_id, coord_id, size_id])
663
 
664
  prompt_tokens = [
665
- prompt_toks
666
- + spatial_toks
667
- + self.tokenizer.encode(question).ids
668
- + self.config.tokenizer.templates["query"]["suffix"]
669
  ]
670
 
671
  if reasoning:
 
51
  DEFAULT_MAX_TOKENS = 768
52
  DEFAULT_TEMPERATURE = 0.5
53
  DEFAULT_TOP_P = 0.9
54
+ DEFAULT_MAX_OBJECTS = 150
55
 
56
 
57
  @dataclass(frozen=True)
 
590
  logits_BV, _ = self._decode_one_tok(next_emb, mask, pos_ids, lora)
591
  logits_BV[:, self.config.tokenizer.answer_id] = float("-inf")
592
 
 
 
 
 
593
  pos += 1
594
 
595
  if temperature == 0:
 
616
  self,
617
  image: Optional[Union[Image.Image, EncodedImage]] = None,
618
  question: str = None,
619
+ reasoning: bool = True,
620
  spatial_refs: Optional[SpatialRefs] = None,
621
  stream: bool = False,
622
  settings: Optional[TextSamplingSettings] = None,
 
658
  spatial_toks.extend([coord_id, coord_id, size_id])
659
 
660
  prompt_tokens = [
661
+ prompt_toks + spatial_toks + self.tokenizer.encode(question).ids
 
 
 
662
  ]
663
 
664
  if reasoning: