Spaces:
Sleeping
Sleeping
雷娃
committed on
Commit
·
fd5c246
1
Parent(s):
92e3e51
add install.sh
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ def install_vllm_from_patch():
|
|
| 29 |
return False
|
| 30 |
|
| 31 |
#install vllm from patch file
|
| 32 |
-
install_vllm_from_patch()
|
| 33 |
|
| 34 |
|
| 35 |
# load model and tokenizer
|
|
@@ -76,7 +76,11 @@ def respond(
|
|
| 76 |
|
| 77 |
model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
|
| 78 |
|
| 79 |
-
model_inputs.update(dict(max_new_tokens=
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# Start a separate thread for model generation to allow streaming output
|
| 82 |
thread = Thread(
|
|
|
|
| 29 |
return False
|
| 30 |
|
| 31 |
#install vllm from patch file
|
| 32 |
+
#install_vllm_from_patch()
|
| 33 |
|
| 34 |
|
| 35 |
# load model and tokenizer
|
|
|
|
| 76 |
|
| 77 |
model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
|
| 78 |
|
| 79 |
+
model_inputs.update(dict(max_new_tokens=max_tokens,
|
| 80 |
+
temperature = temperature,
|
| 81 |
+
top_p = top_p,
|
| 82 |
+
presence_penalty = 1.5,
|
| 83 |
+
streamer=streamer))
|
| 84 |
|
| 85 |
# Start a separate thread for model generation to allow streaming output
|
| 86 |
thread = Thread(
|