Spaces:
Sleeping
Sleeping
雷娃
committed on
Commit
·
fd5c246
1
Parent(s):
92e3e51
add install.sh
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ def install_vllm_from_patch():
|
|
| 29 |
return False
|
| 30 |
|
| 31 |
#install vllm from patch file
|
| 32 |
-
install_vllm_from_patch()
|
| 33 |
|
| 34 |
|
| 35 |
# load model and tokenizer
|
|
@@ -76,7 +76,11 @@ def respond(
|
|
| 76 |
|
| 77 |
model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
|
| 78 |
|
| 79 |
-
model_inputs.update(dict(max_new_tokens=
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
# Start a separate thread for model generation to allow streaming output
|
| 82 |
thread = Thread(
|
|
|
|
| 29 |
return False
|
| 30 |
|
| 31 |
#install vllm from patch file
|
| 32 |
+
#install_vllm_from_patch()
|
| 33 |
|
| 34 |
|
| 35 |
# load model and tokenizer
|
|
|
|
| 76 |
|
| 77 |
model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
|
| 78 |
|
| 79 |
+
model_inputs.update(dict(max_new_tokens=max_tokens,
|
| 80 |
+
temperature = temperature,
|
| 81 |
+
top_p = top_p,
|
| 82 |
+
presence_penalty = 1.5,
|
| 83 |
+
streamer=streamer))
|
| 84 |
|
| 85 |
# Start a separate thread for model generation to allow streaming output
|
| 86 |
thread = Thread(
|