雷娃 committed on
Commit
fd5c246
·
1 Parent(s): 92e3e51

add install.sh

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -29,7 +29,7 @@ def install_vllm_from_patch():
29
  return False
30
 
31
  #install vllm from patch file
32
- install_vllm_from_patch()
33
 
34
 
35
  # load model and tokenizer
@@ -76,7 +76,11 @@ def respond(
76
 
77
  model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
78
 
79
- model_inputs.update(dict(max_new_tokens=512,streamer=streamer))
 
 
 
 
80
 
81
  # Start a separate thread for model generation to allow streaming output
82
  thread = Thread(
 
29
  return False
30
 
31
  #install vllm from patch file
32
+ #install_vllm_from_patch()
33
 
34
 
35
  # load model and tokenizer
 
76
 
77
  model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
78
 
79
+ model_inputs.update(dict(max_new_tokens=max_tokens,
80
+ temperature = temperature,
81
+ top_p = top_p,
82
+ presence_penalty = 1.5,
83
+ streamer=streamer))
84
 
85
  # Start a separate thread for model generation to allow streaming output
86
  thread = Thread(