Spaces:

VLAI-AIVN
/

AIO2024M10_Tutorial_Tool_Calling

Running

App Files Files Community

AIO2024M10_Tutorial_Tool_Calling / src /llm /chat.py

wjnwjn59

update parsing

99b9f93 7 months ago

raw

history blame contribute delete

2.92 kB

	import ast, sys, json, torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
	from ..tools import run_detection, run_segmentation, FUNCTION_SCHEMA

	# Dispatch table: maps the function name the model emits in its reply to the
	# callable that is invoked for it.
	TOOLS = {"run_detection": run_detection, "run_segmentation": run_segmentation}

	# System prompt sent with every request. It instructs the model to answer only
	# with a list of calls in the form [func_name(param=value, ...), ...] and embeds
	# the tool descriptions via the {functions} placeholder.
	# NOTE(review): str.format() inserts str(FUNCTION_SCHEMA); if FUNCTION_SCHEMA is
	# a Python list/dict rather than a pre-serialised string, the model sees a Python
	# repr although the prompt says "JSON format" - confirm against ..tools.
	SYSTEM_PROMPT = """
	You are an expert in composing functions. You are given a question and a set of possible functions.
	Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
	If none of the function can be used, point it out. If the given question lacks the parameters required by the function,
	also point it out. You should only return the function call in tools call sections.

	If you decide to invoke any of the function(s), you MUST put it in the format of [func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]\n
	You SHOULD NOT include any other text in the response.

	Here is a list of functions in JSON format that you can invoke.\n\n{functions}\n""".format(functions=FUNCTION_SCHEMA)


	# Target device for the model and its inputs: the GPU when CUDA is available,
	# otherwise the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	class FunctionCallingChat:
	    """Ask a causal language model for tool calls and execute them.

	    The model is prompted with ``SYSTEM_PROMPT`` and is expected to reply with
	    text such as ``[run_detection(image_path="a.jpg")]``. The reply is parsed
	    with :mod:`ast` (never evaluated) and each call is dispatched through the
	    module-level ``TOOLS`` table.
	    """

	    def __init__(self, model_id: str = "meta-llama/Llama-3.2-1B-Instruct", temperature: float = 0.7):
	        """Load the tokenizer and model for ``model_id``.

	        Args:
	            model_id: Identifier passed to ``from_pretrained`` for both the
	                tokenizer and the model.
	            temperature: Sampling temperature used for every generation.
	        """
	        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
	        self.model = AutoModelForCausalLM.from_pretrained(
	            model_id, device_map=device, torch_dtype=torch.bfloat16
	        )
	        self.temperature = temperature

	    @staticmethod
	    def _parse_tool_calls(text: str) -> list:
	        """Parse a model reply into ``(function_name, keyword_arguments)`` pairs.

	        Accepts either a list/tuple of calls (``[f(a=1), g(b="x")]``) or a
	        single bare call (``f(a=1)``). Only plain-name callees and literal
	        keyword arguments are allowed; nothing in ``text`` is executed.

	        Args:
	            text: Raw text produced by the model.

	        Returns:
	            A list of ``(name, kwargs)`` tuples in the order they appear.

	        Raises:
	            ValueError: If ``text`` is not valid Python expression syntax, an
	                element is not a call to a plain name, a call uses positional
	                or ``**`` arguments, or an argument value is not a literal.
	        """
	        try:
	            # Strip first: leading spaces would otherwise be reported as an
	            # indentation error even though the call itself is well-formed.
	            tree = ast.parse(text.strip(), mode="eval")
	        except SyntaxError as err:
	            raise ValueError("reply is not a valid expression") from err

	        body = tree.body
	        # Small models often drop the surrounding brackets for a single call.
	        nodes = body.elts if isinstance(body, (ast.List, ast.Tuple)) else [body]

	        calls = []
	        for node in nodes:
	            if not (isinstance(node, ast.Call) and isinstance(node.func, ast.Name)):
	                raise ValueError("every element must be a call to a plain function name")
	            if node.args:
	                # The prompt mandates name=value parameters; positional values
	                # cannot be mapped to a parameter, so reject them explicitly
	                # instead of silently dropping them.
	                raise ValueError("positional arguments are not supported")

	            parameters = {}
	            for kw in node.keywords:
	                if kw.arg is None:  # ``**mapping`` has no argument name
	                    raise ValueError("'**' argument unpacking is not supported")
	                try:
	                    parameters[kw.arg] = ast.literal_eval(kw.value)
	                except (ValueError, TypeError) as err:
	                    raise ValueError(
	                        "argument %r is not a literal value" % kw.arg
	                    ) from err
	            calls.append((node.func.id, parameters))
	        return calls

	    def __call__(self, user_msg: str) -> dict:
	        """Generate tool calls for ``user_msg`` and run them.

	        Args:
	            user_msg: The user's request in natural language.

	        Returns:
	            A dict with two keys: ``"raw_tool_call"`` holds the model's decoded
	            reply, and ``"results"`` holds the list of values returned by the
	            invoked tools (in call order) or, when the reply cannot be parsed
	            or names an unknown tool, an explanatory string.

	        Raises:
	            Exception: Whatever an invoked tool raises is propagated unchanged.
	        """
	        messages = [
	            {"role": "system", "content": SYSTEM_PROMPT},
	            {"role": "user", "content": user_msg},
	        ]

	        generation_cfg = GenerationConfig(
	            max_new_tokens=128, temperature=self.temperature, top_p=0.95, do_sample=True
	        )

	        # return_dict=True yields both input_ids and attention_mask so that
	        # generate() receives an explicit attention mask.
	        inputs = self.tokenizer.apply_chat_template(
	            messages, tokenize=True, add_generation_prompt=True,
	            return_dict=True, return_tensors="pt"
	        ).to(device)

	        output = self.model.generate(**inputs, generation_config=generation_cfg)

	        # Decode only the newly generated tokens. Splitting the full transcript
	        # on the word "assistant" breaks as soon as that word occurs in the
	        # reply itself.
	        prompt_len = inputs["input_ids"].shape[-1]
	        tool_calls_lst_str = self.tokenizer.decode(
	            output[0][prompt_len:], skip_special_tokens=True
	        ).strip()

	        try:
	            calls = self._parse_tool_calls(tool_calls_lst_str)
	        except ValueError:
	            return {"raw_tool_call": tool_calls_lst_str,
	                    "results": "Cannot parse the function call."}

	        # Validate every name before running anything so that a hallucinated
	        # tool does not leave the request half-executed.
	        unknown = [name for name, _ in calls if name not in TOOLS]
	        if unknown:
	            return {"raw_tool_call": tool_calls_lst_str,
	                    "results": "Unknown function(s): " + ", ".join(unknown)}

	        tool_calls_result = [TOOLS[name](**parameters) for name, parameters in calls]

	        return {"raw_tool_call": tool_calls_lst_str,
	                "results": tool_calls_result}