Spaces:

CausalNLP
/

causal-agent

Running

App Files Files Community

causal-agent / auto_causal /tools /input_parser_tool.py

FireShadow

Initial clean commit

1721aea 4 months ago

raw

history blame contribute delete

4.19 kB

	"""
	Tool for parsing causal inference queries.

	This module provides a LangChain tool for parsing causal inference queries,
	extracting key elements, and guiding the workflow to the next step.
	"""

	import logging
	import re
	from typing import Dict, Any, Optional
	from langchain_core.tools import tool

	from auto_causal.components.input_parser import parse_input
	from auto_causal.config import get_llm_client
	from auto_causal.components.state_manager import create_workflow_state_update
	import json
	logger = logging.getLogger(__name__)

	@tool
	def input_parser_tool(input_text: str) -> Dict[str, Any]:
	    """
	    Parse the user's initial input text to extract query, dataset path, and description.

	    This tool uses regex to find structured information within the input text
	    and then leverages an LLM for more complex NLP tasks on the extracted query.

	    Args:
	        input_text: The combined initial input string from the user/system.

	    Returns:
	        Dict containing parsed query information, path, description, and workflow state.
	    """
	    # NOTE: LangChain's @tool exposes the docstring above as the tool
	    # description shown to the agent, so its wording is intentionally left
	    # unchanged; implementation notes live in comments instead.
	    logger.info("Running input_parser_tool on input: '%s...'", input_text[:100])

	    # --- Extract structured info using Regex ---
	    query: Optional[str] = None
	    dataset_path: Optional[str] = None
	    dataset_description: Optional[str] = None

	    # Without re.DOTALL, "." never matches a newline, so "(.*)" captures the
	    # rest of the labelled line. Unlike a pattern that demands a trailing
	    # "\n", this also matches when the label sits on the last line of the
	    # input with no newline after it.
	    query_match = re.search(r"My question is: (.*)", input_text, re.IGNORECASE)
	    if query_match:
	        query = query_match.group(1).strip()

	    path_match = re.search(r"The dataset is located at: (.*)", input_text, re.IGNORECASE)
	    if path_match:
	        dataset_path = path_match.group(1).strip()

	    # The description may span several lines, so DOTALL lets "(.*)" run to the
	    # end of the input; everything after the label is treated as description.
	    desc_match = re.search(r"Dataset Description: (.*)", input_text, re.DOTALL | re.IGNORECASE)
	    if desc_match:
	        # Strip leading/trailing whitespace/newlines from the captured group
	        dataset_description = desc_match.group(1).strip()

	    if not query:
	        logger.warning("Could not extract query from input_text using regex. Attempting full text as query.")
	        # Fallback: This is risky if input_text contains boilerplate
	        query = input_text

	    logger.info(
	        "Extracted - Query: '%s...', Path: '%s', Desc: '%s...'",
	        query[:50],
	        dataset_path,
	        str(dataset_description)[:50],
	    )

	    # --- Get LLM and Parse Query ---
	    try:
	        llm_instance = get_llm_client()
	    except Exception as e:
	        # Tool boundary: report the failure to the caller as data instead of raising.
	        logger.error("Failed to initialize LLM for input_parser_tool: %s", e)
	        return {"error": f"LLM Initialization failed: {e}", "workflow_state": {}}

	    # Call the component function to parse the extracted query
	    try:
	        parsed_info = parse_input(
	            query=query,
	            dataset_path_arg=dataset_path,  # Use extracted path
	            dataset_info=None,  # This arg seems unused by parse_input now
	            llm=llm_instance,
	        )
	    except Exception as e:
	        logger.error("Error during parse_input execution: %s", e, exc_info=True)
	        return {"error": f"Input parsing failed: {e}", "workflow_state": {}}

	    # Create workflow state update
	    workflow_update = create_workflow_state_update(
	        current_step="input_processing",
	        step_completed_flag="query_parsed",
	        next_tool="dataset_analyzer_tool",
	        next_step_reason="Now that we understand the query, we need to analyze the dataset structure",
	    )

	    # Combine results with workflow state
	    result = {
	        "original_query": parsed_info.get("original_query", query),  # Fallback to regex query
	        "dataset_path": parsed_info.get("dataset_path") or dataset_path,  # Use extracted if component missed it
	        "query_type": parsed_info.get("query_type"),
	        "extracted_variables": parsed_info.get("extracted_variables", {}),
	        "constraints": parsed_info.get("constraints", []),
	        # Pass dataset_description along
	        "dataset_description": dataset_description,
	    }
	    # Debug traces go through the module logger rather than stdout so they
	    # respect the application's logging configuration.
	    logger.debug("before workflow: %s", result)
	    # Add workflow state to the result
	    result.update(workflow_update)
	    logger.debug("after workflow: %s", result)
	    logger.info("input_parser_tool finished successfully.")
	    return result