Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions fix_issue_74.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```json
{
"solution_code": "# ============================================================\n# FILE: app/services/nlq_service.py\n# Natural Language Query Service for FinMind\n# ============================================================\n\nimport json\nimport re\nfrom datetime import datetime, date, timedelta\nfrom typing import Optional\nfrom dateutil.relativedelta import relativedelta\n\ntry:\n import openai\n OPENAI_AVAILABLE = True\nexcept ImportError:\n OPENAI_AVAILABLE = False\n\n\nclass NLQService:\n \"\"\"\n Natural Language Query Service.\n Parses user questions like 'How much did I spend on food last quarter?'\n into structured queries, executes them against the DB, and returns answers.\n \"\"\"\n\n SYSTEM_PROMPT = \"\"\"\nYou are a financial data assistant. Parse the user's natural language finance query into a JSON object.\n\nOutput ONLY valid JSON with these fields:\n{\n \"intent\": \"spend_summary\" | \"spend_by_category\" | \"bill_summary\" | \"top_categories\" | \"transaction_list\",\n \"category\": <string or null>,\n \"date_range\": {\n \"start\": \"YYYY-MM-DD\",\n \"end\": \"YYYY-MM-DD\"\n },\n \"limit\": <integer or null>\n}\n\nDate range rules (relative to today = {today}):\n- \"last month\" = first to last day of previous calendar month\n- \"last quarter\" = first to last day of the previous calendar quarter\n- \"this month\" = first day of current month to today\n- \"this year\" = Jan 1 of current year to today\n- \"last year\" = Jan 1 to Dec 31 of previous year\n- \"last 30 days\" / \"past 30 days\" = today-30 days to today\n- \"last week\" = Monday to Sunday of previous week\n- \"yesterday\" = yesterday only\n\nCategory examples: food, groceries, transport, entertainment, utilities, health, shopping, rent.\nIf no category mentioned, set category to null.\nIf no time period mentioned, default to current month.\n\"\"\"\n\n def __init__(self, db_session, redis_client=None, openai_api_key=None, openai_model='gpt-3.5-turbo'):\n self.db = db_session\n self.redis = redis_client\n self.openai_api_key = openai_api_key\n self.openai_model = openai_model\n if openai_api_key and OPENAI_AVAILABLE:\n openai.api_key = openai_api_key\n\n # ------------------------------------------------------------------\n # Public entry point\n # ------------------------------------------------------------------\n\n def answer(self, user_id: int, question: str) -> dict:\n \"\"\"\n Main entry point. Returns:\n {\n \"answer\": str,\n \"structured_query\": dict,\n \"data\": list | dict\n }\n \"\"\"\n parsed = self._parse_query(question)\n result = self._execute_query(user_id, parsed)\n answer_text = self._format_answer(question, parsed, result)\n return {\n \"answer\": answer_text,\n \"structured_query\": parsed,\n \"data\": result\n }\n\n # ------------------------------------------------------------------\n # Step 1: Parse NL -> structured query\n # ------------------------------------------------------------------\n\n def _parse_query(self, question: str) -> dict:\n \"\"\"Try LLM first, fall back to rule-based parser.\"\"\"\n if OPENAI_AVAILABLE and self.openai_api_key:\n try:\n return self._parse_with_llm(question)\n except Exception:\n pass\n return self._parse_with_rules(question)\n\n def _parse_with_llm(self, question: str) -> dict:\n today = date.today().isoformat()\n prompt = self.SYSTEM_PROMPT.format(today=today)\n response = openai.ChatCompletion.create(\n model=self.openai_model,\n messages=[\n {\"role\": \"system\", \"content\": prompt},\n {\"role\": \"user\", \"content\": question}\n ],\n temperature=0,\n max_tokens=256\n )\n raw = response.choices[0].message.content.strip()\n # Strip markdown code fences if present\n raw = re.sub(r'^```json|^```|```$', '', raw, flags=re.MULTILINE).strip()\n parsed = json.loads(raw)\n self._validate_parsed(parsed)\n return parsed\n\n def _parse_with_rules(self, question: str) -> dict:\n \"\"\"Rule-based fallback parser.\"\"\"\n q = question.lower()\n today = date.today()\n\n # Detect intent\n intent = 'spend_summary'\n if any(w in q for w in ['top', 'most', 'biggest', 'largest']):\n intent = 'top_categories'\n elif any(w in q for w in ['list', 'show', 'transactions', 'expenses']):\n intent = 'transaction_list'\n elif any(w in q for w in ['bill', 'bills', 'due']):\n intent = 'bill_summary'\n elif 'categor' in q or 'breakdown' in q:\n intent = 'spend_by_category'\n\n # Detect date range\n start, end = self._resolve_date_range(q, today)\n\n # Detect category\n known_categories = [\n 'food', 'groceries', 'grocery', 'transport', 'transportation',\n 'entertainment', 'utilities', 'health', 'healthcare', 'shopping',\n 'rent', 'housing', 'travel', 'education', 'clothing', 'restaurant',\n 'dining', 'coffee', 'subscriptions', 'insurance'\n ]\n category = None\n for cat in known_categories:\n if cat in q:\n # Normalize aliases\n cat_map = {\n 'grocery': 'groceries',\n 'healthcare': 'health',\n 'transportation': 'transport',\n 'dining': 'food',\n 'restaurant': 'food',\n }\n category = cat_map.get(cat, cat)\n if category:\n intent = 'spend_by_category'\n break\n\n # Detect limit\n limit = None\n m = re.search(r'top\\s+(\\d+)', q)\n if m:\n limit = int(m.group(1))\n\n return {\n \"intent\": intent,\n \"category\": category,\n \"date_range\": {\n \"start\": start.isoformat(),\n \"end\": end.isoformat()\n },\n \"limit\": limit\n }\n\n def _resolve_date_range(self, q: str, today: date):\n \"\"\"Return (start_date, end_date) tuple.\"\"\"\n # last quarter\n if 'last quarter' in q or 'previous quarter' in q:\n current_quarter = (today.month - 1) // 3 + 1\n if current_quarter == 1:\n start = date(today.year - 1, 10, 1)\n end = date(today.year - 1, 12, 31)\n else:\n start_month = (current_quarter - 2) * 3 + 1\n start = date(today.year, start_month, 1)\n end = date(today.year, start_month + 2, 1) + relativedelta(months=1) - timedelta(days=1)\n return start, end\n\n # this quarter\n if 'this quarter' in q or 'current quarter' in q:\n current_quarter = (today.month - 1) // 3 + 1\n start_month = (current_quarter - 1) * 3 + 1\n start = date(today.year, start_month, 1)\n return start, today\n\n # last year\n if 'last year' in q or 'previous year' in q:\n start = date(today.year - 1, 1, 1)\n end = date(today.year - 1, 12, 31)\n return start, end\n\n # this year\n if 'this year' in q or 'current year' in q or 'year to date' in q or 'ytd' in q:\n return date(today.year, 1, 1), today\n\n # last month / previous month\n if 'last month' in q or 'previous month' in q:\n first_of_current = date(today.year, today.month, 1)\n end = first_of_current - timedelta(days=1)\n start = date(end.year, end.month, 1)\n return start, end\n\n # this month / current month\n if 'this month' in q or 'current month' in q:\n return date(today.year, today.month, 1), today\n\n # last week / previous week\n if 'last week' in q or 'previous week' in q:\n start = today - timedelta(days=today.weekday() + 7)\n end = start + timedelta(days=6)\n return start, end\n\n # last N days\n m = re.search(r'last\\s+(\\d+)\\s+days?', q)\n if m:\n n = int(m.group(1))\n return today - timedelta(days=n), today\n\n # past N days\n m = re.search(r'past\\s+(\\d+)\\s+days?', q)\n if m:\n n = int(m.group(1))\n return today - timedelta(days=n), today\n\n # yesterday\n if 'yesterday' in q:\n yesterday = today - timedelta(days=1)\n return yesterday, yesterday\n\n # today\n if 'today' in q:\n return today, today\n\n # Default: current month\n return date(today.year, today.month, 1), today\n\n def _validate_parsed(self, parsed: dict):\n \"\"\"Basic validation of LLM output.\"\"\"\n assert 'intent' in parsed\n assert 'date_range' in parsed\n assert 'start' in parsed['date_range']\n assert 'end' in parsed['date_range']\n # Validate date format\n datetime.strptime(parsed['date_range']['start'], '%Y-%m-%d')\n datetime.strptime(parsed['date_range']['end'], '%Y-%m-%d')\n\n # ------------------------------------------------------------------\n # Step 2: Execute structured query\n # ------------------------------------------------------------------\n\n def _execute_query(self, user_id: int, parsed: dict) -> dict:\n intent = parsed.get('intent', 'spend_summary')\n start = parsed['date_range']['start']\n end = parsed['date_range']['end']\n category = parsed.get('category')\n limit = parsed.get('limit') or 5\n\n dispatch = {\n 'spend_summary