Skip to content
Merged
Show file tree
Hide file tree
Changes from 57 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
8df0da4
Update name
ppinchuk Feb 1, 2026
df44191
Rename function and update logic
ppinchuk Feb 1, 2026
cad91fb
Use new methods
ppinchuk Feb 1, 2026
3fe35e1
Update example
ppinchuk Feb 1, 2026
a64bda5
Add label
ppinchuk Feb 1, 2026
5111a0c
Add label tag
ppinchuk Feb 2, 2026
0e2ca91
Update techs to use plugin
ppinchuk Feb 4, 2026
fdff00c
Jurisdiction now tracks website URL
ppinchuk Feb 4, 2026
d70e287
Drop tech specs
ppinchuk Feb 4, 2026
01c4170
New cost computation utilities
ppinchuk Feb 4, 2026
904c227
New default categories
ppinchuk Feb 4, 2026
0f80f53
Minor updates
ppinchuk Feb 4, 2026
dfebad0
Add `BaseExtractionPlugin` interface
ppinchuk Feb 4, 2026
eb368da
MVP of `ExtractionPlugin`
ppinchuk Feb 4, 2026
50849a3
Add ordinance plugin utility classes
ppinchuk Feb 4, 2026
ea7ab26
Populate namespace
ppinchuk Feb 4, 2026
2479ea5
Update import
ppinchuk Feb 4, 2026
afbd958
Add exception
ppinchuk Feb 4, 2026
ce9b159
Remove unused code
ppinchuk Feb 4, 2026
3efd82e
Docstring update
ppinchuk Feb 4, 2026
5de50e9
Add test file
ppinchuk Feb 4, 2026
3459210
Fix up tests
ppinchuk Feb 4, 2026
1d6df1e
Minor update to example
ppinchuk Feb 4, 2026
14f1d2d
Add more models
ppinchuk Feb 4, 2026
eedb3ca
Drop function
ppinchuk Feb 5, 2026
9873f71
db now has source and date columns when first saved to disk
ppinchuk Feb 5, 2026
dd6cdb2
More generalized file moving functions
ppinchuk Feb 5, 2026
e09ff80
find docs now gets a context
ppinchuk Feb 5, 2026
f532726
New COMPASS exception
ppinchuk Feb 5, 2026
a0556c7
docstring
ppinchuk Feb 5, 2026
f94d1e7
Docstring updates
ppinchuk Feb 5, 2026
71bf677
docstrings
ppinchuk Feb 5, 2026
1bdbe8d
More classes
ppinchuk Feb 5, 2026
d34f129
Add context class
ppinchuk Feb 5, 2026
9e70c2d
Update import
ppinchuk Feb 5, 2026
36dc7ef
Add more tests
ppinchuk Feb 5, 2026
f6af95d
Simpler parametrize
ppinchuk Feb 5, 2026
ed21ca9
Interface now uses extraction context
ppinchuk Feb 5, 2026
5059f5b
update tests
ppinchuk Feb 5, 2026
ad142d7
decouple jurisdiction from context in function
ppinchuk Feb 5, 2026
38288a2
Docstrings
ppinchuk Feb 5, 2026
1532b13
Minor tweaks
ppinchuk Feb 5, 2026
b40fada
Merge location into jurisdictions module
ppinchuk Feb 5, 2026
e1a7319
Add `jurisdictions_from_df` func
ppinchuk Feb 5, 2026
6a0b622
Update water extraction code
ppinchuk Feb 5, 2026
91507ed
Processing now handles writing ord db
ppinchuk Feb 5, 2026
c122fd5
Align func with others from module
ppinchuk Feb 5, 2026
9ae2f82
Minor adjustment
ppinchuk Feb 5, 2026
6c830f4
Remove unused func
ppinchuk Feb 5, 2026
3c68290
Add abstract method to base class
ppinchuk Feb 5, 2026
4802a09
Remove unused code
ppinchuk Feb 5, 2026
ca2ff31
Add tests
ppinchuk Feb 5, 2026
81ef703
Update script to correctly drive processing
ppinchuk Feb 5, 2026
a0166c6
Fix tests
ppinchuk Feb 5, 2026
c9c44fa
Add costs tests
ppinchuk Feb 5, 2026
fa73fe4
Fix tests
ppinchuk Feb 5, 2026
acfb0d3
Rename attribute
ppinchuk Feb 5, 2026
e5c9650
PR review
ppinchuk Feb 5, 2026
7edc449
Fix docs
ppinchuk Feb 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions compass/_cli/finalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from rich.console import Console

from compass.utilities import Directories
from compass.utilities.location import Jurisdiction
from compass.utilities.jurisdictions import Jurisdiction
from compass.utilities.parsing import load_config
from compass.utilities.finalize import save_run_meta, doc_infos_to_db, save_db
from compass.scripts.process import _initialize_model_params
Expand Down Expand Up @@ -116,16 +116,7 @@ def _compile_db(jurisdictions, dirs):

doc_info = doc_info[0]
all_doc_infos.append(
{
"ord_db_fp": ord_db_fp,
"source": doc_info.get("source"),
"date": (
doc_info.get("effective_year"),
doc_info.get("effective_month"),
doc_info.get("effective_day"),
),
"jurisdiction": jurisdiction,
}
{"ord_db_fp": ord_db_fp, "jurisdiction": jurisdiction}
)

db, __ = doc_infos_to_db(all_doc_infos)
Expand Down
1 change: 0 additions & 1 deletion compass/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from .base import (
EXTRACT_ORIGINAL_SETBACK_TEXT_PROMPT,
BaseTextExtractor,
empty_output,
llm_response_starts_with_no,
llm_response_starts_with_yes,
Expand Down
69 changes: 1 addition & 68 deletions compass/common/base.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
"""Common ordinance extraction components"""

import asyncio
import logging
from datetime import datetime

import networkx as nx
from elm import ApiBase

from compass.common.tree import AsyncDecisionTree
from compass.utilities import llm_response_as_json
from compass.utilities.enums import LLMUsageCategory
from compass.utilities.parsing import (
merge_overlapping_texts,
clean_backticks_from_llm_response,
)

from compass.exceptions import COMPASSRuntimeError


Expand Down Expand Up @@ -970,64 +964,3 @@ def setup_graph_permitted_use_districts(**kwargs):
),
)
return G


class BaseTextExtractor:
    """Shared building block for LLM-driven verbatim text extraction"""

    SYSTEM_MESSAGE = (
        "You are a text extraction assistant. Your job is to extract only "
        "verbatim, **unmodified** excerpts from provided legal or policy "
        "documents. Do not interpret or paraphrase. Do not summarize. Only "
        "return exactly copied segments that match the specified scope. If "
        "the relevant content appears within a table, return the entire "
        "table, including headers and footers, exactly as formatted."
    )
    """System message for text extraction LLM calls"""
    _USAGE_LABEL = LLMUsageCategory.DOCUMENT_ORDINANCE_SUMMARY

    def __init__(self, llm_caller):
        """

        Parameters
        ----------
        llm_caller : LLMCaller
            Caller object whose ``call`` method is awaited once per
            text chunk and whose ``kwargs`` mapping is consulted for
            the model name when counting tokens.
        """
        self.llm_caller = llm_caller

    async def _process(self, text_chunks, instructions, is_valid_chunk):
        """Run the extraction prompt over every chunk and merge results

        Parameters
        ----------
        text_chunks : sized iterable
            Chunks of text; one LLM call is scheduled per chunk.
        instructions : str
            Prompt text placed ahead of each chunk in the LLM message.
        is_valid_chunk : callable
            Predicate applied to each raw LLM response; responses for
            which it returns a falsy value are discarded.

        Returns
        -------
        object
            Whatever ``merge_overlapping_texts`` returns for the
            retained, backtick-cleaned responses.
        """
        logger.info(
            "Extracting summary text from %d text chunks asynchronously...",
            len(text_chunks),
        )
        logger.debug("Model instructions are:\n%s", instructions)

        # Every sub-task reuses the name of the task we are running in
        # (presumably so name-keyed logging/usage tracking stays tied to
        # the caller -- confirm against the logging setup).
        parent_name = asyncio.current_task().get_name()

        pending = []
        for chunk in text_chunks:
            request = self.llm_caller.call(
                sys_msg=self.SYSTEM_MESSAGE,
                content=f"{instructions}\n\n# TEXT #\n\n{chunk}",
                usage_sub_label=self._USAGE_LABEL,
            )
            pending.append(asyncio.create_task(request, name=parent_name))

        responses = await asyncio.gather(*pending)

        # Validate the raw response first; only survivors are cleaned.
        retained = []
        for response in responses:
            if is_valid_chunk(response):
                retained.append(clean_backticks_from_llm_response(response))

        merged_text = merge_overlapping_texts(retained)

        model_name = self.llm_caller.kwargs.get("model", "gpt-4")
        logger.debug(
            "Final summary contains %d tokens",
            ApiBase.count_tokens(merged_text, model=model_name),
        )
        return merged_text
2 changes: 1 addition & 1 deletion compass/data/tx_water_districts.csv
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Texas,,High Plains,Underground Water Conservation District,36,
Texas,,Hill Country,Underground Water Conservation District,37,
Texas,,Hudspeth County,Underground Water Conservation District,38,
Texas,,Irion County,Water Conservation District,39,
Texas,,Jeff Davis,County Underground Water Conservation District,40,
Texas,,Jeff Davis County,Underground Water Conservation District,40,
Texas,,Kenedy County,Groundwater Conservation District,41,
Texas,,Kimble County,Groundwater Conservation District,42,
Texas,,Kinney County,Groundwater Conservation District,43,
Expand Down
8 changes: 8 additions & 0 deletions compass/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,17 @@ class COMPASSNotInitializedError(COMPASSError):
"""COMPASS not initialized error"""


class COMPASSTypeError(COMPASSError, TypeError):
    """COMPASS TypeError

    Derives from both ``COMPASSError`` and the built-in ``TypeError``,
    so handlers written for either base class will catch it.
    """


class COMPASSValueError(COMPASSError, ValueError):
    """COMPASS ValueError

    Derives from both ``COMPASSError`` and the built-in ``ValueError``,
    so handlers written for either base class will catch it.
    """


class COMPASSRuntimeError(COMPASSError, RuntimeError):
    """COMPASS RuntimeError

    Derives from both ``COMPASSError`` and the built-in
    ``RuntimeError``, so handlers written for either base class will
    catch it.
    """


class COMPASSPluginConfigurationError(COMPASSRuntimeError):
    """COMPASS Plugin Configuration Error

    Subclass of ``COMPASSRuntimeError``; handlers for that error (and
    for its own base classes) will also catch this one.
    """
6 changes: 3 additions & 3 deletions compass/extraction/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Ordinance text extraction tooling"""

from .apply import (
check_for_ordinance_info,
check_for_relevant_text,
extract_date,
extract_ordinance_text_with_llm,
extract_ordinance_text_with_ngram_validation,
extract_relevant_text_with_llm,
extract_relevant_text_with_ngram_validation,
extract_ordinance_values,
)
Loading
Loading