allowing multiple summaries per bill text

This commit is contained in:
2026-05-08 18:30:07 -04:00
parent de9e59b5f4
commit d3fe6dba56
9 changed files with 483 additions and 25 deletions
+36
View File
@@ -0,0 +1,36 @@
from pipelines.orm.data_science_dev.congress import BillText, BillTextSummary
def test_default_summary_prefers_primary_summary() -> None:
    """An explicitly set primary summary is returned by ``default_summary``."""
    primary = BillTextSummary(id=1, bill_text_id=10, summary="primary")
    latest = BillTextSummary(id=2, bill_text_id=10, summary="latest")

    # The primary summary is deliberately NOT in ``summaries`` so the test can
    # tell which source ``default_summary`` picked.
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[latest],
        primary_summary=primary,
    )

    chosen = text.default_summary()
    assert chosen is primary
def test_default_summary_falls_back_to_latest_summary() -> None:
    """Without a primary summary, ``default_summary`` matches ``latest_summary``."""
    newest = BillTextSummary(id=2, bill_text_id=10, summary="latest")
    previous = BillTextSummary(id=1, bill_text_id=10, summary="older")

    # The newest row is listed first; presumably the ORM relationship is
    # ordered newest-first -- confirm against the mapping.
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest, previous],
    )

    assert text.latest_summary() is newest
    assert text.default_summary() is newest
def test_default_summary_is_none_without_summaries() -> None:
    """Both summary accessors return ``None`` when no summaries were supplied."""
    text = BillText(id=10, bill_id=5, version_code="ih")

    latest = text.latest_summary()
    assert latest is None

    fallback = text.default_summary()
    assert fallback is None
+71
View File
@@ -0,0 +1,71 @@
from sqlalchemy.dialects import postgresql
from pipelines.jobs.extract_bill_topics import (
_select_bill_text_for_topic_extraction,
create_select_bills_for_topic_extraction,
)
from pipelines.orm.data_science_dev.congress import Bill, BillText, BillTextSummary
def _compile_sql(statement: object) -> str:
    """Render *statement* as PostgreSQL SQL text with bound values inlined."""
    compiled = statement.compile(
        dialect=postgresql.dialect(),
        # literal_binds renders parameter values into the SQL string so the
        # tests can do plain substring checks on the output.
        compile_kwargs={"literal_binds": True},
    )
    return str(compiled)
def test_select_bill_text_for_topic_extraction_uses_primary_summary() -> None:
    """The selected bill text reports its primary summary as the default."""
    primary = BillTextSummary(id=1, bill_text_id=10, summary="primary")
    newest = BillTextSummary(id=2, bill_text_id=10, summary="newest")
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest],
        primary_summary=primary,
    )
    bill = Bill(
        id=5,
        congress=119,
        bill_type="hr",
        number=1,
        bill_texts=[text],
    )

    chosen = _select_bill_text_for_topic_extraction(bill)

    assert chosen is text
    assert chosen.default_summary() is primary
def test_select_bill_text_for_topic_extraction_uses_latest_summary_without_primary() -> None:
    """With no primary summary, the selected text defaults to the first-listed summary."""
    newest = BillTextSummary(id=2, bill_text_id=10, summary="newest")
    previous = BillTextSummary(id=1, bill_text_id=10, summary="older")
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest, previous],
    )
    bill = Bill(
        id=5,
        congress=119,
        bill_type="hr",
        number=1,
        bill_texts=[text],
    )

    chosen = _select_bill_text_for_topic_extraction(bill)

    assert chosen is text
    assert chosen.default_summary() is newest
def test_create_select_bills_for_topic_extraction_uses_summary_exists_subquery() -> None:
    """The bill query references ``bill_text_summary`` via EXISTS, not ``bill_text.summary``."""
    rendered = _compile_sql(create_select_bills_for_topic_extraction())

    for required_fragment in ("bill_text_summary", "EXISTS"):
        assert required_fragment in rendered
    assert "bill_text.summary" not in rendered
+58
View File
@@ -0,0 +1,58 @@
from sqlalchemy.dialects import postgresql
from pipelines.jobs.summarize_bills import (
create_select_bill_texts_for_summarization,
store_bill_summary_result,
)
from pipelines.orm.data_science_dev.congress import BillText, BillTextSummary
class FakeSession:
    """Minimal session stand-in that records every object passed to ``add``.

    Only ``add`` is implemented; the recorded objects are exposed through the
    ``added`` list so tests can assert on what would have been staged.
    """

    def __init__(self) -> None:
        # Objects handed to ``add``, in call order.
        self.added: list[object] = []

    def add(self, value: object) -> None:
        """Append *value* to ``added`` instead of touching a database."""
        self.added.append(value)
def _compile_sql(statement: object) -> str:
    """Render *statement* as PostgreSQL SQL text with bound values inlined."""
    render_options = {"literal_binds": True}
    pg_dialect = postgresql.dialect()
    compiled = statement.compile(dialect=pg_dialect, compile_kwargs=render_options)
    return str(compiled)
def test_store_bill_summary_result_creates_summary_row() -> None:
    """``store_bill_summary_result`` adds one populated ``BillTextSummary`` to the session."""
    fake_session = FakeSession()
    text = BillText(id=10, bill_id=5, version_code="ih")

    row = store_bill_summary_result(
        session=fake_session,
        bill_text=text,
        summary="A summary",
        model="gpt-5.4-mini",
    )

    # Exactly one object was staged, and it is the returned row.
    assert fake_session.added == [row]
    assert isinstance(row, BillTextSummary)
    assert row.bill_text is text
    assert row.summary == "A summary"
    assert row.summarization_model == "gpt-5.4-mini"
    # Prompt versions are pinned as literals; presumably they mirror constants
    # in the summarize_bills job -- confirm when those constants change.
    assert row.summarization_system_prompt_version == "v1.2"
    assert row.summarization_user_prompt_version == "v1"
def test_create_select_bill_texts_for_summarization_excludes_existing_summaries() -> None:
    """Without ``force``, the query filters on NOT EXISTS against ``bill_text_summary``."""
    rendered = _compile_sql(create_select_bill_texts_for_summarization(force=False))

    assert "bill_text_summary" in rendered
    # Accept either spelling of the negated EXISTS clause.
    negated_exists_forms = ("NOT (EXISTS", "NOT EXISTS")
    assert any(form in rendered for form in negated_exists_forms)
    assert "bill_text.summary" not in rendered
def test_create_select_bill_texts_for_summarization_force_skips_summary_filter() -> None:
    """With ``force=True``, the rendered query never mentions ``bill_text_summary``."""
    forced_statement = create_select_bill_texts_for_summarization(force=True)
    rendered = _compile_sql(forced_statement)

    assert "bill_text_summary" not in rendered