allowing multiple summaries per bill text
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
from pipelines.orm.data_science_dev.congress import BillText, BillTextSummary
|
||||
|
||||
|
||||
def test_default_summary_prefers_primary_summary() -> None:
    """Check that ``default_summary()`` picks the primary summary when one is set.

    The bill text carries one entry in ``summaries`` and a different object as
    ``primary_summary``; the primary one must be returned.
    """
    primary = BillTextSummary(id=1, bill_text_id=10, summary="primary")
    newer = BillTextSummary(id=2, bill_text_id=10, summary="latest")
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newer],
        primary_summary=primary,
    )

    chosen = text.default_summary()

    assert chosen is primary
|
||||
|
||||
|
||||
def test_default_summary_falls_back_to_latest_summary() -> None:
    """Check the fallback used when summaries exist but no primary one is set.

    The bill text is built with two summaries and no ``primary_summary``; both
    ``latest_summary()`` and ``default_summary()`` must return the summary
    listed first (the one with ``id=2``).
    """
    newest = BillTextSummary(id=2, bill_text_id=10, summary="latest")
    previous = BillTextSummary(id=1, bill_text_id=10, summary="older")
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest, previous],
    )

    assert text.latest_summary() is newest
    assert text.default_summary() is newest
|
||||
|
||||
|
||||
def test_default_summary_is_none_without_summaries() -> None:
    """Check that a bill text built without any summaries yields ``None``.

    Both ``latest_summary()`` and ``default_summary()`` are expected to return
    ``None`` in that case.
    """
    bare_text = BillText(id=10, bill_id=5, version_code="ih")

    assert bare_text.latest_summary() is None
    assert bare_text.default_summary() is None
|
||||
@@ -0,0 +1,71 @@
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
from pipelines.jobs.extract_bill_topics import (
|
||||
_select_bill_text_for_topic_extraction,
|
||||
create_select_bills_for_topic_extraction,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress import Bill, BillText, BillTextSummary
|
||||
|
||||
|
||||
def _compile_sql(statement: object) -> str:
    """Render ``statement`` as a SQL string using the PostgreSQL dialect.

    ``literal_binds`` is enabled so parameter values appear inline in the
    returned text, which lets the tests do plain substring checks.
    """
    compiled = statement.compile(
        dialect=postgresql.dialect(),
        compile_kwargs={"literal_binds": True},
    )
    return str(compiled)
|
||||
|
||||
|
||||
def test_select_bill_text_for_topic_extraction_uses_primary_summary() -> None:
    """Check bill-text selection when the text has a primary summary.

    A bill with a single bill text is passed to
    ``_select_bill_text_for_topic_extraction``; that text must be the one
    selected, and its ``default_summary()`` must be the primary summary
    rather than the entry in ``summaries``.
    """
    primary = BillTextSummary(id=1, bill_text_id=10, summary="primary")
    newest = BillTextSummary(id=2, bill_text_id=10, summary="newest")
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest],
        primary_summary=primary,
    )
    bill = Bill(id=5, congress=119, bill_type="hr", number=1, bill_texts=[text])

    picked = _select_bill_text_for_topic_extraction(bill)

    assert picked is text
    assert picked.default_summary() is primary
|
||||
|
||||
|
||||
def test_select_bill_text_for_topic_extraction_uses_latest_summary_without_primary() -> None:
    """Check bill-text selection when the text has summaries but no primary one.

    The single bill text must be selected, and its ``default_summary()`` must
    be the summary listed first in ``summaries`` (the one with ``id=2``).
    """
    newest = BillTextSummary(id=2, bill_text_id=10, summary="newest")
    previous = BillTextSummary(id=1, bill_text_id=10, summary="older")
    text = BillText(
        id=10,
        bill_id=5,
        version_code="ih",
        summaries=[newest, previous],
    )
    bill = Bill(id=5, congress=119, bill_type="hr", number=1, bill_texts=[text])

    picked = _select_bill_text_for_topic_extraction(bill)

    assert picked is text
    assert picked.default_summary() is newest
|
||||
|
||||
|
||||
def test_create_select_bills_for_topic_extraction_uses_summary_exists_subquery() -> None:
    """Check the shape of the SQL built for selecting bills for topic extraction.

    The rendered statement must mention ``bill_text_summary`` and ``EXISTS``,
    and must not contain ``bill_text.summary``.
    """
    statement = create_select_bills_for_topic_extraction()
    rendered = _compile_sql(statement)

    assert "bill_text_summary" in rendered
    assert "EXISTS" in rendered
    assert "bill_text.summary" not in rendered
|
||||
@@ -0,0 +1,58 @@
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
from pipelines.jobs.summarize_bills import (
|
||||
create_select_bill_texts_for_summarization,
|
||||
store_bill_summary_result,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress import BillText, BillTextSummary
|
||||
|
||||
|
||||
class FakeSession:
    """Minimal in-memory stand-in for a session that only records ``add`` calls."""

    # Every object handed to ``add``, in call order.
    added: list[object]

    def __init__(self) -> None:
        """Start with nothing recorded."""
        self.added = []

    def add(self, value: object) -> None:
        """Record ``value`` at the end of ``added``."""
        self.added.append(value)
|
||||
|
||||
|
||||
def _compile_sql(statement: object) -> str:
    """Render ``statement`` as a SQL string using the PostgreSQL dialect.

    ``literal_binds`` is enabled so parameter values appear inline in the
    returned text, which lets the tests do plain substring checks.
    """
    compiled = statement.compile(
        dialect=postgresql.dialect(),
        compile_kwargs={"literal_binds": True},
    )
    return str(compiled)
|
||||
|
||||
|
||||
def test_store_bill_summary_result_creates_summary_row() -> None:
    """Check that ``store_bill_summary_result`` builds and adds one summary row.

    The returned object must be the only thing added to the session, be a
    ``BillTextSummary`` linked to the given bill text, and carry the summary
    text, the model name and the expected prompt version strings.
    """
    fake_session = FakeSession()
    text = BillText(id=10, bill_id=5, version_code="ih")

    stored = store_bill_summary_result(
        session=fake_session,
        bill_text=text,
        summary="A summary",
        model="gpt-5.4-mini",
    )

    # Exactly one object was added to the session, and it is the returned row.
    assert fake_session.added == [stored]
    assert isinstance(stored, BillTextSummary)

    # The row points back at the bill text and keeps the provided values.
    assert stored.bill_text is text
    assert stored.summary == "A summary"
    assert stored.summarization_model == "gpt-5.4-mini"
    assert stored.summarization_system_prompt_version == "v1.2"
    assert stored.summarization_user_prompt_version == "v1"
|
||||
|
||||
|
||||
def test_create_select_bill_texts_for_summarization_excludes_existing_summaries() -> None:
    """Check the SQL built with ``force=False`` filters on existing summaries.

    The rendered statement must mention ``bill_text_summary`` inside a negated
    ``EXISTS`` (either spelling is accepted) and must not contain
    ``bill_text.summary``.
    """
    statement = create_select_bill_texts_for_summarization(force=False)
    rendered = _compile_sql(statement)

    negated_exists_spellings = ("NOT (EXISTS", "NOT EXISTS")

    assert "bill_text_summary" in rendered
    assert any(spelling in rendered for spelling in negated_exists_spellings)
    assert "bill_text.summary" not in rendered
|
||||
|
||||
|
||||
def test_create_select_bill_texts_for_summarization_force_skips_summary_filter() -> None:
    """Check the SQL built with ``force=True`` never mentions ``bill_text_summary``."""
    statement = create_select_bill_texts_for_summarization(force=True)
    rendered = _compile_sql(statement)

    assert "bill_text_summary" not in rendered
|
||||
Reference in New Issue
Block a user