allowing multiple summaries per bill text

This commit is contained in:
2026-05-08 18:30:07 -04:00
parent de9e59b5f4
commit d3fe6dba56
9 changed files with 483 additions and 25 deletions
@@ -0,0 +1,211 @@
"""move bill text summaries into a child table.
Revision ID: 4b2e1c9d8f70
Revises: b9360b0b0c22
Create Date: 2026-05-03 00:00:00.000000
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import sqlalchemy as sa
from alembic import op
from pipelines.orm import DataScienceDevBase
if TYPE_CHECKING:
from collections.abc import Sequence
# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the migration it revises.
revision: str = "4b2e1c9d8f70"
down_revision: str | None = "b9360b0b0c22"
# This revision declares no branch labels and no dependencies on other revisions.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# Schema name read from the project's ORM base; upgrade() and downgrade() pass it
# to every operation and interpolate it into their raw SQL.
schema = DataScienceDevBase.schema_name
def upgrade() -> None:
    """Split summaries out of ``bill_text`` into a new ``bill_text_summary`` table.

    Runs these steps in order:

    1. Create ``bill_text_summary`` (rows are deleted with their parent
       ``bill_text`` via ``ON DELETE CASCADE``) plus two non-unique indexes.
    2. Add the nullable ``bill_text.primary_summary_id`` column with an
       ``ON DELETE SET NULL`` foreign key to the new table.
    3. Copy every non-blank ``bill_text.summary`` and its summarization
       metadata into the new table.
    4. Drop the four migrated columns from ``bill_text``.
    """
    parent_table = "bill_text"
    summary_table = "bill_text_summary"
    # Columns that move from bill_text to bill_text_summary, in drop order.
    migrated_columns = (
        "summary",
        "summarization_model",
        "summarization_user_prompt_version",
        "summarization_system_prompt_version",
    )

    def timestamp_column(name: str) -> sa.Column:
        # Timezone-aware, non-null timestamp that the database defaults to now().
        return sa.Column(
            name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        )

    # Only the summary text itself is required; the metadata columns stay nullable.
    metadata_columns = [
        sa.Column(column_name, sa.String(), nullable=True)
        for column_name in migrated_columns[1:]
    ]
    parent_fk = sa.ForeignKeyConstraint(
        ["bill_text_id"],
        [f"{schema}.bill_text.id"],
        name=op.f("fk_bill_text_summary_bill_text_id_bill_text"),
        ondelete="CASCADE",
    )
    primary_key = sa.PrimaryKeyConstraint("id", name=op.f("pk_bill_text_summary"))

    # Step 1: the child table and its indexes.
    op.create_table(
        summary_table,
        sa.Column("bill_text_id", sa.Integer(), nullable=False),
        sa.Column("summary", sa.String(), nullable=False),
        *metadata_columns,
        sa.Column("id", sa.Integer(), nullable=False),
        timestamp_column("created"),
        timestamp_column("updated"),
        parent_fk,
        primary_key,
        schema=schema,
    )
    index_specs = (
        ("ix_bill_text_summary_bill_text_id", ["bill_text_id"]),
        ("ix_bill_text_summary_bill_text_id_created", ["bill_text_id", "created"]),
    )
    for index_name, index_columns in index_specs:
        op.create_index(
            index_name,
            summary_table,
            index_columns,
            unique=False,
            schema=schema,
        )

    # Step 2: pointer from a bill text to one of its summaries.
    primary_summary_column = sa.Column("primary_summary_id", sa.Integer(), nullable=True)
    op.add_column(parent_table, primary_summary_column, schema=schema)
    op.create_foreign_key(
        op.f("fk_bill_text_primary_summary_id_bill_text_summary"),
        parent_table,
        summary_table,
        ["primary_summary_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
        ondelete="SET NULL",
    )

    # Step 3: backfill. NULL and whitespace-only summaries are skipped. Both
    # timestamps of the new row take the parent's `updated`, falling back to
    # `created`, then now(). The statement text is kept verbatim.
    backfill_sql = f"""
INSERT INTO {schema}.bill_text_summary (
bill_text_id,
summary,
summarization_model,
summarization_user_prompt_version,
summarization_system_prompt_version,
created,
updated
)
SELECT
bill_text.id,
bill_text.summary,
bill_text.summarization_model,
bill_text.summarization_user_prompt_version,
bill_text.summarization_system_prompt_version,
COALESCE(bill_text.updated, bill_text.created, now()),
COALESCE(bill_text.updated, bill_text.created, now())
FROM {schema}.bill_text
WHERE bill_text.summary IS NOT NULL
AND btrim(bill_text.summary) <> ''
"""
    op.execute(sa.text(backfill_sql))
    # NOTE(review): primary_summary_id is left NULL for the copied rows; nothing
    # in this migration sets it. Confirm that readers handle a NULL primary.

    # Step 4: the inline columns are now redundant.
    for column_name in migrated_columns:
        op.drop_column(parent_table, column_name, schema=schema)
def downgrade() -> None:
    """Fold ``bill_text_summary`` back into inline columns on ``bill_text``.

    The inline layout holds one summary per bill text, so this downgrade is
    lossy. For each bill text it keeps the summary referenced by
    ``primary_summary_id`` or, when that is NULL, the most recently created
    summary (ties broken by the highest ``id``). Every other summary is
    discarded when the table is dropped.

    The summary text and its three summarization metadata values are always
    copied from the same chosen row. Coalescing column by column would let a
    primary summary with a NULL model or prompt version inherit that value
    from the latest summary, mislabelling where the restored text came from.
    """
    # Re-create the inline columns; all nullable because not every bill text
    # has a summary.
    restored_columns = (
        "summarization_system_prompt_version",
        "summarization_user_prompt_version",
        "summarization_model",
        "summary",
    )
    for column_name in restored_columns:
        op.add_column(
            "bill_text",
            sa.Column(column_name, sa.String(), nullable=True),
            schema=schema,
        )

    # Choose exactly one summary id per bill text (primary first, else newest),
    # then copy every column from that single row. Bill texts with no summary
    # have a NULL summary_id, drop out of the inner join and keep the NULLs
    # the new columns were created with.
    op.execute(
        sa.text(
            f"""
            WITH ranked AS (
                SELECT
                    bts.id,
                    bts.bill_text_id,
                    row_number() OVER (
                        PARTITION BY bts.bill_text_id
                        ORDER BY bts.created DESC, bts.id DESC
                    ) AS rn
                FROM {schema}.bill_text_summary AS bts
            ),
            chosen AS (
                SELECT
                    bt.id AS bill_text_id,
                    COALESCE(ps.id, ls.id) AS summary_id
                FROM {schema}.bill_text AS bt
                LEFT JOIN {schema}.bill_text_summary AS ps
                    ON ps.id = bt.primary_summary_id
                LEFT JOIN ranked AS ls
                    ON ls.bill_text_id = bt.id
                    AND ls.rn = 1
            )
            UPDATE {schema}.bill_text
            SET
                summary = picked.summary,
                summarization_model = picked.summarization_model,
                summarization_user_prompt_version = picked.summarization_user_prompt_version,
                summarization_system_prompt_version = picked.summarization_system_prompt_version
            FROM chosen
            JOIN {schema}.bill_text_summary AS picked
                ON picked.id = chosen.summary_id
            WHERE chosen.bill_text_id = bill_text.id
            """
        )
    )

    # The pointer column and its foreign key must go before the table they
    # reference can be dropped.
    op.drop_constraint(
        op.f("fk_bill_text_primary_summary_id_bill_text_summary"),
        "bill_text",
        schema=schema,
        type_="foreignkey",
    )
    op.drop_column("bill_text", "primary_summary_id", schema=schema)

    op.drop_index(
        "ix_bill_text_summary_bill_text_id_created",
        table_name="bill_text_summary",
        schema=schema,
    )
    op.drop_index(
        "ix_bill_text_summary_bill_text_id",
        table_name="bill_text_summary",
        schema=schema,
    )
    op.drop_table("bill_text_summary", schema=schema)