allowing multiple summaries per bill text
This commit is contained in:
@@ -0,0 +1,211 @@
|
||||
"""move bill text summaries into a child table.
|
||||
|
||||
Revision ID: 4b2e1c9d8f70
|
||||
Revises: b9360b0b0c22
|
||||
Create Date: 2026-05-03 00:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from alembic import op
|
||||
from pipelines.orm import DataScienceDevBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
# revision identifiers, used by Alembic.
# `revision` and `down_revision` mirror the "Revision ID" / "Revises" values
# stated in the module docstring.
revision: str = "4b2e1c9d8f70"
down_revision: str | None = "b9360b0b0c22"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Schema name read from DataScienceDevBase; every operation in this migration
# passes it explicitly (and the raw SQL interpolates it) so all objects are
# schema-qualified.
schema = DataScienceDevBase.schema_name
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade: move per-bill-text summary data into ``bill_text_summary``.

    Steps, in order:

    1. Create the ``bill_text_summary`` child table (FK to ``bill_text.id`` with
       ``ON DELETE CASCADE``) and its two non-unique indexes.
    2. Add the nullable ``bill_text.primary_summary_id`` column with an FK to
       ``bill_text_summary.id`` (``ON DELETE SET NULL``).
    3. Copy every non-NULL, non-blank ``bill_text.summary`` (plus its
       summarization metadata) into the new table.
    4. Drop the four legacy summary columns from ``bill_text``.
    """
    # Column order is kept as-is because it determines the table's column order.
    data_columns = [
        sa.Column("bill_text_id", sa.Integer(), nullable=False),
        sa.Column("summary", sa.String(), nullable=False),
        sa.Column("summarization_model", sa.String(), nullable=True),
        sa.Column("summarization_user_prompt_version", sa.String(), nullable=True),
        sa.Column("summarization_system_prompt_version", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
    ]
    timestamp_columns = [
        sa.Column(
            timestamp_name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        )
        for timestamp_name in ("created", "updated")
    ]
    parent_fk = sa.ForeignKeyConstraint(
        ["bill_text_id"],
        [f"{schema}.bill_text.id"],
        name=op.f("fk_bill_text_summary_bill_text_id_bill_text"),
        ondelete="CASCADE",
    )
    primary_key = sa.PrimaryKeyConstraint("id", name=op.f("pk_bill_text_summary"))
    op.create_table(
        "bill_text_summary",
        *data_columns,
        *timestamp_columns,
        parent_fk,
        primary_key,
        schema=schema,
    )

    # Lookup by parent, and by parent + creation time.
    summary_indexes = (
        ("ix_bill_text_summary_bill_text_id", ["bill_text_id"]),
        ("ix_bill_text_summary_bill_text_id_created", ["bill_text_id", "created"]),
    )
    for index_name, index_columns in summary_indexes:
        op.create_index(
            index_name,
            "bill_text_summary",
            index_columns,
            unique=False,
            schema=schema,
        )

    # Pointer from a bill text to one of its summaries.
    primary_pointer = sa.Column("primary_summary_id", sa.Integer(), nullable=True)
    op.add_column("bill_text", primary_pointer, schema=schema)
    op.create_foreign_key(
        op.f("fk_bill_text_primary_summary_id_bill_text_summary"),
        "bill_text",
        "bill_text_summary",
        ["primary_summary_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
        ondelete="SET NULL",
    )

    # Copy existing summaries; both timestamps of the new row are seeded from
    # the parent's `updated`, then `created`, then the current time.
    # NOTE(review): primary_summary_id is left NULL for the copied rows —
    # confirm that is intended.
    copy_legacy_summaries = sa.text(
        f"""
        INSERT INTO {schema}.bill_text_summary (
            bill_text_id,
            summary,
            summarization_model,
            summarization_user_prompt_version,
            summarization_system_prompt_version,
            created,
            updated
        )
        SELECT
            bill_text.id,
            bill_text.summary,
            bill_text.summarization_model,
            bill_text.summarization_user_prompt_version,
            bill_text.summarization_system_prompt_version,
            COALESCE(bill_text.updated, bill_text.created, now()),
            COALESCE(bill_text.updated, bill_text.created, now())
        FROM {schema}.bill_text
        WHERE bill_text.summary IS NOT NULL
            AND btrim(bill_text.summary) <> ''
        """
    )
    op.execute(copy_legacy_summaries)

    # The legacy columns are only dropped after their data has been copied.
    legacy_columns = (
        "summary",
        "summarization_model",
        "summarization_user_prompt_version",
        "summarization_system_prompt_version",
    )
    for legacy_column in legacy_columns:
        op.drop_column("bill_text", legacy_column, schema=schema)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade: fold one summary per bill text back into ``bill_text``.

    Steps, in order:

    1. Re-create the four legacy nullable string columns on ``bill_text``.
    2. For each bill text pick exactly ONE summary row — the one referenced by
       ``primary_summary_id`` if set, otherwise the most recent one
       (``created DESC, id DESC``) — and copy all of its fields back.
    3. Drop ``primary_summary_id`` (and its FK), the two indexes, and the
       ``bill_text_summary`` table.

    Any additional summaries beyond the chosen one are discarded together with
    the table.
    """
    # Same add order as before so the restored column order is unchanged.
    legacy_columns = (
        "summarization_system_prompt_version",
        "summarization_user_prompt_version",
        "summarization_model",
        "summary",
    )
    for legacy_column in legacy_columns:
        op.add_column(
            "bill_text",
            sa.Column(legacy_column, sa.String(), nullable=True),
            schema=schema,
        )

    # All restored fields come from a single summary row. Coalescing each
    # column independently between the primary and the latest summary would
    # pair the primary's text with the latest row's model / prompt versions
    # whenever the primary's nullable metadata is NULL.
    # Bill texts without any summary are not matched by the inner join and
    # keep the NULLs of the freshly added columns.
    restore_legacy_summaries = sa.text(
        f"""
        WITH ranked AS (
            SELECT
                bts.id,
                bts.bill_text_id,
                row_number() OVER (
                    PARTITION BY bts.bill_text_id
                    ORDER BY bts.created DESC, bts.id DESC
                ) AS rn
            FROM {schema}.bill_text_summary AS bts
        ),
        chosen AS (
            SELECT
                bt.id AS bill_text_id,
                cs.summary,
                cs.summarization_model,
                cs.summarization_user_prompt_version,
                cs.summarization_system_prompt_version
            FROM {schema}.bill_text AS bt
            LEFT JOIN ranked AS ls
                ON ls.bill_text_id = bt.id
                AND ls.rn = 1
            JOIN {schema}.bill_text_summary AS cs
                ON cs.id = COALESCE(bt.primary_summary_id, ls.id)
        )
        UPDATE {schema}.bill_text
        SET
            summary = chosen.summary,
            summarization_model = chosen.summarization_model,
            summarization_user_prompt_version = chosen.summarization_user_prompt_version,
            summarization_system_prompt_version = chosen.summarization_system_prompt_version
        FROM chosen
        WHERE chosen.bill_text_id = bill_text.id
        """
    )
    op.execute(restore_legacy_summaries)

    # Remove the pointer column first: its FK references the table dropped below.
    op.drop_constraint(
        op.f("fk_bill_text_primary_summary_id_bill_text_summary"),
        "bill_text",
        schema=schema,
        type_="foreignkey",
    )
    op.drop_column("bill_text", "primary_summary_id", schema=schema)

    for index_name in (
        "ix_bill_text_summary_bill_text_id_created",
        "ix_bill_text_summary_bill_text_id",
    ):
        op.drop_index(index_name, table_name="bill_text_summary", schema=schema)
    op.drop_table("bill_text_summary", schema=schema)
|
||||
Reference in New Issue
Block a user