From 4b768049c06dbfbe323e3496799ebb1436ac3713 Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Tue, 21 Apr 2026 21:42:13 -0400
Subject: [PATCH] added summarization metadata to the DB

---
 ...ext_summarization_metadata_7d15f9b7c8a2.py | 55 +++++++++++++++++++
 .../orm/data_science_dev/congress/bill.py     |  3 +
 .../data_science_dev/congress/legislator.py   |  6 +-
 pipelines/tools/summarization_prompts.py      | 34 ------------
 4 files changed, 61 insertions(+), 37 deletions(-)
 create mode 100644 alembic/data_science_dev/versions/2026_04_22-add_bill_text_summarization_metadata_7d15f9b7c8a2.py
 delete mode 100644 pipelines/tools/summarization_prompts.py

diff --git a/alembic/data_science_dev/versions/2026_04_22-add_bill_text_summarization_metadata_7d15f9b7c8a2.py b/alembic/data_science_dev/versions/2026_04_22-add_bill_text_summarization_metadata_7d15f9b7c8a2.py
new file mode 100644
index 0000000..30141d2
--- /dev/null
+++ b/alembic/data_science_dev/versions/2026_04_22-add_bill_text_summarization_metadata_7d15f9b7c8a2.py
@@ -0,0 +1,55 @@
+"""add bill_text summarization metadata.
+
+Revision ID: 7d15f9b7c8a2
+Revises: ef4bc5411176
+Create Date: 2026-04-22 00:00:00.000000
+
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import sqlalchemy as sa
+
+from alembic import op
+from pipelines.orm import DataScienceDevBase
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+# revision identifiers, used by Alembic.
+revision: str = "7d15f9b7c8a2"
+down_revision: str | None = "ef4bc5411176"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+schema = DataScienceDevBase.schema_name  # passed as schema= to every op call below
+
+
+def upgrade() -> None:
+    """Add nullable summarization metadata columns to bill_text."""
+    op.add_column(
+        "bill_text",
+        sa.Column("summarization_model", sa.String(), nullable=True),
+        schema=schema,
+    )
+    op.add_column(
+        "bill_text",
+        sa.Column("summarization_user_prompt_version", sa.String(), nullable=True),
+        schema=schema,
+    )
+    op.add_column(
+        "bill_text",
+        sa.Column("summarization_system_prompt_version", sa.String(), nullable=True),
+        schema=schema,
+    )
+
+
+def downgrade() -> None:
+    """Drop the summarization metadata columns from bill_text."""
+    op.drop_column(
+        "bill_text", "summarization_system_prompt_version", schema=schema
+    )
+    op.drop_column("bill_text", "summarization_user_prompt_version", schema=schema)
+    op.drop_column("bill_text", "summarization_model", schema=schema)
diff --git a/pipelines/orm/data_science_dev/congress/bill.py b/pipelines/orm/data_science_dev/congress/bill.py
index e9a11f0..0ace4bf 100644
--- a/pipelines/orm/data_science_dev/congress/bill.py
+++ b/pipelines/orm/data_science_dev/congress/bill.py
@@ -86,6 +86,9 @@ class BillText(DataScienceDevTableBase):
     version_name: Mapped[str | None]
     text_content: Mapped[str | None]
     summary: Mapped[str | None]
+    summarization_model: Mapped[str | None]
+    summarization_user_prompt_version: Mapped[str | None]
+    summarization_system_prompt_version: Mapped[str | None]
     date: Mapped[date | None]
 
     bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
diff --git a/pipelines/orm/data_science_dev/congress/legislator.py b/pipelines/orm/data_science_dev/congress/legislator.py
index 474b8f4..0176f47 100644
--- a/pipelines/orm/data_science_dev/congress/legislator.py
+++ b/pipelines/orm/data_science_dev/congress/legislator.py
@@ -104,7 +104,6 @@ class LegislatorScore(DataScienceDevTableBase):
     legislator: Mapped[Legislator] = relationship(back_populates="scores")
 
 
-
 class LegislatorBillScore(DataScienceDevTableBase):
     """Per-bill source score used to maintain aggregate legislator scores."""
 
@@ -136,6 +135,7 @@ class LegislatorBillScore(DataScienceDevTableBase):
     score: Mapped[float]
 
     bill: Mapped[Bill] = relationship(back_populates="legislator_bill_scores")
-    bill_topic: Mapped[BillTopic] = relationship(back_populates="legislator_bill_scores")
+    bill_topic: Mapped[BillTopic] = relationship(
+        back_populates="legislator_bill_scores",
+    )
     legislator: Mapped[Legislator] = relationship(back_populates="bill_scores")
-
diff --git a/pipelines/tools/summarization_prompts.py b/pipelines/tools/summarization_prompts.py
deleted file mode 100644
index bfdd5a5..0000000
--- a/pipelines/tools/summarization_prompts.py
+++ /dev/null
@@ -1,34 +0,0 @@
-SUMMARIZATION_SYSTEM_PROMPT = """You are a legislative analyst extracting policy substance from Congressional bill text.
-
-Your job is to compress a bill into a dense, neutral structured summary that captures every distinct policy action — including secondary effects that might be buried in subsections.
-
-EXTRACTION RULES:
-- IGNORE: whereas clauses, congressional findings that are purely political statements, recitals, preambles, citations of existing law by number alone, and procedural boilerplate.
-- FOCUS ON: operative verbs — what the bill SHALL do, PROHIBIT, REQUIRE, AUTHORIZE, AMEND, APPROPRIATE, or ESTABLISH.
-- SURFACE ALL THREADS: If the bill touches multiple policy areas, list each thread separately. Do not collapse them.
-- BE CONCRETE: Name the affected population, the mechanism, and the direction (expands/restricts/maintains).
-- STAY NEUTRAL: No political framing. Describe what the text does, not what its sponsors claim it does.
-
-OUTPUT FORMAT — plain structured text, not JSON:
-
-OPERATIVE ACTIONS:
-[Numbered list of what the bill actually does, one action per line, max 20 words each]
-
-AFFECTED POPULATIONS:
-[Who gains something, who loses something, or whose behavior is regulated]
-
-MECHANISMS:
-[How it works: new funding, mandate, prohibition, amendment to existing statute, grant program, study commission, etc.]
-
-POLICY THREADS:
-[List each distinct policy domain this bill touches, even minor ones. Use plain language, not domain codes.]
-
-SYMBOLIC/PROCEDURAL ONLY:
-[Yes or No — is this bill primarily a resolution, designation, or awareness declaration with no operative effect?]
-
-LENGTH TARGET: 150-250 words total. Be ruthless about cutting. Density over completeness."""
-
-SUMMARIZATION_USER_TEMPLATE = """Summarize the following Congressional bill according to your instructions.
-
-BILL TEXT:
-{text_content}"""