added summarization metadata to the DB
This commit is contained in:
@@ -0,0 +1,55 @@
|
|||||||
|
"""add bill_text summarization metadata.
|
||||||
|
|
||||||
|
Revision ID: 7d15f9b7c8a2
|
||||||
|
Revises: ef4bc5411176
|
||||||
|
Create Date: 2026-04-22 00:00:00.000000
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
from pipelines.orm import DataScienceDevBase
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
revision: str = "7d15f9b7c8a2"
down_revision: str | None = "ef4bc5411176"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Schema name passed as ``schema=`` to every add/drop column call below.
schema = DataScienceDevBase.schema_name
|
||||||
|
def upgrade() -> None:
    """Add the nullable summarization metadata columns to ``bill_text``.

    Each column is a nullable string, added in the order listed below.
    """
    new_column_names = (
        "summarization_model",
        "summarization_user_prompt_version",
        "summarization_system_prompt_version",
    )
    for column_name in new_column_names:
        metadata_column = sa.Column(column_name, sa.String(), nullable=True)
        op.add_column("bill_text", metadata_column, schema=schema)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the summarization metadata columns from ``bill_text``.

    Columns are removed in the reverse of the order ``upgrade`` adds them.
    """
    columns_to_drop = (
        "summarization_system_prompt_version",
        "summarization_user_prompt_version",
        "summarization_model",
    )
    for column_name in columns_to_drop:
        op.drop_column("bill_text", column_name, schema=schema)
|
||||||
@@ -86,6 +86,9 @@ class BillText(DataScienceDevTableBase):
|
|||||||
version_name: Mapped[str | None]
|
version_name: Mapped[str | None]
|
||||||
text_content: Mapped[str | None]
|
text_content: Mapped[str | None]
|
||||||
summary: Mapped[str | None]
|
summary: Mapped[str | None]
|
||||||
|
summarization_model: Mapped[str | None]
|
||||||
|
summarization_user_prompt_version: Mapped[str | None]
|
||||||
|
summarization_system_prompt_version: Mapped[str | None]
|
||||||
date: Mapped[date | None]
|
date: Mapped[date | None]
|
||||||
|
|
||||||
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
|
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
|
||||||
|
|||||||
@@ -104,7 +104,6 @@ class LegislatorScore(DataScienceDevTableBase):
|
|||||||
legislator: Mapped[Legislator] = relationship(back_populates="scores")
|
legislator: Mapped[Legislator] = relationship(back_populates="scores")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class LegislatorBillScore(DataScienceDevTableBase):
|
class LegislatorBillScore(DataScienceDevTableBase):
|
||||||
"""Per-bill source score used to maintain aggregate legislator scores."""
|
"""Per-bill source score used to maintain aggregate legislator scores."""
|
||||||
|
|
||||||
@@ -136,6 +135,7 @@ class LegislatorBillScore(DataScienceDevTableBase):
|
|||||||
score: Mapped[float]
|
score: Mapped[float]
|
||||||
|
|
||||||
bill: Mapped[Bill] = relationship(back_populates="legislator_bill_scores")
|
bill: Mapped[Bill] = relationship(back_populates="legislator_bill_scores")
|
||||||
bill_topic: Mapped[BillTopic] = relationship(back_populates="legislator_bill_scores")
|
bill_topic: Mapped[BillTopic] = relationship(
|
||||||
|
back_populates="legislator_bill_scores",
|
||||||
|
)
|
||||||
legislator: Mapped[Legislator] = relationship(back_populates="bill_scores")
|
legislator: Mapped[Legislator] = relationship(back_populates="bill_scores")
|
||||||
|
|
||||||
|
|||||||
@@ -1,34 +0,0 @@
|
|||||||
# System prompt for bill summarization. The text tells the model to act as a
# legislative analyst and produce a neutral, plain-text (not JSON) summary with
# five labelled sections: OPERATIVE ACTIONS, AFFECTED POPULATIONS, MECHANISMS,
# POLICY THREADS, and SYMBOLIC/PROCEDURAL ONLY, targeting 150-250 words.
# NOTE(review): the bare `|` / `||||||` lines inside this literal are part of
# the string as written here; they look like diff-table residue — confirm
# against the original file before relying on this text.
SUMMARIZATION_SYSTEM_PROMPT = """You are a legislative analyst extracting policy substance from Congressional bill text.
|
|
||||||
|
|
||||||
Your job is to compress a bill into a dense, neutral structured summary that captures every distinct policy action — including secondary effects that might be buried in subsections.
|
|
||||||
|
|
||||||
EXTRACTION RULES:
|
|
||||||
- IGNORE: whereas clauses, congressional findings that are purely political statements, recitals, preambles, citations of existing law by number alone, and procedural boilerplate.
|
|
||||||
- FOCUS ON: operative verbs — what the bill SHALL do, PROHIBIT, REQUIRE, AUTHORIZE, AMEND, APPROPRIATE, or ESTABLISH.
|
|
||||||
- SURFACE ALL THREADS: If the bill touches multiple policy areas, list each thread separately. Do not collapse them.
|
|
||||||
- BE CONCRETE: Name the affected population, the mechanism, and the direction (expands/restricts/maintains).
|
|
||||||
- STAY NEUTRAL: No political framing. Describe what the text does, not what its sponsors claim it does.
|
|
||||||
|
|
||||||
OUTPUT FORMAT — plain structured text, not JSON:
|
|
||||||
|
|
||||||
OPERATIVE ACTIONS:
|
|
||||||
[Numbered list of what the bill actually does, one action per line, max 20 words each]
|
|
||||||
|
|
||||||
AFFECTED POPULATIONS:
|
|
||||||
[Who gains something, who loses something, or whose behavior is regulated]
|
|
||||||
|
|
||||||
MECHANISMS:
|
|
||||||
[How it works: new funding, mandate, prohibition, amendment to existing statute, grant program, study commission, etc.]
|
|
||||||
|
|
||||||
POLICY THREADS:
|
|
||||||
[List each distinct policy domain this bill touches, even minor ones. Use plain language, not domain codes.]
|
|
||||||
|
|
||||||
SYMBOLIC/PROCEDURAL ONLY:
|
|
||||||
[Yes or No — is this bill primarily a resolution, designation, or awareness declaration with no operative effect?]
|
|
||||||
|
|
||||||
LENGTH TARGET: 150-250 words total. Be ruthless about cutting. Density over completeness."""
|
|
||||||
|
|
||||||
# User-message template for bill summarization; contains a single
# `{text_content}` placeholder after the "BILL TEXT:" label.
# Presumably filled with the bill's text via str.format — verify against caller.
# NOTE(review): the bare `|` / `||||||` lines inside this literal are part of
# the string as written here; they look like diff-table residue — confirm.
SUMMARIZATION_USER_TEMPLATE = """Summarize the following Congressional bill according to your instructions.
|
|
||||||
|
|
||||||
BILL TEXT:
|
|
||||||
{text_content}"""
|
|
||||||
Reference in New Issue
Block a user