added summarization metadata to the DB

This commit is contained in:
2026-04-21 21:42:13 -04:00
parent 674edafe94
commit 4b768049c0
4 changed files with 61 additions and 37 deletions
@@ -86,6 +86,9 @@ class BillText(DataScienceDevTableBase):
version_name: Mapped[str | None]
text_content: Mapped[str | None]
summary: Mapped[str | None]
summarization_model: Mapped[str | None]
summarization_user_prompt_version: Mapped[str | None]
summarization_system_prompt_version: Mapped[str | None]
date: Mapped[date | None]
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
@@ -104,7 +104,6 @@ class LegislatorScore(DataScienceDevTableBase):
legislator: Mapped[Legislator] = relationship(back_populates="scores")
class LegislatorBillScore(DataScienceDevTableBase):
"""Per-bill source score used to maintain aggregate legislator scores."""
@@ -136,6 +135,7 @@ class LegislatorBillScore(DataScienceDevTableBase):
score: Mapped[float]
bill: Mapped[Bill] = relationship(back_populates="legislator_bill_scores")
bill_topic: Mapped[BillTopic] = relationship(back_populates="legislator_bill_scores")
bill_topic: Mapped[BillTopic] = relationship(
back_populates="legislator_bill_scores",
)
legislator: Mapped[Legislator] = relationship(back_populates="bill_scores")
-34
View File
@@ -1,34 +0,0 @@
# System prompt for bill summarization.
#
# Tells the model to act as a legislative analyst and compress Congressional
# bill text into a neutral, plain-text (explicitly not JSON) summary with five
# labelled sections, in this order:
#   OPERATIVE ACTIONS, AFFECTED POPULATIONS, MECHANISMS, POLICY THREADS,
#   SYMBOLIC/PROCEDURAL ONLY (a Yes/No answer).
# The prompt sets a 150-250 word length target for the whole summary.
#
# The string is sent verbatim, so any edit (including whitespace) changes model
# input. Presumably such edits should be paired with a prompt-version bump;
# confirm against wherever the prompt version is recorded.
#
# NOTE(review): the opening asks for "every distinct policy action" while the
# closing line says "Density over completeness" -- potentially conflicting
# guidance for long bills; confirm which one is meant to win.
SUMMARIZATION_SYSTEM_PROMPT = """You are a legislative analyst extracting policy substance from Congressional bill text.
Your job is to compress a bill into a dense, neutral structured summary that captures every distinct policy action — including secondary effects that might be buried in subsections.
EXTRACTION RULES:
- IGNORE: whereas clauses, congressional findings that are purely political statements, recitals, preambles, citations of existing law by number alone, and procedural boilerplate.
- FOCUS ON: operative verbs — what the bill SHALL do, PROHIBIT, REQUIRE, AUTHORIZE, AMEND, APPROPRIATE, or ESTABLISH.
- SURFACE ALL THREADS: If the bill touches multiple policy areas, list each thread separately. Do not collapse them.
- BE CONCRETE: Name the affected population, the mechanism, and the direction (expands/restricts/maintains).
- STAY NEUTRAL: No political framing. Describe what the text does, not what its sponsors claim it does.
OUTPUT FORMAT — plain structured text, not JSON:
OPERATIVE ACTIONS:
[Numbered list of what the bill actually does, one action per line, max 20 words each]
AFFECTED POPULATIONS:
[Who gains something, who loses something, or whose behavior is regulated]
MECHANISMS:
[How it works: new funding, mandate, prohibition, amendment to existing statute, grant program, study commission, etc.]
POLICY THREADS:
[List each distinct policy domain this bill touches, even minor ones. Use plain language, not domain codes.]
SYMBOLIC/PROCEDURAL ONLY:
[Yes or No — is this bill primarily a resolution, designation, or awareness declaration with no operative effect?]
LENGTH TARGET: 150-250 words total. Be ruthless about cutting. Density over completeness."""
# User-turn template for bill summarization: a one-line instruction, a
# "BILL TEXT:" label, then the `{text_content}` placeholder on its own line.
# Presumably rendered with `str.format(text_content=...)` -- confirm at the
# call site. Literal braces in the bill text are safe as a format *argument*,
# but must not be spliced into the template itself.
SUMMARIZATION_USER_TEMPLATE = "\n".join(
    (
        "Summarize the following Congressional bill according to your instructions.",
        "BILL TEXT:",
        "{text_content}",
    )
)