created scoring tables and basic logic
This commit is contained in:
@@ -1,8 +1,15 @@
|
||||
"""init."""
|
||||
"""Congress ORM models."""
|
||||
|
||||
from pipelines.orm.data_science_dev.congress.bill import Bill, BillText
|
||||
from pipelines.orm.data_science_dev.congress.bill import (
|
||||
Bill,
|
||||
BillText,
|
||||
BillTopic,
|
||||
BillTopicPosition,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress.legislator import (
|
||||
LegislatorBillScore,
|
||||
Legislator,
|
||||
LegislatorScore,
|
||||
LegislatorSocialMedia,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress.vote import Vote, VoteRecord
|
||||
@@ -10,7 +17,11 @@ from pipelines.orm.data_science_dev.congress.vote import Vote, VoteRecord
|
||||
__all__ = [
|
||||
"Bill",
|
||||
"BillText",
|
||||
"BillTopic",
|
||||
"BillTopicPosition",
|
||||
"Legislator",
|
||||
"LegislatorBillScore",
|
||||
"LegislatorScore",
|
||||
"LegislatorSocialMedia",
|
||||
"Vote",
|
||||
"VoteRecord",
|
||||
|
||||
@@ -2,22 +2,37 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from datetime import date, datetime
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy import DateTime, Enum, ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipelines.orm.data_science_dev.congress.legislator import LegislatorBillScore
|
||||
from pipelines.orm.data_science_dev.congress.vote import Vote
|
||||
|
||||
|
||||
class BillTopicPosition(StrEnum):
|
||||
"""Whether a yes vote on a bill is for or against a topic."""
|
||||
|
||||
FOR = "for"
|
||||
AGAINST = "against"
|
||||
|
||||
|
||||
class Bill(DataScienceDevTableBase):
|
||||
"""Legislation with congress number, type, titles, status, and sponsor."""
|
||||
|
||||
__tablename__ = "bill"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress", "bill_type", "number", name="uq_bill_congress_type_number"
|
||||
),
|
||||
Index("ix_bill_congress", "congress"),
|
||||
)
|
||||
|
||||
congress: Mapped[int]
|
||||
bill_type: Mapped[str]
|
||||
@@ -33,6 +48,7 @@ class Bill(DataScienceDevTableBase):
|
||||
sponsor_bioguide_id: Mapped[str | None]
|
||||
|
||||
subjects_top_term: Mapped[str | None]
|
||||
score_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
|
||||
votes: Mapped[list[Vote]] = relationship(
|
||||
"Vote",
|
||||
@@ -43,12 +59,15 @@ class Bill(DataScienceDevTableBase):
|
||||
back_populates="bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress", "bill_type", "number", name="uq_bill_congress_type_number"
|
||||
),
|
||||
Index("ix_bill_congress", "congress"),
|
||||
topics: Mapped[list[BillTopic]] = relationship(
|
||||
"BillTopic",
|
||||
back_populates="bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
legislator_bill_scores: Mapped[list[LegislatorBillScore]] = relationship(
|
||||
"LegislatorBillScore",
|
||||
back_populates="bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
|
||||
@@ -56,17 +75,50 @@ class BillText(DataScienceDevTableBase):
|
||||
"""Stores different text versions of a bill (introduced, enrolled, etc.)."""
|
||||
|
||||
__tablename__ = "bill_text"
|
||||
|
||||
bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
|
||||
version_code: Mapped[str]
|
||||
version_name: Mapped[str | None]
|
||||
text_content: Mapped[str | None]
|
||||
date: Mapped[date | None]
|
||||
|
||||
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"bill_id", "version_code", name="uq_bill_text_bill_id_version_code"
|
||||
),
|
||||
)
|
||||
|
||||
bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
|
||||
version_code: Mapped[str]
|
||||
version_name: Mapped[str | None]
|
||||
text_content: Mapped[str | None]
|
||||
summary: Mapped[str | None]
|
||||
date: Mapped[date | None]
|
||||
|
||||
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
|
||||
|
||||
|
||||
class BillTopic(DataScienceDevTableBase):
    """One bill stance on one topic used to score roll-call votes.

    Each row ties a bill to a topic name and records whether a yes vote on
    that bill counts for or against the topic.
    """

    __tablename__ = "bill_topic"
    __table_args__ = (
        # The same (bill, topic, stance) combination may be stored only once.
        UniqueConstraint(
            "bill_id",
            "topic",
            "support_position",
            name="uq_bill_topic_bill_id_topic_support_position",
        ),
        # Supports filtering rows by topic name.
        Index("ix_bill_topic_topic", "topic"),
    )

    # Owning bill; the FK is declared with ON DELETE CASCADE.
    bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
    topic: Mapped[str]
    # Non-native enum column: values_callable makes the persisted strings the
    # member values ("for" / "against") instead of the member names.
    support_position: Mapped[BillTopicPosition] = mapped_column(
        Enum(
            BillTopicPosition,
            values_callable=lambda enum_cls: [member.value for member in enum_cls],
            native_enum=False,
            name="bill_topic_position",
        )
    )

    bill: Mapped[Bill] = relationship("Bill", back_populates="topics")
    # Per-legislator scores derived from this stance; ORM cascade removes them
    # together with the stance row.
    legislator_bill_scores: Mapped[list[LegislatorBillScore]] = relationship(
        "LegislatorBillScore",
        back_populates="bill_topic",
        cascade="all, delete-orphan",
    )
|
||||
|
||||
@@ -5,12 +5,13 @@ from __future__ import annotations
|
||||
from datetime import date
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import ForeignKey, Text
|
||||
from sqlalchemy import ForeignKey, Index, Text, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipelines.orm.data_science_dev.congress.bill import Bill, BillTopic
|
||||
from pipelines.orm.data_science_dev.congress.vote import VoteRecord
|
||||
|
||||
|
||||
@@ -50,6 +51,16 @@ class Legislator(DataScienceDevTableBase):
|
||||
back_populates="legislator",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
scores: Mapped[list[LegislatorScore]] = relationship(
|
||||
"LegislatorScore",
|
||||
back_populates="legislator",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
bill_scores: Mapped[list[LegislatorBillScore]] = relationship(
|
||||
"LegislatorBillScore",
|
||||
back_populates="legislator",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
|
||||
class LegislatorSocialMedia(DataScienceDevTableBase):
|
||||
@@ -66,3 +77,65 @@ class LegislatorSocialMedia(DataScienceDevTableBase):
|
||||
legislator: Mapped[Legislator] = relationship(
|
||||
back_populates="social_media_accounts"
|
||||
)
|
||||
|
||||
|
||||
class LegislatorScore(DataScienceDevTableBase):
    """Computed topic score for a legislator in one calendar year.

    At most one row exists per (legislator_id, year, topic).
    """

    __tablename__ = "legislator_score"
    __table_args__ = (
        UniqueConstraint(
            "legislator_id",
            "year",
            "topic",
            name="uq_legislator_score_legislator_id_year_topic",
        ),
        # Supports lookups of every legislator's score for a year/topic pair.
        Index("ix_legislator_score_year_topic", "year", "topic"),
    )

    # Owning legislator; the FK is declared with ON DELETE CASCADE.
    legislator_id: Mapped[int] = mapped_column(
        ForeignKey("main.legislator.id", ondelete="CASCADE"),
        index=True,
    )
    year: Mapped[int]
    topic: Mapped[str]
    score: Mapped[float]

    legislator: Mapped[Legislator] = relationship(back_populates="scores")
|
||||
|
||||
|
||||
|
||||
class LegislatorBillScore(DataScienceDevTableBase):
    """Per-bill source score used to maintain aggregate legislator scores.

    At most one row exists per (bill_topic_id, legislator_id, year).
    """

    __tablename__ = "legislator_bill_score"
    __table_args__ = (
        UniqueConstraint(
            "bill_topic_id",
            "legislator_id",
            "year",
            name="uq_legislator_bill_score_bill_topic_id_legislator_id_year",
        ),
        # Supports selecting source rows by year/topic.
        Index("ix_legislator_bill_score_year_topic", "year", "topic"),
    )

    # All three foreign keys are declared with ON DELETE CASCADE and indexed.
    bill_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE"),
        index=True,
    )
    bill_topic_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill_topic.id", ondelete="CASCADE"),
        index=True,
    )
    legislator_id: Mapped[int] = mapped_column(
        ForeignKey("main.legislator.id", ondelete="CASCADE"),
        index=True,
    )
    year: Mapped[int]
    # Topic name stored directly on the row in addition to bill_topic_id.
    topic: Mapped[str]
    score: Mapped[float]

    bill: Mapped[Bill] = relationship(back_populates="legislator_bill_scores")
    bill_topic: Mapped[BillTopic] = relationship(back_populates="legislator_bill_scores")
    legislator: Mapped[Legislator] = relationship(back_populates="bill_scores")
|
||||
|
||||
|
||||
@@ -44,6 +44,17 @@ class Vote(DataScienceDevTableBase):
|
||||
"""Roll call votes with counts and optional bill linkage."""
|
||||
|
||||
__tablename__ = "vote"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress",
|
||||
"chamber",
|
||||
"session",
|
||||
"number",
|
||||
name="uq_vote_congress_chamber_session_number",
|
||||
),
|
||||
Index("ix_vote_date", "vote_date"),
|
||||
Index("ix_vote_congress_chamber", "congress", "chamber"),
|
||||
)
|
||||
|
||||
congress: Mapped[int]
|
||||
chamber: Mapped[str]
|
||||
@@ -71,14 +82,3 @@ class Vote(DataScienceDevTableBase):
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress",
|
||||
"chamber",
|
||||
"session",
|
||||
"number",
|
||||
name="uq_vote_congress_chamber_session_number",
|
||||
),
|
||||
Index("ix_vote_date", "vote_date"),
|
||||
Index("ix_vote_congress_chamber", "congress", "chamber"),
|
||||
)
|
||||
|
||||
@@ -2,14 +2,28 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pipelines.orm.data_science_dev.congress import Bill, BillText, Legislator, Vote, VoteRecord
|
||||
from pipelines.orm.data_science_dev.congress import (
|
||||
Bill,
|
||||
BillText,
|
||||
BillTopic,
|
||||
BillTopicPosition,
|
||||
Legislator,
|
||||
LegislatorBillScore,
|
||||
LegislatorScore,
|
||||
Vote,
|
||||
VoteRecord,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.posts import partitions # noqa: F401 — registers partition classes in metadata
|
||||
from pipelines.orm.data_science_dev.posts.tables import Posts
|
||||
|
||||
__all__ = [
|
||||
"Bill",
|
||||
"BillText",
|
||||
"BillTopic",
|
||||
"BillTopicPosition",
|
||||
"Legislator",
|
||||
"LegislatorBillScore",
|
||||
"LegislatorScore",
|
||||
"Posts",
|
||||
"Vote",
|
||||
"VoteRecord",
|
||||
|
||||
@@ -0,0 +1,394 @@
|
||||
"""Calculate legislator topic scores from bill topic metadata and roll-call votes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from datetime import UTC, datetime
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import Integer, delete, extract, func, select, tuple_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from pipelines.orm.common import get_postgres_engine
|
||||
from pipelines.orm.data_science_dev.congress import (
|
||||
Bill,
|
||||
BillTopic,
|
||||
BillTopicPosition,
|
||||
LegislatorBillScore,
|
||||
LegislatorScore,
|
||||
Vote,
|
||||
VoteRecord,
|
||||
)
|
||||
|
||||
# Vote strings (compared after strip + casefold) that count as a yes vote.
SUPPORT_VOTES = frozenset(("yea", "aye", "yes"))
# Vote strings (compared after strip + casefold) that count as a no vote.
OPPOSE_VOTES = frozenset(("nay", "no"))

# Score values on the 1-100 scale, with 50 as the neutral midpoint.
OPPOSE_SCORE = 1.0
NEUTRAL_SCORE = 50.0
SUPPORT_SCORE = 100.0

# Aggregate score key: (legislator_id, year, topic).
ScoreKey = tuple[int, int, str]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VoteScoreInput:
    """Immutable record of one legislator's vote joined to one bill topic stance.

    Carries everything needed to score a single bill/topic/legislator event.
    """

    bill_id: int
    bill_topic_id: int
    legislator_id: int
    # Calendar year the score is attributed to.
    year: int
    topic: str
    # Stance of a yes vote toward the topic; may be the enum or its raw string.
    support_position: str | BillTopicPosition
    # Raw recorded vote text; None when no position is available.
    vote_position: str | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ComputedBillScore:
    """Immutable per-bill score for one legislator, year and topic."""

    bill_id: int
    bill_topic_id: int
    legislator_id: int
    year: int
    topic: str
    # Score for this bill/topic/legislator/year combination.
    score: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScoreRunResult:
    """Immutable counters describing what one scoring run did."""

    # Number of bills that were scored.
    processed_bills: int
    # Number of per-bill score rows stored.
    bill_score_rows: int
    # Number of aggregate score rows refreshed.
    aggregate_score_rows: int
|
||||
|
||||
|
||||
def score_vote(
    vote_position: str | None,
    support_position: str | BillTopicPosition | None,
) -> float | None:
    """Score a single vote against a bill's stance on a topic.

    Returns:
        None when the stance cannot be normalized; NEUTRAL_SCORE when the
        vote is missing or is not a recognized yes/no value; SUPPORT_SCORE
        when the vote direction favours the topic; OPPOSE_SCORE otherwise.
    """
    stance = normalize_support_position(support_position)
    if stance is None:
        return None

    normalized_vote = (
        None if vote_position is None else vote_position.strip().casefold()
    )
    if normalized_vote in SUPPORT_VOTES:
        # A yes vote favours the topic only when yes is "for" the topic.
        favours_topic = stance is BillTopicPosition.FOR
    elif normalized_vote in OPPOSE_VOTES:
        # A no vote favours the topic when yes would have been "against" it.
        favours_topic = stance is not BillTopicPosition.FOR
    else:
        # Missing or unrecognized vote values carry no signal.
        return NEUTRAL_SCORE

    return SUPPORT_SCORE if favours_topic else OPPOSE_SCORE
|
||||
|
||||
|
||||
def normalize_support_position(
    support_position: str | BillTopicPosition | None,
) -> BillTopicPosition | None:
    """Coerce a stance given as enum, string or None into the enum.

    Strings are stripped and case-folded before matching. None and strings
    that match no member value both yield None.
    """
    if isinstance(support_position, BillTopicPosition):
        return support_position
    if support_position is None:
        return None

    cleaned = support_position.strip().casefold()
    for member in BillTopicPosition:
        if member.value == cleaned:
            return member
    return None
|
||||
|
||||
|
||||
def calculate_bill_score_values(
    vote_inputs: Iterable[VoteScoreInput],
) -> list[ComputedBillScore]:
    """Collapse raw vote inputs into one averaged score per bill/topic/legislator/year.

    Inputs whose vote cannot be scored (score_vote returns None) are dropped.
    The remaining scores are grouped by
    (bill_id, bill_topic_id, legislator_id, year, topic) and averaged.
    Results are returned in ascending key order.
    """
    buckets: dict[tuple[int, int, int, int, str], list[float]] = defaultdict(list)
    for item in vote_inputs:
        value = score_vote(item.vote_position, item.support_position)
        if value is not None:
            buckets[
                (
                    item.bill_id,
                    item.bill_topic_id,
                    item.legislator_id,
                    item.year,
                    item.topic,
                )
            ].append(value)

    results: list[ComputedBillScore] = []
    for group_key in sorted(buckets):
        values = buckets[group_key]
        bill_id, bill_topic_id, legislator_id, year, topic = group_key
        results.append(
            ComputedBillScore(
                bill_id=bill_id,
                bill_topic_id=bill_topic_id,
                legislator_id=legislator_id,
                year=year,
                topic=topic,
                score=sum(values) / len(values),
            )
        )
    return results
|
||||
|
||||
|
||||
def calculate_and_store_legislator_scores(
    session: Session,
    *,
    congress: int | None = None,
    bill_ids: list[int] | None = None,
    topics: list[str] | None = None,
    force: bool = False,
    limit: int | None = None,
) -> ScoreRunResult:
    """Score selected bills and refresh aggregate legislator score rows.

    Args:
        session: Open SQLAlchemy session; committed once after all bills
            have been scored.
        congress: Restrict the run to bills from this Congress.
        bill_ids: Restrict the run to these bill ids.
        topics: Restrict scoring to these topics. A bill is marked as
            processed only when no topic filter is in effect.
        force: Also select bills whose score_processed_at is already set.
        limit: Maximum number of bills to process.

    Returns:
        Counts of processed bills, stored per-bill score rows and refreshed
        aggregate score rows.
    """
    selected_bill_ids = select_bill_ids_to_score(
        session,
        congress=congress,
        bill_ids=bill_ids,
        topics=topics,
        force=force,
        limit=limit,
    )

    # The selection and scoring helpers treat an empty topic list exactly like
    # None (no filter), so a run with topics=[] scores every topic and must
    # mark the bill processed as well. Test truthiness, not identity.
    mark_processed = not topics

    bill_score_rows = 0
    aggregate_score_rows = 0
    for bill_id in selected_bill_ids:
        stored, refreshed = score_bill(
            session,
            bill_id=bill_id,
            topics=topics,
            mark_processed=mark_processed,
        )
        bill_score_rows += stored
        aggregate_score_rows += refreshed

    session.commit()
    return ScoreRunResult(
        processed_bills=len(selected_bill_ids),
        bill_score_rows=bill_score_rows,
        aggregate_score_rows=aggregate_score_rows,
    )
|
||||
|
||||
|
||||
def select_bill_ids_to_score(
    session: Session,
    *,
    congress: int | None = None,
    bill_ids: list[int] | None = None,
    topics: list[str] | None = None,
    force: bool = False,
    limit: int | None = None,
) -> list[int]:
    """Return ids of bills that have both topic rows and votes and match the filters.

    Unless ``force`` is set, bills with score_processed_at already populated
    are excluded. Ids are distinct and returned in ascending order.
    """
    # NOTE(review): an empty bill_ids/topics list is falsy and therefore
    # applies no filter, the same as None. Confirm callers expect this.
    filters = []
    if not force:
        filters.append(Bill.score_processed_at.is_(None))
    if congress is not None:
        filters.append(Bill.congress == congress)
    if bill_ids:
        filters.append(Bill.id.in_(bill_ids))
    if topics:
        filters.append(BillTopic.topic.in_(topics))

    # Inner joins keep only bills that have at least one topic row and one vote.
    query = (
        select(Bill.id)
        .join(BillTopic, BillTopic.bill_id == Bill.id)
        .join(Vote, Vote.bill_id == Bill.id)
        .distinct()
        .order_by(Bill.id)
    )
    for criterion in filters:
        query = query.where(criterion)
    if limit is not None:
        query = query.limit(limit)

    return list(session.scalars(query))
|
||||
|
||||
|
||||
def score_bill(
    session: Session,
    *,
    bill_id: int,
    topics: list[str] | None = None,
    mark_processed: bool = True,
) -> tuple[int, int]:
    """Rebuild per-bill scores for one bill and refresh the affected aggregates.

    Existing per-bill rows for the bill (optionally limited to ``topics``) are
    deleted and replaced with freshly computed ones. When ``mark_processed``
    is true and the bill exists, its score_processed_at is set to the current
    UTC time.

    Returns:
        (number of per-bill rows stored, number of aggregate rows refreshed).
    """
    # Capture the keys before deleting so aggregates whose source rows
    # disappear are still recomputed.
    keys_before = _existing_score_keys_for_bill(
        session, bill_id=bill_id, topics=topics
    )
    session.execute(_delete_bill_scores_statement(bill_id=bill_id, topics=topics))
    session.flush()

    vote_inputs = _load_bill_vote_score_inputs(session, bill_id=bill_id, topics=topics)
    computed = calculate_bill_score_values(vote_inputs)
    new_rows = [
        LegislatorBillScore(
            bill_id=item.bill_id,
            bill_topic_id=item.bill_topic_id,
            legislator_id=item.legislator_id,
            year=item.year,
            topic=item.topic,
            score=item.score,
        )
        for item in computed
    ]
    session.add_all(new_rows)

    if mark_processed:
        bill = session.get(Bill, bill_id)
        if bill is not None:
            bill.score_processed_at = datetime.now(tz=UTC)
    # Flush so the new source rows are visible to the aggregate query below.
    session.flush()

    keys_after = {(item.legislator_id, item.year, item.topic) for item in computed}
    refreshed = refresh_aggregate_scores(session, keys_before | keys_after)
    return len(computed), refreshed
|
||||
|
||||
|
||||
def refresh_aggregate_scores(session: Session, keys: set[ScoreKey]) -> int:
    """Recompute legislator_score rows for the given (legislator, year, topic) keys.

    Aggregate rows for the keys are deleted, then re-created as the average of
    the matching per-bill source scores. Keys with no remaining source rows
    are left without an aggregate row.

    Returns:
        Number of aggregate rows written; 0 when ``keys`` is empty.
    """
    if not keys:
        return 0

    key_list = list(keys)

    aggregate_key = tuple_(
        LegislatorScore.legislator_id,
        LegislatorScore.year,
        LegislatorScore.topic,
    )
    session.execute(delete(LegislatorScore).where(aggregate_key.in_(key_list)))
    session.flush()

    group_columns = (
        LegislatorBillScore.legislator_id,
        LegislatorBillScore.year,
        LegislatorBillScore.topic,
    )
    averages_query = (
        select(*group_columns, func.avg(LegislatorBillScore.score).label("score"))
        .where(tuple_(*group_columns).in_(key_list))
        .group_by(*group_columns)
    )
    averages = session.execute(averages_query).all()

    fresh_rows = [
        LegislatorScore(
            legislator_id=average.legislator_id,
            year=average.year,
            topic=average.topic,
            score=float(average.score),
        )
        for average in averages
    ]
    session.add_all(fresh_rows)
    session.flush()
    return len(averages)
|
||||
|
||||
|
||||
def _load_bill_vote_score_inputs(
    session: Session,
    *,
    bill_id: int,
    topics: list[str] | None,
) -> list[VoteScoreInput]:
    """Load one VoteScoreInput per (vote record, bill topic) pair for a bill.

    Vote records are joined to their vote and to every topic row of the
    vote's bill; a non-empty ``topics`` list limits the topic rows. The year
    is taken from the vote date.
    """
    # NOTE(review): int(row.year) below assumes vote_date is never NULL.
    # Confirm against the Vote model.
    vote_year = extract("year", Vote.vote_date).cast(Integer).label("year")
    query = (
        select(
            Vote.bill_id,
            BillTopic.id.label("bill_topic_id"),
            VoteRecord.legislator_id,
            vote_year,
            BillTopic.topic,
            BillTopic.support_position,
            VoteRecord.position,
        )
        .join(Vote, Vote.id == VoteRecord.vote_id)
        .join(BillTopic, BillTopic.bill_id == Vote.bill_id)
        .where(Vote.bill_id == bill_id)
    )
    if topics:
        query = query.where(BillTopic.topic.in_(topics))

    inputs: list[VoteScoreInput] = []
    for record in session.execute(query):
        inputs.append(
            VoteScoreInput(
                bill_id=record.bill_id,
                bill_topic_id=record.bill_topic_id,
                legislator_id=record.legislator_id,
                year=int(record.year),
                topic=record.topic,
                support_position=record.support_position,
                vote_position=record.position,
            )
        )
    return inputs
|
||||
|
||||
|
||||
def _existing_score_keys_for_bill(
    session: Session,
    *,
    bill_id: int,
    topics: list[str] | None,
) -> set[ScoreKey]:
    """Return the (legislator_id, year, topic) keys currently stored for a bill.

    A non-empty ``topics`` list restricts the lookup to those topics.
    """
    query = select(
        LegislatorBillScore.legislator_id,
        LegislatorBillScore.year,
        LegislatorBillScore.topic,
    ).where(LegislatorBillScore.bill_id == bill_id)
    if topics:
        query = query.where(LegislatorBillScore.topic.in_(topics))

    keys: set[ScoreKey] = set()
    for legislator_id, year, topic in session.execute(query):
        keys.add((legislator_id, year, topic))
    return keys
|
||||
|
||||
|
||||
def _delete_bill_scores_statement(*, bill_id: int, topics: list[str] | None):
    """Build the DELETE statement for a bill's per-bill score rows.

    A non-empty ``topics`` list narrows the delete to those topics. The
    statement is returned unexecuted.
    """
    criteria = [LegislatorBillScore.bill_id == bill_id]
    if topics:
        criteria.append(LegislatorBillScore.topic.in_(topics))
    # Multiple criteria passed to where() are combined with AND.
    return delete(LegislatorBillScore).where(*criteria)
|
||||
|
||||
|
||||
def main() -> None:
    """Parse command-line options, run the scoring job and print a summary."""
    cli = argparse.ArgumentParser(
        description="Calculate legislator_score rows from bill_topic and vote_record data."
    )
    cli.add_argument("--congress", type=int, help="Only score bills from one Congress.")
    # Repeatable options collect into lists; they stay None when never given.
    cli.add_argument(
        "--bill-id",
        action="append",
        dest="bill_ids",
        type=int,
        help="Only score one bill id. Repeat for multiple bills.",
    )
    cli.add_argument(
        "--topic",
        action="append",
        dest="topics",
        help="Only calculate one topic. Repeat for multiple topics.",
    )
    cli.add_argument(
        "--force",
        action="store_true",
        help="Reprocess bills even when bill.score_processed_at is already set.",
    )
    cli.add_argument("--limit", type=int, help="Maximum number of bills to process.")
    options = cli.parse_args()

    engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
    with Session(engine) as session:
        summary = calculate_and_store_legislator_scores(
            session,
            congress=options.congress,
            bill_ids=options.bill_ids,
            topics=options.topics,
            force=options.force,
            limit=options.limit,
        )

    message = (
        "Processed "
        f"{summary.processed_bills} bills; stored {summary.bill_score_rows} bill score rows; "
        f"refreshed {summary.aggregate_score_rows} aggregate score rows."
    )
    print(message)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user