created scoring tables and basic logic

This commit is contained in:
2026-04-21 11:44:53 -04:00
parent be4b473a3c
commit 674edafe94
9 changed files with 843 additions and 38 deletions
@@ -1,8 +1,15 @@
"""init."""
"""Congress ORM models."""
from pipelines.orm.data_science_dev.congress.bill import Bill, BillText
from pipelines.orm.data_science_dev.congress.bill import (
Bill,
BillText,
BillTopic,
BillTopicPosition,
)
from pipelines.orm.data_science_dev.congress.legislator import (
LegislatorBillScore,
Legislator,
LegislatorScore,
LegislatorSocialMedia,
)
from pipelines.orm.data_science_dev.congress.vote import Vote, VoteRecord
@@ -10,7 +17,11 @@ from pipelines.orm.data_science_dev.congress.vote import Vote, VoteRecord
__all__ = [
"Bill",
"BillText",
"BillTopic",
"BillTopicPosition",
"Legislator",
"LegislatorBillScore",
"LegislatorScore",
"LegislatorSocialMedia",
"Vote",
"VoteRecord",
+69 -17
View File
@@ -2,22 +2,37 @@
from __future__ import annotations
from datetime import date
from datetime import date, datetime
from enum import StrEnum
from typing import TYPE_CHECKING
from sqlalchemy import ForeignKey, Index, UniqueConstraint
from sqlalchemy import DateTime, Enum, ForeignKey, Index, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
if TYPE_CHECKING:
from pipelines.orm.data_science_dev.congress.legislator import LegislatorBillScore
from pipelines.orm.data_science_dev.congress.vote import Vote
class BillTopicPosition(StrEnum):
"""Whether a yes vote on a bill is for or against a topic."""
FOR = "for"
AGAINST = "against"
class Bill(DataScienceDevTableBase):
"""Legislation with congress number, type, titles, status, and sponsor."""
__tablename__ = "bill"
__table_args__ = (
UniqueConstraint(
"congress", "bill_type", "number", name="uq_bill_congress_type_number"
),
Index("ix_bill_congress", "congress"),
)
congress: Mapped[int]
bill_type: Mapped[str]
@@ -33,6 +48,7 @@ class Bill(DataScienceDevTableBase):
sponsor_bioguide_id: Mapped[str | None]
subjects_top_term: Mapped[str | None]
score_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
votes: Mapped[list[Vote]] = relationship(
"Vote",
@@ -43,12 +59,15 @@ class Bill(DataScienceDevTableBase):
back_populates="bill",
cascade="all, delete-orphan",
)
__table_args__ = (
UniqueConstraint(
"congress", "bill_type", "number", name="uq_bill_congress_type_number"
),
Index("ix_bill_congress", "congress"),
topics: Mapped[list[BillTopic]] = relationship(
"BillTopic",
back_populates="bill",
cascade="all, delete-orphan",
)
legislator_bill_scores: Mapped[list[LegislatorBillScore]] = relationship(
"LegislatorBillScore",
back_populates="bill",
cascade="all, delete-orphan",
)
@@ -56,17 +75,50 @@ class BillText(DataScienceDevTableBase):
"""Stores different text versions of a bill (introduced, enrolled, etc.)."""
__tablename__ = "bill_text"
bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
version_code: Mapped[str]
version_name: Mapped[str | None]
text_content: Mapped[str | None]
date: Mapped[date | None]
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
__table_args__ = (
UniqueConstraint(
"bill_id", "version_code", name="uq_bill_text_bill_id_version_code"
),
)
bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
version_code: Mapped[str]
version_name: Mapped[str | None]
text_content: Mapped[str | None]
summary: Mapped[str | None]
date: Mapped[date | None]
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
class BillTopic(DataScienceDevTableBase):
"""One bill stance on one topic used to score roll-call votes."""
__tablename__ = "bill_topic"
__table_args__ = (
UniqueConstraint(
"bill_id",
"topic",
"support_position",
name="uq_bill_topic_bill_id_topic_support_position",
),
Index("ix_bill_topic_topic", "topic"),
)
bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
topic: Mapped[str]
support_position: Mapped[BillTopicPosition] = mapped_column(
Enum(
BillTopicPosition,
values_callable=lambda enum_cls: [member.value for member in enum_cls],
native_enum=False,
name="bill_topic_position",
)
)
bill: Mapped[Bill] = relationship("Bill", back_populates="topics")
legislator_bill_scores: Mapped[list[LegislatorBillScore]] = relationship(
"LegislatorBillScore",
back_populates="bill_topic",
cascade="all, delete-orphan",
)
@@ -5,12 +5,13 @@ from __future__ import annotations
from datetime import date
from typing import TYPE_CHECKING
from sqlalchemy import ForeignKey, Text
from sqlalchemy import ForeignKey, Index, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
if TYPE_CHECKING:
from pipelines.orm.data_science_dev.congress.bill import Bill, BillTopic
from pipelines.orm.data_science_dev.congress.vote import VoteRecord
@@ -50,6 +51,16 @@ class Legislator(DataScienceDevTableBase):
back_populates="legislator",
cascade="all, delete-orphan",
)
scores: Mapped[list[LegislatorScore]] = relationship(
"LegislatorScore",
back_populates="legislator",
cascade="all, delete-orphan",
)
bill_scores: Mapped[list[LegislatorBillScore]] = relationship(
"LegislatorBillScore",
back_populates="legislator",
cascade="all, delete-orphan",
)
class LegislatorSocialMedia(DataScienceDevTableBase):
@@ -66,3 +77,65 @@ class LegislatorSocialMedia(DataScienceDevTableBase):
legislator: Mapped[Legislator] = relationship(
back_populates="social_media_accounts"
)
class LegislatorScore(DataScienceDevTableBase):
"""Computed topic score for a legislator in one calendar year."""
__tablename__ = "legislator_score"
__table_args__ = (
UniqueConstraint(
"legislator_id",
"year",
"topic",
name="uq_legislator_score_legislator_id_year_topic",
),
Index("ix_legislator_score_year_topic", "year", "topic"),
)
legislator_id: Mapped[int] = mapped_column(
ForeignKey("main.legislator.id", ondelete="CASCADE"),
index=True,
)
year: Mapped[int]
topic: Mapped[str]
score: Mapped[float]
legislator: Mapped[Legislator] = relationship(back_populates="scores")
class LegislatorBillScore(DataScienceDevTableBase):
"""Per-bill source score used to maintain aggregate legislator scores."""
__tablename__ = "legislator_bill_score"
__table_args__ = (
UniqueConstraint(
"bill_topic_id",
"legislator_id",
"year",
name="uq_legislator_bill_score_bill_topic_id_legislator_id_year",
),
Index("ix_legislator_bill_score_year_topic", "year", "topic"),
)
bill_id: Mapped[int] = mapped_column(
ForeignKey("main.bill.id", ondelete="CASCADE"),
index=True,
)
bill_topic_id: Mapped[int] = mapped_column(
ForeignKey("main.bill_topic.id", ondelete="CASCADE"),
index=True,
)
legislator_id: Mapped[int] = mapped_column(
ForeignKey("main.legislator.id", ondelete="CASCADE"),
index=True,
)
year: Mapped[int]
topic: Mapped[str]
score: Mapped[float]
bill: Mapped[Bill] = relationship(back_populates="legislator_bill_scores")
bill_topic: Mapped[BillTopic] = relationship(back_populates="legislator_bill_scores")
legislator: Mapped[Legislator] = relationship(back_populates="bill_scores")
+11 -11
View File
@@ -44,6 +44,17 @@ class Vote(DataScienceDevTableBase):
"""Roll call votes with counts and optional bill linkage."""
__tablename__ = "vote"
__table_args__ = (
UniqueConstraint(
"congress",
"chamber",
"session",
"number",
name="uq_vote_congress_chamber_session_number",
),
Index("ix_vote_date", "vote_date"),
Index("ix_vote_congress_chamber", "congress", "chamber"),
)
congress: Mapped[int]
chamber: Mapped[str]
@@ -71,14 +82,3 @@ class Vote(DataScienceDevTableBase):
cascade="all, delete-orphan",
)
__table_args__ = (
UniqueConstraint(
"congress",
"chamber",
"session",
"number",
name="uq_vote_congress_chamber_session_number",
),
Index("ix_vote_date", "vote_date"),
Index("ix_vote_congress_chamber", "congress", "chamber"),
)
+15 -1
View File
@@ -2,14 +2,28 @@
from __future__ import annotations
from pipelines.orm.data_science_dev.congress import Bill, BillText, Legislator, Vote, VoteRecord
from pipelines.orm.data_science_dev.congress import (
Bill,
BillText,
BillTopic,
BillTopicPosition,
Legislator,
LegislatorBillScore,
LegislatorScore,
Vote,
VoteRecord,
)
from pipelines.orm.data_science_dev.posts import partitions # noqa: F401 — registers partition classes in metadata
from pipelines.orm.data_science_dev.posts.tables import Posts
__all__ = [
"Bill",
"BillText",
"BillTopic",
"BillTopicPosition",
"Legislator",
"LegislatorBillScore",
"LegislatorScore",
"Posts",
"Vote",
"VoteRecord",