From ca62cc36a770bf99d41b21eb1f248674ba2833c9 Mon Sep 17 00:00:00 2001
From: Richie Cahill
Date: Fri, 27 Mar 2026 10:49:17 -0400
Subject: [PATCH] adding congress data to new DS DB

---
 ...03_27-adding_congress_data_83bfc8af92d8.py | 153 ++++++++++++++++++
 .../orm/data_science_dev/congress/__init__.py |  13 ++
 python/orm/data_science_dev/congress/bill.py  |  66 ++++++++
 .../data_science_dev/congress/legislator.py   |  47 ++++++
 python/orm/data_science_dev/congress/vote.py  |  78 +++++++++
 python/orm/data_science_dev/models.py         |   6 +
 6 files changed, 363 insertions(+)
 create mode 100644 python/alembic/data_science_dev/versions/2026_03_27-adding_congress_data_83bfc8af92d8.py
 create mode 100644 python/orm/data_science_dev/congress/__init__.py
 create mode 100644 python/orm/data_science_dev/congress/bill.py
 create mode 100644 python/orm/data_science_dev/congress/legislator.py
 create mode 100644 python/orm/data_science_dev/congress/vote.py

diff --git a/python/alembic/data_science_dev/versions/2026_03_27-adding_congress_data_83bfc8af92d8.py b/python/alembic/data_science_dev/versions/2026_03_27-adding_congress_data_83bfc8af92d8.py
new file mode 100644
index 0000000..79295c5
--- /dev/null
+++ b/python/alembic/data_science_dev/versions/2026_03_27-adding_congress_data_83bfc8af92d8.py
@@ -0,0 +1,153 @@
+"""adding congress data.
+
+Revision ID: 83bfc8af92d8
+Revises: a1b2c3d4e5f6
+Create Date: 2026-03-27 10:43:02.324510
+
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import sqlalchemy as sa
+from alembic import op
+
+from python.orm import DataScienceDevBase
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+# revision identifiers, used by Alembic.
+revision: str = "83bfc8af92d8"
+down_revision: str | None = "a1b2c3d4e5f6"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+schema = DataScienceDevBase.schema_name
+
+
+def upgrade() -> None:
+    """Create the bill, legislator, bill_text, vote, and vote_record tables with their indexes."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "bill",
+        sa.Column("congress", sa.Integer(), nullable=False),
+        sa.Column("bill_type", sa.String(), nullable=False),
+        sa.Column("number", sa.Integer(), nullable=False),
+        sa.Column("title", sa.String(), nullable=True),
+        sa.Column("title_short", sa.String(), nullable=True),
+        sa.Column("official_title", sa.String(), nullable=True),
+        sa.Column("status", sa.String(), nullable=True),
+        sa.Column("status_at", sa.Date(), nullable=True),
+        sa.Column("sponsor_bioguide_id", sa.String(), nullable=True),
+        sa.Column("subjects_top_term", sa.String(), nullable=True),
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_bill")),
+        sa.UniqueConstraint("congress", "bill_type", "number", name="uq_bill_congress_type_number"),
+        schema=schema,
+    )
+    op.create_index("ix_bill_congress", "bill", ["congress"], unique=False, schema=schema)
+    op.create_table(
+        "legislator",
+        sa.Column("bioguide_id", sa.Text(), nullable=False),
+        sa.Column("thomas_id", sa.String(), nullable=True),
+        sa.Column("lis_id", sa.String(), nullable=True),
+        sa.Column("govtrack_id", sa.Integer(), nullable=True),
+        sa.Column("opensecrets_id", sa.String(), nullable=True),
+        sa.Column("fec_ids", sa.String(), nullable=True),
+        sa.Column("first_name", sa.String(), nullable=False),
+        sa.Column("last_name", sa.String(), nullable=False),
+        sa.Column("official_full_name", sa.String(), nullable=True),
+        sa.Column("nickname", sa.String(), nullable=True),
+        sa.Column("birthday", sa.Date(), nullable=True),
+        sa.Column("gender", sa.String(), nullable=True),
+        sa.Column("current_party", sa.String(), nullable=True),
+        sa.Column("current_state", sa.String(), nullable=True),
+        sa.Column("current_district", sa.Integer(), nullable=True),
+        sa.Column("current_chamber", sa.String(), nullable=True),
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_legislator")),
+        schema=schema,
+    )
+    op.create_index(op.f("ix_legislator_bioguide_id"), "legislator", ["bioguide_id"], unique=True, schema=schema)
+    op.create_table(
+        "bill_text",
+        sa.Column("bill_id", sa.Integer(), nullable=False),
+        sa.Column("version_code", sa.String(), nullable=False),
+        sa.Column("version_name", sa.String(), nullable=True),
+        sa.Column("text_content", sa.String(), nullable=True),
+        sa.Column("date", sa.Date(), nullable=True),
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["bill_id"], [f"{schema}.bill.id"], name=op.f("fk_bill_text_bill_id_bill"), ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_bill_text")),
+        sa.UniqueConstraint("bill_id", "version_code", name="uq_bill_text_bill_id_version_code"),
+        schema=schema,
+    )
+    op.create_table(
+        "vote",
+        sa.Column("congress", sa.Integer(), nullable=False),
+        sa.Column("chamber", sa.String(), nullable=False),
+        sa.Column("session", sa.Integer(), nullable=False),
+        sa.Column("number", sa.Integer(), nullable=False),
+        sa.Column("vote_type", sa.String(), nullable=True),
+        sa.Column("question", sa.String(), nullable=True),
+        sa.Column("result", sa.String(), nullable=True),
+        sa.Column("result_text", sa.String(), nullable=True),
+        sa.Column("vote_date", sa.Date(), nullable=False),
+        sa.Column("yea_count", sa.Integer(), nullable=True),
+        sa.Column("nay_count", sa.Integer(), nullable=True),
+        sa.Column("not_voting_count", sa.Integer(), nullable=True),
+        sa.Column("present_count", sa.Integer(), nullable=True),
+        sa.Column("bill_id", sa.Integer(), nullable=True),
+        sa.Column("id", sa.Integer(), nullable=False),
+        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
+        sa.ForeignKeyConstraint(["bill_id"], [f"{schema}.bill.id"], name=op.f("fk_vote_bill_id_bill")),
+        sa.PrimaryKeyConstraint("id", name=op.f("pk_vote")),
+        sa.UniqueConstraint("congress", "chamber", "session", "number", name="uq_vote_congress_chamber_session_number"),
+        schema=schema,
+    )
+    op.create_index("ix_vote_congress_chamber", "vote", ["congress", "chamber"], unique=False, schema=schema)
+    op.create_index("ix_vote_date", "vote", ["vote_date"], unique=False, schema=schema)
+    op.create_table(
+        "vote_record",
+        sa.Column("vote_id", sa.Integer(), nullable=False),
+        sa.Column("legislator_id", sa.Integer(), nullable=False),
+        sa.Column("position", sa.String(), nullable=False),
+        sa.ForeignKeyConstraint(
+            ["legislator_id"],
+            [f"{schema}.legislator.id"],
+            name=op.f("fk_vote_record_legislator_id_legislator"),
+            ondelete="CASCADE",
+        ),
+        sa.ForeignKeyConstraint(
+            ["vote_id"], [f"{schema}.vote.id"], name=op.f("fk_vote_record_vote_id_vote"), ondelete="CASCADE"
+        ),
+        sa.PrimaryKeyConstraint("vote_id", "legislator_id", name=op.f("pk_vote_record")),
+        schema=schema,
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop the congress tables and their indexes in the reverse order of creation."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table("vote_record", schema=schema)
+    op.drop_index("ix_vote_date", table_name="vote", schema=schema)
+    op.drop_index("ix_vote_congress_chamber", table_name="vote", schema=schema)
+    op.drop_table("vote", schema=schema)
+    op.drop_table("bill_text", schema=schema)
+    op.drop_index(op.f("ix_legislator_bioguide_id"), table_name="legislator", schema=schema)
+    op.drop_table("legislator", schema=schema)
+    op.drop_index("ix_bill_congress", table_name="bill", schema=schema)
+    op.drop_table("bill", schema=schema)
+    # ### end Alembic commands ###
diff --git a/python/orm/data_science_dev/congress/__init__.py b/python/orm/data_science_dev/congress/__init__.py
new file mode 100644
index 0000000..44cb304
--- /dev/null
+++ b/python/orm/data_science_dev/congress/__init__.py
@@ -0,0 +1,13 @@
+"""Congress ORM models: re-exports Bill, BillText, Legislator, Vote, and VoteRecord."""
+
+from python.orm.data_science_dev.congress.bill import Bill, BillText
+from python.orm.data_science_dev.congress.legislator import Legislator
+from python.orm.data_science_dev.congress.vote import Vote, VoteRecord
+
+__all__ = [
+    "Bill",
+    "BillText",
+    "Legislator",
+    "Vote",
+    "VoteRecord",
+]
diff --git a/python/orm/data_science_dev/congress/bill.py b/python/orm/data_science_dev/congress/bill.py
new file mode 100644
index 0000000..9cbc0bf
--- /dev/null
+++ b/python/orm/data_science_dev/congress/bill.py
@@ -0,0 +1,66 @@
+"""Bill model - legislation introduced in Congress."""
+
+from __future__ import annotations
+
+from datetime import date
+from typing import TYPE_CHECKING
+
+from sqlalchemy import ForeignKey, Index, UniqueConstraint
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from python.orm.data_science_dev.base import DataScienceDevTableBase
+
+if TYPE_CHECKING:
+    from python.orm.data_science_dev.congress.vote import Vote
+
+
+class Bill(DataScienceDevTableBase):
+    """Legislation with congress number, type, titles, status, and sponsor."""
+
+    __tablename__ = "bill"
+
+    congress: Mapped[int]
+    bill_type: Mapped[str]
+    number: Mapped[int]
+
+    title: Mapped[str | None]
+    title_short: Mapped[str | None]
+    official_title: Mapped[str | None]
+
+    status: Mapped[str | None]
+    status_at: Mapped[date | None]
+
+    sponsor_bioguide_id: Mapped[str | None]
+
+    subjects_top_term: Mapped[str | None]
+
+    votes: Mapped[list[Vote]] = relationship(
+        "Vote",
+        back_populates="bill",
+    )
+    bill_texts: Mapped[list[BillText]] = relationship(
+        "BillText",
+        back_populates="bill",
+        cascade="all, delete-orphan",
+    )
+
+    __table_args__ = (
+        UniqueConstraint("congress", "bill_type", "number", name="uq_bill_congress_type_number"),
+        Index("ix_bill_congress", "congress"),
+    )
+
+
+class BillText(DataScienceDevTableBase):
+    """Stores different text versions of a bill (introduced, enrolled, etc.)."""
+
+    __tablename__ = "bill_text"
+
+    bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
+    version_code: Mapped[str]
+    version_name: Mapped[str | None]
+    text_content: Mapped[str | None]
+    date: Mapped[date | None]
+
+    bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
+
+    __table_args__ = (UniqueConstraint("bill_id", "version_code", name="uq_bill_text_bill_id_version_code"),)
diff --git a/python/orm/data_science_dev/congress/legislator.py b/python/orm/data_science_dev/congress/legislator.py
new file mode 100644
index 0000000..f750451
--- /dev/null
+++ b/python/orm/data_science_dev/congress/legislator.py
@@ -0,0 +1,47 @@
+"""Legislator model - members of Congress."""
+
+from __future__ import annotations
+
+from datetime import date
+from typing import TYPE_CHECKING
+
+from sqlalchemy import Text
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from python.orm.data_science_dev.base import DataScienceDevTableBase
+
+if TYPE_CHECKING:
+    from python.orm.data_science_dev.congress.vote import VoteRecord
+
+
+class Legislator(DataScienceDevTableBase):
+    """Members of Congress with identification and current term info."""
+
+    __tablename__ = "legislator"
+
+    bioguide_id: Mapped[str] = mapped_column(Text, unique=True, index=True)
+
+    thomas_id: Mapped[str | None]
+    lis_id: Mapped[str | None]
+    govtrack_id: Mapped[int | None]
+    opensecrets_id: Mapped[str | None]
+    fec_ids: Mapped[str | None]
+
+    first_name: Mapped[str]
+    last_name: Mapped[str]
+    official_full_name: Mapped[str | None]
+    nickname: Mapped[str | None]
+
+    birthday: Mapped[date | None]
+    gender: Mapped[str | None]
+
+    current_party: Mapped[str | None]
+    current_state: Mapped[str | None]
+    current_district: Mapped[int | None]
+    current_chamber: Mapped[str | None]
+
+    vote_records: Mapped[list[VoteRecord]] = relationship(
+        "VoteRecord",
+        back_populates="legislator",
+        cascade="all, delete-orphan",
+    )
diff --git a/python/orm/data_science_dev/congress/vote.py b/python/orm/data_science_dev/congress/vote.py
new file mode 100644
index 0000000..5495f7a
--- /dev/null
+++ b/python/orm/data_science_dev/congress/vote.py
@@ -0,0 +1,78 @@
+"""Vote model - roll call votes in Congress."""
+
+from __future__ import annotations
+
+from datetime import date
+from typing import TYPE_CHECKING
+
+from sqlalchemy import ForeignKey, Index, UniqueConstraint
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from python.orm.data_science_dev.base import DataScienceDevBase, DataScienceDevTableBase
+
+if TYPE_CHECKING:
+    from python.orm.data_science_dev.congress.bill import Bill
+    from python.orm.data_science_dev.congress.legislator import Legislator
+
+
+class VoteRecord(DataScienceDevBase):
+    """Links a vote to a legislator with their position (Yea, Nay, etc.)."""
+
+    __tablename__ = "vote_record"
+
+    vote_id: Mapped[int] = mapped_column(
+        ForeignKey("main.vote.id", ondelete="CASCADE"),
+        primary_key=True,
+    )
+    legislator_id: Mapped[int] = mapped_column(
+        ForeignKey("main.legislator.id", ondelete="CASCADE"),
+        primary_key=True,
+    )
+    position: Mapped[str]
+
+    vote: Mapped[Vote] = relationship("Vote", back_populates="vote_records")
+    legislator: Mapped[Legislator] = relationship("Legislator", back_populates="vote_records")
+
+
+class Vote(DataScienceDevTableBase):
+    """Roll call votes with counts and optional bill linkage."""
+
+    __tablename__ = "vote"
+
+    congress: Mapped[int]
+    chamber: Mapped[str]
+    session: Mapped[int]
+    number: Mapped[int]
+
+    vote_type: Mapped[str | None]
+    question: Mapped[str | None]
+    result: Mapped[str | None]
+    result_text: Mapped[str | None]
+
+    vote_date: Mapped[date]
+
+    yea_count: Mapped[int | None]
+    nay_count: Mapped[int | None]
+    not_voting_count: Mapped[int | None]
+    present_count: Mapped[int | None]
+
+    bill_id: Mapped[int | None] = mapped_column(ForeignKey("main.bill.id"))
+
+    bill: Mapped[Bill | None] = relationship("Bill", back_populates="votes")
+    vote_records: Mapped[list[VoteRecord]] = relationship(
+        "VoteRecord",
+        back_populates="vote",
+        cascade="all, delete-orphan",
+    )
+
+    __table_args__ = (
+        UniqueConstraint(
+            "congress",
+            "chamber",
+            "session",
+            "number",
+            name="uq_vote_congress_chamber_session_number",
+        ),
+        Index("ix_vote_date", "vote_date"),
+        Index("ix_vote_congress_chamber", "congress", "chamber"),
+    )
diff --git a/python/orm/data_science_dev/models.py b/python/orm/data_science_dev/models.py
index 29896cd..6cbad45 100644
--- a/python/orm/data_science_dev/models.py
+++ b/python/orm/data_science_dev/models.py
@@ -2,9 +2,15 @@
 
 from __future__ import annotations
 
+from python.orm.data_science_dev.congress import Bill, BillText, Legislator, Vote, VoteRecord
 from python.orm.data_science_dev.posts import partitions  # noqa: F401 — registers partition classes in metadata
 from python.orm.data_science_dev.posts.tables import Posts
 
 __all__ = [
+    "Bill",
+    "BillText",
+    "Legislator",
     "Posts",
+    "Vote",
+    "VoteRecord",
 ]