updated dependencies

removed LegislatorBillScore
converted default_config_path and get_config_dir
2026-04-21 22:57:54 -04:00 · 2026-04-21 22:51:29 -04:00 · 2026-04-21 22:51:10 -04:00 · 2026-04-21 22:48:13 -04:00 · 2026-04-21 22:44:52 -04:00 · 2026-04-21 21:42:13 -04:00
54 changed files with 162 additions and 10261 deletions
@@ -110,7 +110,7 @@ ipython_config.py
 # pdm
 #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-pipelines.
 #   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
 # pdm.lock
 # pdm.toml
@@ -1,98 +0,0 @@
 """adding LegislatorScoreFake.
 Revision ID: 06f833813bd7
 Revises: b63ed11d6775
 Create Date: 2026-04-22 18:41:07.484609
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 import sqlalchemy as sa
 from alembic import op
 from pipelines.orm import DataScienceDevBase
 if TYPE_CHECKING:
    from collections.abc import Sequence
 # revision identifiers, used by Alembic.
 revision: str = "06f833813bd7"
 down_revision: str | None = "b63ed11d6775"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 schema = DataScienceDevBase.schema_name
 def upgrade() -> None:
    """Upgrade."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "legislator_score_fake",
        sa.Column("legislator_id", sa.Integer(), nullable=False),
        sa.Column("year", sa.Integer(), nullable=False),
        sa.Column("topic", sa.String(), nullable=False),
        sa.Column("score", sa.Float(), nullable=False),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["legislator_id"],
            [f"{schema}.legislator.id"],
            name=op.f("fk_legislator_score_fake_legislator_id_legislator"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_legislator_score_fake")),
        sa.UniqueConstraint(
            "legislator_id",
            "year",
            "topic",
            name="uq_legislator_score_fake_legislator_id_year_topic",
        ),
        schema=schema,
    )
    op.create_index(
        op.f("ix_legislator_score_fake_legislator_id"),
        "legislator_score_fake",
        ["legislator_id"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_legislator_score_fake_year_topic",
        "legislator_score_fake",
        ["year", "topic"],
        unique=False,
        schema=schema,
    )
    # ### end Alembic commands ###
 def downgrade() -> None:
    """Downgrade."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(
        "ix_legislator_score_fake_year_topic",
        table_name="legislator_score_fake",
        schema=schema,
    )
    op.drop_index(
        op.f("ix_legislator_score_fake_legislator_id"),
        table_name="legislator_score_fake",
        schema=schema,
    )
    op.drop_table("legislator_score_fake", schema=schema)
    # ### end Alembic commands ###
@@ -1,64 +0,0 @@
 """add vote.bill_text_id linkage.
 Revision ID: 9c7d4a2e1b10
 Revises: 06f833813bd7
 Create Date: 2026-04-23 00:00:00.000000
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 import sqlalchemy as sa
 from alembic import op
 from pipelines.orm import DataScienceDevBase
 if TYPE_CHECKING:
    from collections.abc import Sequence
 # revision identifiers, used by Alembic.
 revision: str = "9c7d4a2e1b10"
 down_revision: str | None = "06f833813bd7"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 schema = DataScienceDevBase.schema_name
 def upgrade() -> None:
    """Upgrade."""
    op.add_column(
        "vote",
        sa.Column("bill_text_id", sa.Integer(), nullable=True),
        schema=schema,
    )
    op.create_index(
        "ix_vote_bill_text_id",
        "vote",
        ["bill_text_id"],
        unique=False,
        schema=schema,
    )
    op.create_foreign_key(
        "fk_vote_bill_text_id_bill_text",
        "vote",
        "bill_text",
        ["bill_text_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
    )
 def downgrade() -> None:
    """Downgrade."""
    op.drop_constraint(
        "fk_vote_bill_text_id_bill_text",
        "vote",
        schema=schema,
        type_="foreignkey",
    )
    op.drop_index("ix_vote_bill_text_id", table_name="vote", schema=schema)
    op.drop_column("vote", "bill_text_id", schema=schema)
@@ -1,844 +0,0 @@
 """canonical vote context v3.
 Revision ID: 1f8c0e7a9d21
 Revises: 9c7d4a2e1b10
 Create Date: 2026-04-25 00:00:00.000000
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 import sqlalchemy as sa
 from sqlalchemy.dialects import postgresql
 from alembic import op
 from pipelines.orm import DataScienceDevBase
 if TYPE_CHECKING:
    from collections.abc import Sequence
 revision: str = "1f8c0e7a9d21"
 down_revision: str | None = "9c7d4a2e1b10"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 schema = DataScienceDevBase.schema_name
 def upgrade() -> None:
    """Upgrade."""
    op.create_table(
        "ingest_run",
        sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("git_sha", sa.String(), nullable=True),
        sa.Column("classifier_version", sa.String(), nullable=True),
        sa.Column("source_snapshot_label", sa.String(), nullable=True),
        sa.Column("status", sa.String(), nullable=False),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_ingest_run")),
        schema=schema,
    )
    op.create_table(
        "source_artifact",
        sa.Column("source_kind", sa.String(), nullable=False),
        sa.Column("congress", sa.Integer(), nullable=False),
        sa.Column("chamber", sa.String(), nullable=True),
        sa.Column("local_path", sa.String(), nullable=False),
        sa.Column("source_url", sa.String(), nullable=True),
        sa.Column("sha256", sa.String(), nullable=False),
        sa.Column("byte_size", sa.Integer(), nullable=False),
        sa.Column("modified_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("ingest_run_id", sa.Integer(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["ingest_run_id"],
            [f"{schema}.ingest_run.id"],
            name=op.f("fk_source_artifact_ingest_run_id_ingest_run"),
            ondelete="SET NULL",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_source_artifact")),
        schema=schema,
    )
    op.create_index(
        "ix_source_artifact_source_kind",
        "source_artifact",
        ["source_kind"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_source_artifact_congress",
        "source_artifact",
        ["congress"],
        unique=False,
        schema=schema,
    )
    op.create_table(
        "score_run",
        sa.Column("ingest_run_id", sa.Integer(), nullable=True),
        sa.Column("classifier_version", sa.String(), nullable=True),
        sa.Column("scoring_version", sa.String(), nullable=True),
        sa.Column("included_vote_count", sa.Integer(), nullable=False),
        sa.Column("excluded_vote_count", sa.Integer(), nullable=False),
        sa.Column("started_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("completed_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["ingest_run_id"],
            [f"{schema}.ingest_run.id"],
            name=op.f("fk_score_run_ingest_run_id_ingest_run"),
            ondelete="SET NULL",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_score_run")),
        schema=schema,
    )
    op.add_column(
        "legislator_score",
        sa.Column("score_run_id", sa.Integer(), nullable=True),
        schema=schema,
    )
    op.create_index(
        op.f("ix_legislator_score_score_run_id"),
        "legislator_score",
        ["score_run_id"],
        unique=False,
        schema=schema,
    )
    op.create_foreign_key(
        op.f("fk_legislator_score_score_run_id_score_run"),
        "legislator_score",
        "score_run",
        ["score_run_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
        ondelete="CASCADE",
    )
    op.add_column(
        "bill_text",
        sa.Column("source_datetime_raw", sa.String(), nullable=True),
        schema=schema,
    )
    op.add_column(
        "bill_text", sa.Column("text_url_xml", sa.String(), nullable=True), schema=schema
    )
    op.add_column(
        "bill_text", sa.Column("text_url_pdf", sa.String(), nullable=True), schema=schema
    )
    op.add_column(
        "bill_text",
        sa.Column("text_url_html", sa.String(), nullable=True),
        schema=schema,
    )
    op.add_column(
        "bill_text",
        sa.Column("source_artifact_id", sa.Integer(), nullable=True),
        schema=schema,
    )
    op.create_foreign_key(
        op.f("fk_bill_text_source_artifact_id_source_artifact"),
        "bill_text",
        "source_artifact",
        ["source_artifact_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
        ondelete="SET NULL",
    )
    op.create_table(
        "bill_action",
        sa.Column("bill_id", sa.Integer(), nullable=False),
        sa.Column("sequence", sa.Integer(), nullable=False),
        sa.Column("action_date", sa.Date(), nullable=False),
        sa.Column("action_time", sa.String(), nullable=True),
        sa.Column("action_text", sa.String(), nullable=False),
        sa.Column("action_type", sa.String(), nullable=True),
        sa.Column("action_code", sa.String(), nullable=True),
        sa.Column("source_system_code", sa.String(), nullable=True),
        sa.Column("source_system_name", sa.String(), nullable=True),
        sa.Column("source_artifact_id", sa.Integer(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["bill_id"],
            [f"{schema}.bill.id"],
            name=op.f("fk_bill_action_bill_id_bill"),
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["source_artifact_id"],
            [f"{schema}.source_artifact.id"],
            name=op.f("fk_bill_action_source_artifact_id_source_artifact"),
            ondelete="SET NULL",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_bill_action")),
        sa.UniqueConstraint("bill_id", "sequence", name="uq_bill_action_bill_id_sequence"),
        schema=schema,
    )
    op.create_table(
        "bill_action_recorded_vote",
        sa.Column("bill_action_id", sa.Integer(), nullable=False),
        sa.Column("congress", sa.Integer(), nullable=False),
        sa.Column("chamber", sa.String(), nullable=False),
        sa.Column("session_number", sa.Integer(), nullable=False),
        sa.Column("roll_number", sa.Integer(), nullable=False),
        sa.Column("vote_datetime", sa.DateTime(timezone=True), nullable=True),
        sa.Column("vote_url", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["bill_action_id"],
            [f"{schema}.bill_action.id"],
            name=op.f("fk_bill_action_recorded_vote_bill_action_id_bill_action"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_bill_action_recorded_vote")),
        sa.UniqueConstraint(
            "bill_action_id",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
            name="uq_bill_action_recorded_vote_match_key",
        ),
        schema=schema,
    )
    op.create_table(
        "bill_relation",
        sa.Column("bill_id", sa.Integer(), nullable=False),
        sa.Column("related_bill_id", sa.Integer(), nullable=False),
        sa.Column("relationship_type", sa.String(), nullable=False),
        sa.Column("identified_by", sa.String(), nullable=True),
        sa.Column("latest_action_date", sa.Date(), nullable=True),
        sa.Column("latest_action_text", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["bill_id"],
            [f"{schema}.bill.id"],
            name=op.f("fk_bill_relation_bill_id_bill"),
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["related_bill_id"],
            [f"{schema}.bill.id"],
            name=op.f("fk_bill_relation_related_bill_id_bill"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_bill_relation")),
        schema=schema,
    )
    op.create_index(
        "ix_bill_relation_bill_id",
        "bill_relation",
        ["bill_id"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_bill_relation_related_bill_id",
        "bill_relation",
        ["related_bill_id"],
        unique=False,
        schema=schema,
    )
    op.create_table(
        "amendment",
        sa.Column("congress", sa.Integer(), nullable=False),
        sa.Column("amendment_type", sa.String(), nullable=False),
        sa.Column("number", sa.Integer(), nullable=False),
        sa.Column("chamber", sa.String(), nullable=False),
        sa.Column("description", sa.String(), nullable=True),
        sa.Column("purpose", sa.String(), nullable=True),
        sa.Column("amended_bill_id", sa.Integer(), nullable=True),
        sa.Column("amended_amendment_id", sa.Integer(), nullable=True),
        sa.Column("source_path", sa.String(), nullable=True),
        sa.Column("source_artifact_id", sa.Integer(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["amended_amendment_id"],
            [f"{schema}.amendment.id"],
            name=op.f("fk_amendment_amended_amendment_id_amendment"),
            ondelete="SET NULL",
        ),
        sa.ForeignKeyConstraint(
            ["amended_bill_id"],
            [f"{schema}.bill.id"],
            name=op.f("fk_amendment_amended_bill_id_bill"),
            ondelete="SET NULL",
        ),
        sa.ForeignKeyConstraint(
            ["source_artifact_id"],
            [f"{schema}.source_artifact.id"],
            name=op.f("fk_amendment_source_artifact_id_source_artifact"),
            ondelete="SET NULL",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_amendment")),
        sa.UniqueConstraint(
            "congress",
            "amendment_type",
            "number",
            name="uq_amendment_congress_type_number",
        ),
        schema=schema,
    )
    op.create_table(
        "amendment_action",
        sa.Column("amendment_id", sa.Integer(), nullable=False),
        sa.Column("sequence", sa.Integer(), nullable=False),
        sa.Column("action_date", sa.Date(), nullable=False),
        sa.Column("action_time", sa.String(), nullable=True),
        sa.Column("action_text", sa.String(), nullable=False),
        sa.Column("action_type", sa.String(), nullable=True),
        sa.Column("action_code", sa.String(), nullable=True),
        sa.Column("source_system_code", sa.String(), nullable=True),
        sa.Column("source_system_name", sa.String(), nullable=True),
        sa.Column("source_artifact_id", sa.Integer(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["amendment_id"],
            [f"{schema}.amendment.id"],
            name=op.f("fk_amendment_action_amendment_id_amendment"),
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["source_artifact_id"],
            [f"{schema}.source_artifact.id"],
            name=op.f("fk_amendment_action_source_artifact_id_source_artifact"),
            ondelete="SET NULL",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_amendment_action")),
        sa.UniqueConstraint(
            "amendment_id",
            "sequence",
            name="uq_amendment_action_amendment_id_sequence",
        ),
        schema=schema,
    )
    op.create_table(
        "amendment_action_recorded_vote",
        sa.Column("amendment_action_id", sa.Integer(), nullable=False),
        sa.Column("congress", sa.Integer(), nullable=False),
        sa.Column("chamber", sa.String(), nullable=False),
        sa.Column("session_number", sa.Integer(), nullable=False),
        sa.Column("roll_number", sa.Integer(), nullable=False),
        sa.Column("vote_datetime", sa.DateTime(timezone=True), nullable=True),
        sa.Column("vote_url", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["amendment_action_id"],
            [f"{schema}.amendment_action.id"],
            name=op.f(
                "fk_amendment_action_recorded_vote_amendment_action_id_amendment_action"
            ),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_amendment_action_recorded_vote")),
        sa.UniqueConstraint(
            "amendment_action_id",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
            name="uq_amendment_action_recorded_vote_match_key",
        ),
        schema=schema,
    )
    op.drop_constraint(
        "uq_vote_congress_chamber_session_number",
        "vote",
        schema=schema,
        type_="unique",
    )
    op.alter_column("vote", "session", new_column_name="session_year", schema=schema)
    op.alter_column("vote", "number", new_column_name="roll_number", schema=schema)
    op.add_column("vote", sa.Column("session_number", sa.Integer(), nullable=True), schema=schema)
    op.add_column(
        "vote",
        sa.Column("vote_datetime", sa.DateTime(timezone=True), nullable=True),
        schema=schema,
    )
    op.add_column(
        "vote", sa.Column("raw_vote_source_url", sa.String(), nullable=True), schema=schema
    )
    op.add_column(
        "vote",
        sa.Column("raw_bill_ref", postgresql.JSONB(astext_type=sa.Text()), nullable=True),
        schema=schema,
    )
    op.add_column(
        "vote",
        sa.Column(
            "raw_amendment_ref",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        schema=schema,
    )
    op.add_column(
        "vote",
        sa.Column(
            "raw_nomination_ref",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        schema=schema,
    )
    op.add_column(
        "vote",
        sa.Column(
            "raw_treaty_ref",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        schema=schema,
    )
    op.add_column(
        "vote",
        sa.Column("raw_vote_source_artifact_id", sa.Integer(), nullable=True),
        schema=schema,
    )
    op.create_foreign_key(
        op.f("fk_vote_raw_vote_source_artifact_id_source_artifact"),
        "vote",
        "source_artifact",
        ["raw_vote_source_artifact_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
        ondelete="SET NULL",
    )
    op.execute(
        sa.text(
            f"""
            UPDATE {schema}.vote
            SET session_number = session_year - (((congress - 1) * 2) + 1789) + 1
            """
        )
    )
    op.alter_column("vote", "session_number", nullable=False, schema=schema)
    op.create_unique_constraint(
        "uq_vote_congress_chamber_session_number_roll_number",
        "vote",
        ["congress", "chamber", "session_number", "roll_number"],
        schema=schema,
    )
    op.drop_constraint(
        op.f("fk_vote_bill_id_bill"),
        "vote",
        schema=schema,
        type_="foreignkey",
    )
    op.drop_constraint(
        "fk_vote_bill_text_id_bill_text",
        "vote",
        schema=schema,
        type_="foreignkey",
    )
    op.drop_index("ix_vote_bill_text_id", table_name="vote", schema=schema)
    op.drop_column("vote", "bill_id", schema=schema)
    op.drop_column("vote", "bill_text_id", schema=schema)
    op.create_table(
        "vote_action_match",
        sa.Column("vote_id", sa.Integer(), nullable=False),
        sa.Column("action_scope", sa.String(), nullable=False),
        sa.Column("bill_action_id", sa.Integer(), nullable=True),
        sa.Column("amendment_action_id", sa.Integer(), nullable=True),
        sa.Column("is_selected", sa.Boolean(), nullable=False),
        sa.Column("match_method", sa.String(), nullable=False),
        sa.Column("match_reason", sa.String(), nullable=True),
        sa.Column("match_confidence", sa.String(), nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["amendment_action_id"],
            [f"{schema}.amendment_action.id"],
            name=op.f("fk_vote_action_match_amendment_action_id_amendment_action"),
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["bill_action_id"],
            [f"{schema}.bill_action.id"],
            name=op.f("fk_vote_action_match_bill_action_id_bill_action"),
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["vote_id"],
            [f"{schema}.vote.id"],
            name=op.f("fk_vote_action_match_vote_id_vote"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_vote_action_match")),
        schema=schema,
    )
    op.create_index(
        "ix_vote_action_match_vote_id",
        "vote_action_match",
        ["vote_id"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "uq_vote_action_match_selected_vote_id",
        "vote_action_match",
        ["vote_id"],
        unique=True,
        schema=schema,
        postgresql_where=sa.text("is_selected"),
    )
    op.create_table(
        "vote_classification",
        sa.Column("vote_id", sa.Integer(), nullable=False),
        sa.Column("subject_type", sa.String(), nullable=False),
        sa.Column("measure_type", sa.String(), nullable=True),
        sa.Column("measure_subtype", sa.String(), nullable=True),
        sa.Column("measure_function", sa.String(), nullable=True),
        sa.Column("vote_relationship", sa.String(), nullable=False),
        sa.Column("is_legislation_related", sa.Boolean(), nullable=False),
        sa.Column("is_direct_vote_on_legislative_text", sa.Boolean(), nullable=False),
        sa.Column("is_substantive_policy_vote", sa.Boolean(), nullable=False),
        sa.Column("is_lawmaking_vehicle", sa.Boolean(), nullable=False),
        sa.Column("is_special_rule", sa.Boolean(), nullable=False),
        sa.Column("classification_method", sa.String(), nullable=False),
        sa.Column("classification_confidence_reason", sa.String(), nullable=True),
        sa.Column("confidence", sa.String(), nullable=False),
        sa.Column("classified_at", sa.DateTime(timezone=True), nullable=True),
        sa.Column("classification_version", sa.String(), nullable=False),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["vote_id"],
            [f"{schema}.vote.id"],
            name=op.f("fk_vote_classification_vote_id_vote"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_vote_classification")),
        sa.UniqueConstraint("vote_id", name=op.f("uq_vote_classification_vote_id")),
        schema=schema,
    )
    op.create_index(
        "ix_vote_classification_subject_type",
        "vote_classification",
        ["subject_type"],
        unique=False,
        schema=schema,
    )
    op.create_table(
        "vote_measure_link",
        sa.Column("vote_id", sa.Integer(), nullable=False),
        sa.Column("measure_id", sa.Integer(), nullable=False),
        sa.Column("role", sa.String(), nullable=False),
        sa.Column("source", sa.String(), nullable=False),
        sa.Column("confidence", sa.String(), nullable=False),
        sa.Column("notes", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["measure_id"],
            [f"{schema}.bill.id"],
            name=op.f("fk_vote_measure_link_measure_id_bill"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_vote_measure_link")),
        schema=schema,
    )
    op.create_index(
        "ix_vote_measure_link_vote_id",
        "vote_measure_link",
        ["vote_id"],
        unique=False,
        schema=schema,
    )
    op.create_foreign_key(
        op.f("fk_vote_measure_link_vote_id_vote"),
        "vote_measure_link",
        "vote",
        ["vote_id"],
        ["id"],
        source_schema=schema,
        referent_schema=schema,
        ondelete="CASCADE",
    )
    op.create_table(
        "vote_text_target",
        sa.Column("vote_id", sa.Integer(), nullable=False),
        sa.Column("text_target_type", sa.String(), nullable=False),
        sa.Column("voted_text_version_id", sa.Integer(), nullable=True),
        sa.Column("resulting_text_version_id", sa.Integer(), nullable=True),
        sa.Column("related_amendment_id", sa.Integer(), nullable=True),
        sa.Column("text_target_basis", sa.String(), nullable=False),
        sa.Column("text_resolution_method", sa.String(), nullable=False),
        sa.Column("text_resolution_confidence_reason", sa.String(), nullable=True),
        sa.Column("confidence", sa.String(), nullable=False),
        sa.Column("notes", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["related_amendment_id"],
            [f"{schema}.amendment.id"],
            name=op.f("fk_vote_text_target_related_amendment_id_amendment"),
            ondelete="SET NULL",
        ),
        sa.ForeignKeyConstraint(
            ["resulting_text_version_id"],
            [f"{schema}.bill_text.id"],
            name=op.f("fk_vote_text_target_resulting_text_version_id_bill_text"),
            ondelete="SET NULL",
        ),
        sa.ForeignKeyConstraint(
            ["vote_id"],
            [f"{schema}.vote.id"],
            name=op.f("fk_vote_text_target_vote_id_vote"),
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["voted_text_version_id"],
            [f"{schema}.bill_text.id"],
            name=op.f("fk_vote_text_target_voted_text_version_id_bill_text"),
            ondelete="SET NULL",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_vote_text_target")),
        sa.UniqueConstraint("vote_id", name=op.f("uq_vote_text_target_vote_id")),
        schema=schema,
    )
    op.create_table(
        "vote_position_meaning",
        sa.Column("vote_id", sa.Integer(), nullable=False),
        sa.Column("yea_effect", sa.String(), nullable=False),
        sa.Column("nay_effect", sa.String(), nullable=False),
        sa.Column("present_effect", sa.String(), nullable=False),
        sa.Column("polarity_confidence", sa.String(), nullable=False),
        sa.Column("polarity_method", sa.String(), nullable=False),
        sa.Column("notes", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["vote_id"],
            [f"{schema}.vote.id"],
            name=op.f("fk_vote_position_meaning_vote_id_vote"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_vote_position_meaning")),
        sa.UniqueConstraint("vote_id", name=op.f("uq_vote_position_meaning_vote_id")),
        schema=schema,
    )
    op.create_table(
        "vote_context_audit",
        sa.Column("vote_id", sa.Integer(), nullable=False),
        sa.Column("step", sa.String(), nullable=False),
        sa.Column("message", sa.String(), nullable=False),
        sa.Column("severity", sa.String(), nullable=False),
        sa.Column("source_path", sa.String(), nullable=True),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.ForeignKeyConstraint(
            ["vote_id"],
            [f"{schema}.vote.id"],
            name=op.f("fk_vote_context_audit_vote_id_vote"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_vote_context_audit")),
        schema=schema,
    )
    op.create_index(
        "ix_vote_context_audit_vote_id",
        "vote_context_audit",
        ["vote_id"],
        unique=False,
        schema=schema,
    )
 def downgrade() -> None:
    """Downgrade."""
    raise NotImplementedError("Downgrade is not supported for canonical vote context v3.")
@@ -1,203 +0,0 @@
 """add supporting indexes for congress vote context and scoring.
 Revision ID: a7b91c4e2d30
 Revises: 1f8c0e7a9d21
 Create Date: 2026-04-26 00:00:00.000000
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 import sqlalchemy as sa
 from alembic import op
 from pipelines.orm import DataScienceDevBase
 if TYPE_CHECKING:
    from collections.abc import Sequence
 revision: str = "a7b91c4e2d30"
 down_revision: str | None = "1f8c0e7a9d21"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 schema = DataScienceDevBase.schema_name
 def _dedupe_source_artifacts() -> None:
    op.execute(
        sa.text(
            f"""
            CREATE TEMP TABLE tmp_source_artifact_dups AS
            WITH ranked AS (
                SELECT
                    id,
                    first_value(id) OVER (
                        PARTITION BY ingest_run_id, local_path, sha256
                        ORDER BY id
                    ) AS keep_id,
                    row_number() OVER (
                        PARTITION BY ingest_run_id, local_path, sha256
                        ORDER BY id
                    ) AS rn
                FROM {schema}.source_artifact
                WHERE ingest_run_id IS NOT NULL
            )
            SELECT id, keep_id
            FROM ranked
            WHERE rn > 1
            """
        )
    )
    for table_name, column_name in (
        ("bill_text", "source_artifact_id"),
        ("bill_action", "source_artifact_id"),
        ("amendment", "source_artifact_id"),
        ("amendment_action", "source_artifact_id"),
        ("vote", "raw_vote_source_artifact_id"),
    ):
        op.execute(
            sa.text(
                f"""
                UPDATE {schema}.{table_name} AS target
                SET {column_name} = d.keep_id
                FROM tmp_source_artifact_dups AS d
                WHERE target.{column_name} = d.id
                """
            )
        )
    op.execute(
        sa.text(
            f"""
            DELETE FROM {schema}.source_artifact AS source_artifact
            USING tmp_source_artifact_dups AS d
            WHERE source_artifact.id = d.id
            """
        )
    )
    op.execute(sa.text("DROP TABLE tmp_source_artifact_dups"))
 def upgrade() -> None:
    """Upgrade."""
    _dedupe_source_artifacts()
    op.create_index(
        "uq_source_artifact_ingest_identity",
        "source_artifact",
        ["ingest_run_id", "local_path", "sha256"],
        unique=True,
        schema=schema,
    )
    op.create_index(
        "ix_bill_action_recorded_vote_match_tuple",
        "bill_action_recorded_vote",
        ["congress", "chamber", "session_number", "roll_number"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_amendment_action_recorded_vote_match_tuple",
        "amendment_action_recorded_vote",
        ["congress", "chamber", "session_number", "roll_number"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_vote_classification_eligible_vote_id",
        "vote_classification",
        ["vote_id"],
        unique=False,
        schema=schema,
        postgresql_where=sa.text(
            "subject_type = 'measure' "
            "AND vote_relationship = 'direct_text_vote' "
            "AND is_direct_vote_on_legislative_text "
            "AND is_substantive_policy_vote "
            "AND NOT is_special_rule"
        ),
    )
    op.create_index(
        "ix_vote_measure_link_vote_id_role",
        "vote_measure_link",
        ["vote_id", "role"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_vote_measure_link_measure_id_role",
        "vote_measure_link",
        ["measure_id", "role"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_vote_text_target_voted_text_version_id",
        "vote_text_target",
        ["voted_text_version_id"],
        unique=False,
        schema=schema,
        postgresql_where=sa.text("voted_text_version_id IS NOT NULL"),
    )
    op.create_index(
        "ix_vote_context_audit_severity_vote_id",
        "vote_context_audit",
        ["severity", "vote_id"],
        unique=False,
        schema=schema,
    )
    op.create_index(
        "ix_legislator_current_chamber",
        "legislator",
        ["current_chamber"],
        unique=False,
        schema=schema,
    )
 def downgrade() -> None:
    """Downgrade."""
    op.drop_index("ix_legislator_current_chamber", table_name="legislator", schema=schema)
    op.drop_index(
        "ix_vote_context_audit_severity_vote_id",
        table_name="vote_context_audit",
        schema=schema,
    )
    op.drop_index(
        "ix_vote_text_target_voted_text_version_id",
        table_name="vote_text_target",
        schema=schema,
    )
    op.drop_index(
        "ix_vote_measure_link_measure_id_role",
        table_name="vote_measure_link",
        schema=schema,
    )
    op.drop_index(
        "ix_vote_measure_link_vote_id_role",
        table_name="vote_measure_link",
        schema=schema,
    )
    op.drop_index(
        "ix_vote_classification_eligible_vote_id",
        table_name="vote_classification",
        schema=schema,
    )
    op.drop_index(
        "ix_amendment_action_recorded_vote_match_tuple",
        table_name="amendment_action_recorded_vote",
        schema=schema,
    )
    op.drop_index(
        "ix_bill_action_recorded_vote_match_tuple",
        table_name="bill_action_recorded_vote",
        schema=schema,
    )
    op.drop_index(
        "uq_source_artifact_ingest_identity",
        table_name="source_artifact",
        schema=schema,
    )
@@ -1,66 +0,0 @@
 """adding PostTopic.
 Revision ID: 032e26bbfcb5
 Revises: a7b91c4e2d30
 Create Date: 2026-04-26 14:34:35.688341
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 import sqlalchemy as sa
 from alembic import op
 from pipelines.orm import DataScienceDevBase
 if TYPE_CHECKING:
    from collections.abc import Sequence
 # revision identifiers, used by Alembic.
 revision: str = "032e26bbfcb5"
 down_revision: str | None = "a7b91c4e2d30"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 schema = DataScienceDevBase.schema_name
 def upgrade() -> None:
    """Upgrade."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "post_topic",
        sa.Column("post_id", sa.BigInteger(), nullable=False),
        sa.Column("topic_id", sa.SmallInteger(), nullable=False),
        sa.Column("topic_label", sa.String(), nullable=True),
        sa.Column("model_version", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column(
            "created",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.Column(
            "updated",
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=False,
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_post_topic")),
        schema=schema,
    )
    op.create_index(
        "ix_post_topic_post_id", "post_topic", ["post_id"], unique=False, schema=schema
    )
    # ### end Alembic commands ###
 def downgrade() -> None:
    """Downgrade."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index("ix_post_topic_post_id", table_name="post_topic", schema=schema)
    op.drop_table("post_topic", schema=schema)
    # ### end Alembic commands ###
@@ -1,35 +0,0 @@
 """adding PG Vector.
 Revision ID: b9360b0b0c22
 Revises: 032e26bbfcb5
 Create Date: 2026-04-26 14:35:08.770128
 """
 from __future__ import annotations
 from typing import TYPE_CHECKING
 from alembic import op
 from pipelines.orm import DataScienceDevBase
 if TYPE_CHECKING:
    from collections.abc import Sequence
 # revision identifiers, used by Alembic.
 revision: str = "b9360b0b0c22"
 down_revision: str | None = "032e26bbfcb5"
 branch_labels: str | Sequence[str] | None = None
 depends_on: str | Sequence[str] | None = None
 schema = DataScienceDevBase.schema_name
 def upgrade() -> None:
    """Upgrade."""
    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
 def downgrade() -> None:
    """Downgrade."""
    op.execute("DROP EXTENSION IF EXISTS vector")
@@ -1,116 +0,0 @@
 """Nornsight — BERTopic POC Inference Script.
 Loads the trained model and labels a small batch of posts,
 writing results to main.post_topic for inspection.
 POC: processes a single batch of 1k posts to validate the pipeline end-to-end.
 """
 from __future__ import annotations
 import logging
 import time
 from collections import Counter
 from pathlib import Path
 from bertopic import BERTopic
 from sqlalchemy import Engine, func, insert, select
 from sqlalchemy.orm import Session
 from pipelines.config import BertTopicInferConfig, get_bertopic_infer_config
 from pipelines.orm.common import get_postgres_engine
 from pipelines.orm.data_science_dev.posts import PostTopic, Posts
 from pipelines.orm.data_science_dev.posts.lang_filters import ENGLISH_LANGS
 from pipelines.pipelines.common import configure_logger
 logger = logging.getLogger(__name__)
 def main() -> None:
    """Run BERTopic inference against a sample of posts."""
    configure_logger()
    config = get_bertopic_infer_config()
    run_inference(config)
    logger.info(
        "POC inference complete. Check main.post_topic in DBeaver to inspect results."
    )
 def run_inference(config: BertTopicInferConfig) -> None:
    model_save_path = Path(config.model_save_path)
    logger.info(f"Loading BERTopic model from {model_save_path}")
    topic_model = BERTopic.load(str(model_save_path))
    topic_info = topic_model.get_topic_info()
    label_map: dict[int, str] = dict(zip(topic_info["Topic"], topic_info["Name"]))
    logger.info(f"Model loaded with {len(label_map)} topics")
    engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
    post_ids, texts = get_post_ids_and_test(engine, config)
    logger.info(f"Fetched {len(texts)} posts")
    logger.info("Running BERTopic transform")
    start = time.perf_counter()
    topics, _probabilities = topic_model.transform(texts)
    elapsed = time.perf_counter() - start
    logger.info(f"Transform complete in {elapsed:.1f}s")
    # Write results to main.post_topic
    records = [
        {
            "post_id": pid,
            "topic_id": int(topic_id),
            "topic_label": label_map.get(int(topic_id), "unknown"),
            "model_version": config.model_version,
        }
        for pid, topic_id in zip(post_ids, topics)
    ]
    with Session(engine) as session:
        session.execute(insert(PostTopic), records)
        session.commit()
    count_topics(records)
    logger.info(f"Wrote {len(records)} topic labels to main.post_topic")
 def get_post_ids_and_test(
    engine: Engine,
    config: BertTopicInferConfig,
 ) -> None | tuple[list[int], list[str]]:
    with Session(engine) as session:
        logger.info(f"Fetching {config.poc_batch_size} posts for inference")
        # Pull a fresh batch for inference — distinct from training sample
        # using a fixed seed offset so we're not re-labeling training posts
        stmt = select(Posts).where(
            Posts.text.is_not(None),
            Posts.langs.in_(ENGLISH_LANGS),
            func.length(Posts.text) > config.min_text_length,
        )
        if config.poc_batch_size > 0:
            stmt = stmt.limit(config.poc_batch_size)
        posts = session.scalars(stmt).all()
        if not posts:
            logger.warning("No posts were selected for inference")
            return [], []
        post_ids = [post.post_id for post in posts]
        texts = [post.text.strip() for post in posts]
    return post_ids, texts
 def count_topics(records: list[dict]) -> None:
    topic_counts = Counter(record.get("topic_label", "unknown") for record in records)
    logger.info("Topic distribution in this batch:")
    for label, count in topic_counts.most_common(10):
        logger.info("  %s: %d", label, count)
 if __name__ == "__main__":
    main()
@@ -1,119 +0,0 @@
 """Nornsight — BERTopic POC Training Script.
 Pulls a small stratified sample (~11.5k posts) from main.posts,
 trains BERTopic with MiniBatchKMeans on Jeeves, and saves the model locally.
 POC sample rate: random() < 0.00005 (~0.005% of 230M = ~11.5k posts)
 Full training rate will be: random() < 0.005 (~1.08M posts)
 """
 from __future__ import annotations
 import logging
 import time
 from pathlib import Path
 from bertopic import BERTopic
 from sklearn.cluster import MiniBatchKMeans
 from sqlalchemy import func, select
 from sqlalchemy.orm import Session
 from pipelines.config import BertTopicTrainConfig, get_bertopic_train_config
 from pipelines.orm.common import get_postgres_engine
 from pipelines.orm.data_science_dev.posts import Posts
 from pipelines.orm.data_science_dev.posts.lang_filters import ENGLISH_LANGS
 from pipelines.pipelines.common import configure_logger
 logger = logging.getLogger(__name__)
 def main() -> None:
    """Train and persist the BERTopic model."""
    configure_logger()
    config = get_bertopic_train_config()
    docs = load_sample(config)
    if not docs:
        logger.warning("No training documents were selected")
        return
    train(docs, config)
    logger.info(f"Done. Model saved as version {config.model_version}")
    logger.info("Next: run infer.py to label a sample of posts in the database")
 def load_sample(config: BertTopicTrainConfig) -> list[str]:
    logger.info("Connecting to PostgreSQL via SQLAlchemy")
    engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
    logger.info(f"Pulling sample from main.posts (sample_rate={config.sample_rate})")
    start = time.perf_counter()
    with Session(engine) as session:
        texts = session.scalars(
            select(Posts.text).where(
                Posts.text.is_not(None),
                Posts.langs.in_(ENGLISH_LANGS),
                func.length(Posts.text) > config.min_text_length,
                func.random() < config.sample_rate,
            )
        ).all()
    elapsed = time.perf_counter() - start
    logger.info(f"Fetched {len(texts)} rows in {elapsed:.1f}s")
    # Basic cleaning — strip whitespace and deduplicate
    docs = list({text.strip() for text in texts})
    logger.info(f"After cleaning and dedup: {len(docs)} posts")
    return docs
 def train(docs: list[str], config: BertTopicTrainConfig) -> None:
    logger.info(
        f"Initialising BERTopic with MiniBatchKMeans (n_topics={config.n_topics})"
    )
    cluster_model = MiniBatchKMeans(
        n_clusters=config.n_topics,
        random_state=42,
        batch_size=1024,
        n_init=3,
        verbose=1,
    )
    topic_model = BERTopic(
        hdbscan_model=cluster_model,
        language="english",
        calculate_probabilities=False,  # saves memory
        verbose=True,
    )
    logger.info(f"Starting fit_transform on {len(docs)} posts (CPU)")
    start = time.perf_counter()
    topic_model.fit_transform(docs)
    elapsed = time.perf_counter() - start
    logger.info(f"Training complete in {elapsed:.1f}s ({elapsed / 60:.1f} min)")
    # Log topic summary for quick inspection
    topic_info = topic_model.get_topic_info()
    logger.info(f"Topics found: {len(topic_info)}")
    logger.info(f"\n{topic_info.to_string()}")
    model_save_path = Path(config.model_save_path)
    model_save_path.mkdir(parents=True, exist_ok=True)
    logger.info(f"Saving model to {model_save_path}")
    topic_model.save(
        str(model_save_path),
        serialization="safetensors",
        save_ctfidf=True,
        save_embedding_model=True,
    )
    logger.info("Model saved")
 if __name__ == "__main__":
    main()
@@ -1,8 +1,6 @@
 from __future__ import annotations
 from dataclasses import dataclass
 from os import getenv
 from datetime import date
 from pathlib import Path
 import tomllib
@@ -51,7 +49,6 @@ class FinetuneConfig:
        )
@dataclass
 class BenchmarkConfig:
    """Top-level benchmark configuration loaded from TOML."""
@@ -71,77 +68,6 @@ class BenchmarkConfig:
        return cls(**raw)
@dataclass
 class OpenAIConfig:
    """OpenAI API configuration."""
    api_key: str
    openai_project_id: str
    openai_chat_completions_url: str
    model: str
    timeout_seconds: int
    @classmethod
    def from_toml(cls, config_path: Path) -> OpenAIConfig:
        """Load OpenAI config from a TOML file."""
        raw = tomllib.loads(config_path.read_text()).get("openai", {})
        api_key = getenv("CLOSEDAI_TOKEN")
        if not api_key:
            message = "CLOSEDAI_TOKEN is required"
            raise KeyError(message)
        return cls(
            api_key=api_key,
            openai_project_id=raw.get(
                "openai_project_id", "proj_fQBPEXFgnS87Fk6wZwploFwE"
            ),
            openai_chat_completions_url=raw.get(
                "openai_chat_completions_url",
                "https://api.openai.com/v1/chat/completions",
            ),
            model=raw.get("model", "gpt-5.4-mini"),
            timeout_seconds=raw.get("timeout_seconds", 60),
        )
@dataclass
 class BertTopicTrainConfig:
    """BERTopic training configuration loaded from TOML."""
    sample_rate: float
    min_text_length: int
    n_topics: int
    model_save_path: str
    model_version: str | None = None
    @classmethod
    def from_toml(cls, config_path: Path) -> BertTopicTrainConfig:
        """Load BERTopic training config from a TOML file."""
        raw = tomllib.loads(config_path.read_text())["bertopic"]["train"]
        today = date.today().isoformat()
        if raw.get("model_version") is None:
            raw["model_version"] = (
                f"{today}-{raw['sample_rate']}-{raw['min_text_length']}-{raw['n_topics']}"
            )
        return cls(**raw)
@dataclass
 class BertTopicInferConfig:
    """BERTopic inference configuration loaded from TOML."""
    min_text_length: int
    poc_batch_size: int
    model_version: str
    model_save_path: str
    @classmethod
    def from_toml(cls, config_path: Path) -> BertTopicInferConfig:
        """Load BERTopic inference config from a TOML file."""
        raw = tomllib.loads(config_path.read_text())["bertopic"]["infer"]
        return cls(**raw)
 def get_config_dir() -> Path:
    """Get the path to the config directory."""
    return Path(__file__).resolve().parents[2] / "config"
@@ -152,12 +78,6 @@ def default_config_path() -> Path:
    return get_config_dir() / "config.toml"
 def get_openai_config(config_path: Path | None = None) -> OpenAIConfig:
    if config_path is None:
        config_path = default_config_path()
    return OpenAIConfig.from_toml(config_path)
 def get_finetune_config(config_path: Path | None = None) -> FinetuneConfig:
    if config_path is None:
        config_path = default_config_path()
@@ -168,19 +88,3 @@ def get_benchmark_config(config_path: Path | None = None) -> BenchmarkConfig:
    if config_path is None:
        config_path = default_config_path()
    return BenchmarkConfig.from_toml(config_path)
 def get_bertopic_train_config(
    config_path: Path | None = None,
 ) -> BertTopicTrainConfig:
    if config_path is None:
        config_path = default_config_path()
    return BertTopicTrainConfig.from_toml(config_path)
 def get_bertopic_infer_config(
    config_path: Path | None = None,
 ) -> BertTopicInferConfig:
    if config_path is None:
        config_path = default_config_path()
    return BertTopicInferConfig.from_toml(config_path)
@@ -1,235 +0,0 @@
 """Docker container lifecycle management for BERTopic jobs on Jeeves."""
 from __future__ import annotations
 import logging
 import os
 import subprocess
 from pathlib import Path
 from typing import Annotated, Literal
 import typer
 logger = logging.getLogger(__name__)
 JOBMode = Literal["train", "infer"]
 IMAGE_NAME = "bert-topic:latest"
 REPO_DIR = Path(__file__).resolve().parents[3]
 DEFAULT_CACHE_ROOT = Path("/zfs/storage/main/ds_thing/models/bert_topic")
 DEFAULT_POSTGRES_SOCKET_DIR = Path("/run/postgresql")
 DB_ENV_VARS = (
    "DATA_SCIENCE_DEV_DB",
    "DATA_SCIENCE_DEV_HOST",
    "DATA_SCIENCE_DEV_PORT",
    "DATA_SCIENCE_DEV_USER",
    "DATA_SCIENCE_DEV_PASSWORD",
 )
 app = typer.Typer(help="BERTopic container management.")
 def _container_name(mode: JOBMode) -> str:
    """Return the Docker container name for the selected BERTopic job."""
    return f"bert-topic-{mode}"
 def _module_name(mode: JOBMode) -> str:
    """Return the Python module to run inside the container."""
    return f"pipelines.bert_topic.{mode}"
 def _env_args(*, use_postgres_socket: bool) -> list[str]:
    """Pass through database environment variables from the host shell."""
    required = [
        "DATA_SCIENCE_DEV_DB",
        "DATA_SCIENCE_DEV_PORT",
        "DATA_SCIENCE_DEV_USER",
    ]
    if not use_postgres_socket:
        required.append("DATA_SCIENCE_DEV_HOST")
    missing = [name for name in required if not os.getenv(name)]
    if missing:
        message = "Missing required database environment variables: " + ", ".join(
            missing
        )
        raise RuntimeError(message)
    args: list[str] = []
    if use_postgres_socket:
        args.extend(["-e", f"DATA_SCIENCE_DEV_HOST={DEFAULT_POSTGRES_SOCKET_DIR}"])
    for name in DB_ENV_VARS:
        if use_postgres_socket and name == "DATA_SCIENCE_DEV_HOST":
            continue
        if os.getenv(name):
            args.extend(["-e", name])
    return args
 def build_image() -> None:
    """Build the BERTopic Docker image."""
    dockerfile = REPO_DIR / "pipelines/containers/docker_files/Dockerfile.bert_topic"
    logger.info("Building BERTopic image: %s", IMAGE_NAME)
    result = subprocess.run(
        [
            "docker",
            "build",
            "--network",
            "host",
            "-f",
            str(dockerfile),
            "-t",
            IMAGE_NAME,
            str(REPO_DIR),
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        message = (
            "Failed to build BERTopic image. "
            f"docker build stderr:\n{result.stderr.strip()}"
        )
        raise RuntimeError(message)
    logger.info("Image built: %s", IMAGE_NAME)
 def stop_job(*, mode: JOBMode) -> None:
    """Stop and remove the BERTopic container for the selected mode."""
    container_name = _container_name(mode)
    logger.info("Stopping BERTopic container: %s", container_name)
    subprocess.run(["docker", "stop", container_name], capture_output=True, check=False)
    subprocess.run(
        ["docker", "rm", "-f", container_name], capture_output=True, check=False
    )
 def start_job(
    *,
    mode: JOBMode,
    cache_root: Path = DEFAULT_CACHE_ROOT,
    postgres_socket_dir: Path = DEFAULT_POSTGRES_SOCKET_DIR,
    detach: bool = False,
 ) -> None:
    """Run BERTopic training or inference in Docker on Jeeves."""
    cache_root = cache_root.resolve()
    cache_root.mkdir(parents=True, exist_ok=True)
    postgres_socket_dir = postgres_socket_dir.resolve()
    stop_job(mode=mode)
    use_postgres_socket = postgres_socket_dir.exists()
    command = [
        "docker",
        "run",
        "--name",
        _container_name(mode),
        "--ipc=host",
        "-v",
        f"{cache_root}:/cache",
        *_env_args(use_postgres_socket=use_postgres_socket),
        IMAGE_NAME,
        _module_name(mode),
    ]
    if use_postgres_socket:
        command[7:7] = ["-v", f"{postgres_socket_dir}:{DEFAULT_POSTGRES_SOCKET_DIR}"]
    if detach:
        command.insert(2, "-d")
    logger.info("Starting BERTopic %s container", mode)
    logger.info("  Cache root: %s", cache_root)
    if use_postgres_socket:
        logger.info("  Postgres socket: %s", postgres_socket_dir)
    result = subprocess.run(command, text=True, capture_output=detach, check=False)
    if result.returncode != 0:
        detail = (
            result.stderr.strip() if result.stderr else f"exit code {result.returncode}"
        )
        raise RuntimeError(f"BERTopic container failed to start: {detail}")
    if detach:
        logger.info("Container started: %s", result.stdout.strip()[:12])
    else:
        logger.info("BERTopic %s run complete", mode)
 def logs_job(*, mode: JOBMode) -> str | None:
    """Return recent logs from the BERTopic container, or None if absent."""
    result = subprocess.run(
        ["docker", "logs", "--tail", "100", _container_name(mode)],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode != 0:
        return None
    return result.stdout + result.stderr
@app.command()
 def build(
    log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
 ) -> None:
    """Build the BERTopic Docker image."""
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    build_image()
@app.command("run")
 def run_job_command(
    mode: Annotated[JOBMode, typer.Option(help="Which BERTopic job to run")] = "train",
    cache_root: Annotated[
        Path, typer.Option(help="Host path mounted to /cache for model and HF cache")
    ] = DEFAULT_CACHE_ROOT,
    postgres_socket_dir: Annotated[
        Path, typer.Option(help="Host Postgres socket directory to mount into the container")
    ] = DEFAULT_POSTGRES_SOCKET_DIR,
    detach: Annotated[
        bool, typer.Option(help="Start the container in the background")
    ] = False,
    log_level: Annotated[str, typer.Option(help="Log level")] = "INFO",
 ) -> None:
    """Run BERTopic training or inference inside Docker."""
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    start_job(
        mode=mode,
        cache_root=cache_root,
        postgres_socket_dir=postgres_socket_dir,
        detach=detach,
    )
@app.command("stop")
 def stop_job_command(
    mode: Annotated[
        JOBMode, typer.Option(help="Which BERTopic container to stop")
    ] = "train",
 ) -> None:
    """Stop and remove the BERTopic container."""
    stop_job(mode=mode)
@app.command("logs")
 def logs_job_command(
    mode: Annotated[
        JOBMode, typer.Option(help="Which BERTopic container logs to show")
    ] = "train",
 ) -> None:
    """Show recent logs from the BERTopic container."""
    output = logs_job(mode=mode)
    if output is None:
        typer.echo(f"No BERTopic container found for mode={mode}.")
        raise typer.Exit(code=1)
    typer.echo(output)
 def cli() -> None:
    """Typer entry point."""
    app()
 if __name__ == "__main__":
    cli()
@@ -1,38 +0,0 @@
 FROM python:3.12-bookworm
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1
 ENV PIP_NO_CACHE_DIR=1
 RUN apt-get update && apt-get install -y \
    build-essential \
    gcc \
    g++ \
    git \
    libgomp1 \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 COPY pipelines ./pipelines
 RUN python -m pip install --upgrade pip setuptools wheel && \
    python -m pip install \
    torch \
    --index-url https://download.pytorch.org/whl/cpu && \
    python -m pip install \
    typer \
    sqlalchemy \
    bertopic \
    sentence-transformers \
    scikit-learn \
    pandas \
    numpy \
    "psycopg[binary]"
 ENV HF_HOME=/cache/huggingface
 ENV TRANSFORMERS_CACHE=/cache/huggingface
 ENTRYPOINT ["python", "-m"]
 CMD ["pipelines.bert_topic.train"]
@@ -1,11 +0,0 @@
 FROM ghcr.io/unslothai/unsloth:latest
 RUN pip install --no-cache-dir typer
 WORKDIR /workspace
 COPY python/prompt_bench/finetune.py python/prompt_bench/finetune.py
 COPY config/prompts/summarization_prompts.toml config/prompts/summarization_prompts.toml
 COPY python/prompt_bench/__init__.py python/prompt_bench/__init__.py
 COPY python/__init__.py python/__init__.py
 ENTRYPOINT ["python", "-m", "pipelines.prompt_bench.finetune"]
@@ -12,36 +12,16 @@ Usage:
 from __future__ import annotations
 import logging
 from dataclasses import dataclass
 from datetime import date
 from pathlib import Path  # noqa: TC003 needed at runtime for typer CLI argument
 from typing import TYPE_CHECKING, Annotated
 import orjson
 import typer
 import yaml
-from sqlalchemy import func, select
+from sqlalchemy import select
 from sqlalchemy.orm import Session
-from pipelines.common import configure_logger
+from pipelines.pipelines.common import configure_logger
 from pipelines.pipelines.jobs.congress_vote_context import (
    build_billstatus_text_version_index,
    build_vote_action_matches,
    classify_votes,
    coerce_raw_ref,
    derive_session_number,
    finish_ingest_run,
    filter_context_supported_congress_dirs,
    ingest_bill_status_context as rebuild_bill_status_context,
    parse_date_like,
    parse_vote_source_url,
    parsed_vote_datetime,
    register_source_artifact,
    require_billstatus_artifacts,
    resolve_vote_position_meanings,
    resolve_vote_text_targets,
    start_ingest_run,
 )
 from pipelines.orm.common import get_postgres_engine
 from pipelines.orm.data_science_dev.congress import (
    Bill,
@@ -49,23 +29,17 @@ from pipelines.orm.data_science_dev.congress import (
    Legislator,
    LegislatorSocialMedia,
    Vote,
    VoteActionMatch,
    VoteClassification,
    VoteContextAudit,
    VoteRecord,
 )
 from pipelines.parallelize import parallelize_thread
 if TYPE_CHECKING:
-    from collections.abc import Iterator, Sequence
+    from collections.abc import Iterator
    from sqlalchemy.engine import Engine
 logger = logging.getLogger(__name__)
 BATCH_SIZE = 10_000
 PARALLEL_FILE_CHUNK_SIZE = 1_000
 PARALLEL_PROGRESS_TRACKER = 250
 app = typer.Typer(help="Ingest unitedstates/congress data into data_science_dev.")
@@ -84,12 +58,7 @@ def main(
    only: Annotated[
        str | None,
        typer.Option(
-            help=(
+            help="Only run a specific step: legislators, social-media, bills, votes, bill-text"
                "Only run a specific step: legislators, legislators-social-media, "
                "bills, bill-text, votes, bill-status-context, vote-context-match, "
                "vote-context-classify, vote-text-resolve, vote-polarity-resolve, "
                "vote-context-diagnostics"
            )
        ),
    ] = None,
 ) -> None:
@@ -118,32 +87,12 @@ def main(
    logger.info("Found %d congress directories to process", len(congress_dirs))
    with Session(engine) as session:
        ingest_run = start_ingest_run(
            session,
            source_snapshot_label=str(parent_dir),
            repo_root=Path(__file__).resolve().parent.parent,
        )
        ingest_run_id = ingest_run.id
    steps: dict[str, tuple] = {
        "legislators": (ingest_legislators, (engine, legislators_dir)),
        "legislators-social-media": (ingest_social_media, (engine, legislators_dir)),
        "bills": (ingest_bills, (engine, congress_dirs)),
-        "bill-text": (ingest_bill_text, (engine, congress_dirs, ingest_run_id)),
+        "votes": (ingest_votes, (engine, congress_dirs)),
-        "votes": (ingest_votes, (engine, congress_dirs, ingest_run_id)),
+        "bill-text": (ingest_bill_text, (engine, congress_dirs)),
        "bill-status-context": (
            ingest_bill_status_context_step,
            (engine, congress_dirs, ingest_run_id),
        ),
        "vote-context-match": (vote_context_match_step, (engine, congress_dirs)),
        "vote-context-classify": (vote_context_classify_step, (engine, congress_dirs)),
        "vote-text-resolve": (vote_text_resolve_step, (engine, congress_dirs)),
        "vote-polarity-resolve": (vote_polarity_resolve_step, (engine, congress_dirs)),
        "vote-context-diagnostics": (
            vote_context_diagnostics_step,
            (engine, congress_dirs),
        ),
    }
    if only:
@@ -154,18 +103,11 @@ def main(
            raise typer.Exit(code=1)
        steps = {only: steps[only]}
-    try:
+    for step_name, (step_func, step_args) in steps.items():
-        for step_name, (step_func, step_args) in steps.items():
+        logger.info("=== Starting step: %s ===", step_name)
-            logger.info("=== Starting step: %s ===", step_name)
+        step_func(*step_args)
-            step_func(*step_args)
+        logger.info("=== Finished step: %s ===", step_name)
            logger.info("=== Finished step: %s ===", step_name)
    except Exception:
        with Session(engine) as session:
            finish_ingest_run(session, ingest_run_id, status="failed")
        raise
    with Session(engine) as session:
        finish_ingest_run(session, ingest_run_id, status="completed")
    logger.info("ingest-congress done")
@@ -191,56 +133,6 @@ def _flush_batch(session: Session, batch: list[object], label: str) -> int:
    return count
@dataclass(frozen=True)
 class LoadedJsonFile:
    path: Path
    data: dict | None
@dataclass(frozen=True)
 class PreparedBillTextInput:
    bill_id: int
    bill_key: tuple[int, str, int]
    version_code: str
    text_content: str | None
    version_data: dict | None
    source_file: Path | None
    billstatus_version: object | None
 def _chunked[T](items: Sequence[T], chunk_size: int) -> Iterator[Sequence[T]]:
    """Yield fixed-size slices from a sequence."""
    for start in range(0, len(items), chunk_size):
        yield items[start : start + chunk_size]
 def _load_json_file(*, path: Path) -> LoadedJsonFile:
    """Read one JSON file off disk for later serial DB processing."""
    return LoadedJsonFile(path=path, data=_read_json(path))
 def _prepare_bill_text_input(
    *,
    bill_id: int,
    bill_key: tuple[int, str, int],
    version_dir: Path,
    billstatus_version: object | None,
 ) -> PreparedBillTextInput:
    """Load one bill-text directory off disk for later serial DB processing."""
    source_file = version_dir / "data.json"
    if not source_file.exists():
        source_file = _find_text_source_file(version_dir)
    return PreparedBillTextInput(
        bill_id=bill_id,
        bill_key=bill_key,
        version_code=version_dir.name,
        text_content=_read_bill_text(version_dir),
        version_data=_read_json(version_dir / "data.json"),
        source_file=source_file if source_file and source_file.exists() else None,
        billstatus_version=billstatus_version,
    )
 # ---------------------------------------------------------------------------
 # Legislators — loaded from congress-legislators YAML files
 # ---------------------------------------------------------------------------
@@ -449,26 +341,16 @@ def ingest_bills(engine: Engine, congress_dirs: list[Path]) -> None:
            bills_dir = congress_dir / "bills"
            if not bills_dir.is_dir():
                continue
-            bill_files = sorted(bills_dir.rglob("data.json"))
+            logger.info("Scanning bills from %s", congress_dir.name)
-            logger.info(
+            for bill_file in bills_dir.rglob("data.json"):
-                "Scanning %d bill files from %s",
+                data = _read_json(bill_file)
-                len(bill_files),
+                if data is None:
-                congress_dir.name,
+                    continue
-            )
+                bill = _parse_bill(data, existing_bills)
-            for chunk in _chunked(bill_files, PARALLEL_FILE_CHUNK_SIZE):
+                if bill is not None:
-                results = parallelize_thread(
+                    batch.append(bill)
-                    _load_json_file,
+                    if len(batch) >= BATCH_SIZE:
-                    [{"path": bill_file} for bill_file in chunk],
+                        total_inserted += _flush_batch(session, batch, "bills")
                    progress_tracker=PARALLEL_PROGRESS_TRACKER,
                )
                for loaded in results.results:
                    if loaded.data is None:
                        continue
                    bill = _parse_bill(loaded.data, existing_bills)
                    if bill is not None:
                        batch.append(bill)
                        if len(batch) >= BATCH_SIZE:
                            total_inserted += _flush_batch(session, batch, "bills")
        total_inserted += _flush_batch(session, batch, "bills")
    logger.info("Inserted %d new bills total", total_inserted)
@@ -510,17 +392,15 @@ def _parse_bill(data: dict, existing_bills: set[tuple[int, str, int]]) -> Bill |
 # ---------------------------------------------------------------------------
-def ingest_votes(
+def ingest_votes(engine: Engine, congress_dirs: list[Path]) -> None:
-    engine: Engine,
+    """Load vote data.json files with their vote records."""
    congress_dirs: list[Path],
    ingest_run_id: int | None,
 ) -> None:
    """Load raw vote data and member positions without any guessed text linkage."""
    legislator_map = _build_legislator_map_for_engine(engine)
    logger.info("Loaded %d legislators into lookup map", len(legislator_map))
    with Session(engine) as session:
        legislator_map = _build_legislator_map(session)
        logger.info("Loaded %d legislators into lookup map", len(legislator_map))
        bill_map = _build_bill_map(session)
        logger.info("Loaded %d bills into lookup map", len(bill_map))
        existing_votes = {
-            (vote.congress, vote.chamber, vote.session_number, vote.roll_number)
+            (vote.congress, vote.chamber, vote.session, vote.number)
            for vote in session.scalars(select(Vote)).all()
        }
        logger.info("Found %d existing votes in DB", len(existing_votes))
@@ -531,43 +411,16 @@ def ingest_votes(
            votes_dir = congress_dir / "votes"
            if not votes_dir.is_dir():
                continue
-            vote_files = sorted(votes_dir.rglob("data.json"))
+            logger.info("Scanning votes from %s", congress_dir.name)
-            logger.info(
+            for vote_file in votes_dir.rglob("data.json"):
-                "Scanning %d vote files from %s",
+                data = _read_json(vote_file)
-                len(vote_files),
+                if data is None:
-                congress_dir.name,
+                    continue
-            )
+                vote = _parse_vote(data, legislator_map, bill_map, existing_votes)
-            for chunk in _chunked(vote_files, PARALLEL_FILE_CHUNK_SIZE):
+                if vote is not None:
-                results = parallelize_thread(
+                    batch.append(vote)
-                    _load_json_file,
+                    if len(batch) >= BATCH_SIZE:
-                    [{"path": vote_file} for vote_file in chunk],
+                        total_inserted += _flush_batch(session, batch, "votes")
                    progress_tracker=PARALLEL_PROGRESS_TRACKER,
                )
                for loaded in results.results:
                    if loaded.data is None:
                        continue
                    chamber = normalize_vote_chamber(loaded.data.get("chamber"))
                    if chamber is None:
                        continue
                    artifact = register_source_artifact(
                        session,
                        path=loaded.path,
                        source_kind="vote_json",
                        congress=int(loaded.data.get("congress", congress_dir.name)),
                        chamber=chamber,
                        ingest_run_id=ingest_run_id,
                        source_url=parse_vote_source_url(loaded.data),
                    )
                    vote = _parse_vote(
                        loaded.data,
                        legislator_map,
                        existing_votes,
                        raw_vote_source_artifact_id=artifact.id,
                    )
                    if vote is not None:
                        batch.append(vote)
                        if len(batch) >= BATCH_SIZE:
                            total_inserted += _flush_batch(session, batch, "votes")
        total_inserted += _flush_batch(session, batch, "votes")
    logger.info("Inserted %d new votes total", total_inserted)
@@ -589,24 +442,11 @@ def _build_bill_map(session: Session) -> dict[tuple[int, str, int], int]:
    }
 def _build_legislator_map_for_engine(engine: Engine) -> dict[str, int]:
    """Build the legislator lookup in a short-lived read session."""
    with Session(engine) as session:
        return _build_legislator_map(session)
 def _build_bill_map_for_engine(engine: Engine) -> dict[tuple[int, str, int], int]:
    """Build the bill lookup in a short-lived read session."""
    with Session(engine) as session:
        return _build_bill_map(session)
 def _parse_vote(
    data: dict,
    legislator_map: dict[str, int],
    bill_map: dict[tuple[int, str, int], int],
    existing_votes: set[tuple[int, str, int, int]],
    *,
    raw_vote_source_artifact_id: int | None,
 ) -> Vote | None:
    """Parse a vote data.json dict into a Vote ORM object with records."""
    raw_congress = data.get("congress")
@@ -622,23 +462,30 @@ def _parse_vote(
        return None
    raw_session = data.get("session")
    if raw_session is None:
        return None
    congress = int(raw_congress)
    number = int(raw_number)
-    parsed_vote_date = _coerce_iso_date(vote_date)
+    session_number = int(raw_session)
    session_year = parsed_vote_date.year
    if raw_session is None:
        session_number = derive_session_number(congress, session_year)
    else:
        session_number = int(raw_session)
    # Normalize chamber from "h"/"s" to "House"/"Senate"
-    chamber_normalized = normalize_vote_chamber(chamber)
+    chamber_normalized = {"h": "House", "s": "Senate"}.get(chamber, chamber)
    if chamber_normalized is None:
        return None
    if (congress, chamber_normalized, session_number, number) in existing_votes:
        return None
    # Resolve linked bill
    bill_id = None
    bill_ref = data.get("bill")
    if bill_ref:
        bill_key = (
            int(bill_ref.get("congress", congress)),
            bill_ref.get("type"),
            int(bill_ref.get("number", 0)),
        )
        bill_id = bill_map.get(bill_key)
    raw_votes = data.get("votes", {})
    vote_counts = _count_votes(raw_votes)
    vote_records = _build_vote_records(raw_votes, legislator_map)
@@ -646,21 +493,14 @@ def _parse_vote(
    return Vote(
        congress=congress,
        chamber=chamber_normalized,
-        session_year=session_year,
+        session=session_number,
-        session_number=session_number,
+        number=number,
        roll_number=number,
        vote_type=data.get("type"),
        question=data.get("question"),
        result=data.get("result"),
        result_text=data.get("result_text"),
-        vote_date=parsed_vote_date,
+        vote_date=vote_date[:10] if isinstance(vote_date, str) else vote_date,
-        vote_datetime=parsed_vote_datetime(data),
+        bill_id=bill_id,
        raw_vote_source_url=parse_vote_source_url(data),
        raw_bill_ref=coerce_raw_ref(data.get("bill")),
        raw_amendment_ref=coerce_raw_ref(data.get("amendment")),
        raw_nomination_ref=coerce_raw_ref(data.get("nomination")),
        raw_treaty_ref=coerce_raw_ref(data.get("treaty")),
        raw_vote_source_artifact_id=raw_vote_source_artifact_id,
        vote_records=vote_records,
        **vote_counts,
    )
@@ -714,36 +554,16 @@ def _build_vote_records(
    return records
 def normalize_vote_chamber(raw_chamber: str | None) -> str | None:
    """Normalize vote JSON chamber codes."""
    if raw_chamber is None:
        return None
    value = raw_chamber.strip().lower()
    return {"h": "House", "house": "House", "s": "Senate", "senate": "Senate"}.get(
        value,
        raw_chamber,
    )
 # ---------------------------------------------------------------------------
 # Bill Text
 # ---------------------------------------------------------------------------
-def ingest_bill_text(
+def ingest_bill_text(engine: Engine, congress_dirs: list[Path]) -> None:
-    engine: Engine,
+    """Load bill text from text-versions directories."""
    congress_dirs: list[Path],
    ingest_run_id: int | None,
 ) -> None:
    """Load bill text from text-versions directories with official metadata when available."""
    bill_map = _build_bill_map_for_engine(engine)
    with Session(engine) as session:
        bill_map = _build_bill_map(session)
        logger.info("Loaded %d bills into lookup map", len(bill_map))
        billstatus_text_index = build_billstatus_text_version_index(congress_dirs)
        logger.info(
            "Loaded bill status text metadata for %d bills",
            len(billstatus_text_index),
        )
        existing_bill_texts = {
            (bill_text.bill_id, bill_text.version_code)
            for bill_text in session.scalars(select(BillText)).all()
@@ -757,12 +577,7 @@ def ingest_bill_text(
        for congress_dir in congress_dirs:
            logger.info("Scanning bill texts from %s", congress_dir.name)
            for bill_text in _iter_bill_texts(
-                session,
+                congress_dir, bill_map, existing_bill_texts
                congress_dir,
                bill_map,
                existing_bill_texts,
                billstatus_text_index,
                ingest_run_id,
            ):
                batch.append(bill_text)
                if len(batch) >= BATCH_SIZE:
@@ -773,19 +588,15 @@ def ingest_bill_text(
 def _iter_bill_texts(
    session: Session,
    congress_dir: Path,
    bill_map: dict[tuple[int, str, int], int],
    existing_bill_texts: set[tuple[int, str]],
    billstatus_text_index: dict[tuple[int, str, int], dict[str, object]],
    ingest_run_id: int | None,
 ) -> Iterator[BillText]:
    """Yield BillText objects for a single congress directory, skipping existing."""
    bills_dir = congress_dir / "bills"
    if not bills_dir.is_dir():
        return
    tasks: list[dict[str, object]] = []
    for bill_dir in bills_dir.rglob("text-versions"):
        if not bill_dir.is_dir():
            continue
@@ -801,52 +612,14 @@ def _iter_bill_texts(
                continue
            if (bill_id, version_dir.name) in existing_bill_texts:
                continue
-            tasks.append(
+            text_content = _read_bill_text(version_dir)
-                {
+            version_data = _read_json(version_dir / "data.json")
                    "bill_id": bill_id,
                    "bill_key": bill_key,
                    "version_dir": version_dir,
                    "billstatus_version": billstatus_text_index.get(bill_key, {}).get(
                        version_dir.name.lower()
                    ),
                }
            )
    for chunk in _chunked(tasks, PARALLEL_FILE_CHUNK_SIZE):
        results = parallelize_thread(
            _prepare_bill_text_input,
            list(chunk),
            progress_tracker=PARALLEL_PROGRESS_TRACKER,
        )
        for prepared in results.results:
            source_artifact_id = None
            if prepared.source_file is not None:
                artifact = register_source_artifact(
                    session,
                    path=prepared.source_file,
                    source_kind="bill_text_artifact",
                    congress=prepared.bill_key[0],
                    chamber=None,
                    ingest_run_id=ingest_run_id,
                    source_url=None,
                )
                source_artifact_id = artifact.id
            metadata = _merge_bill_text_metadata(
                version_code=prepared.version_code,
                version_data=prepared.version_data,
                billstatus_version=prepared.billstatus_version,
            )
            yield BillText(
-                bill_id=prepared.bill_id,
+                bill_id=bill_id,
-                version_code=prepared.version_code,
+                version_code=version_dir.name,
-                version_name=metadata["version_name"],
+                version_name=version_data.get("version_name") if version_data else None,
-                date=metadata["date"],
+                date=version_data.get("issued_on") if version_data else None,
-                text_content=prepared.text_content,
+                text_content=text_content,
                source_datetime_raw=metadata["source_datetime_raw"],
                text_url_xml=metadata["text_url_xml"],
                text_url_pdf=metadata["text_url_pdf"],
                text_url_html=metadata["text_url_html"],
                source_artifact_id=source_artifact_id,
            )
@@ -878,192 +651,6 @@ def _read_bill_text(version_dir: Path) -> str | None:
    return None
 def _find_text_source_file(version_dir: Path) -> Path | None:
    """Locate one representative local file for the bill text artifact manifest."""
    for extension in ("txt", "htm", "html", "xml"):
        candidates = list(version_dir.glob(f"document.{extension}"))
        if not candidates:
            candidates = list(version_dir.glob(f"*.{extension}"))
        if candidates:
            return candidates[0]
    return None
 def _merge_bill_text_metadata(
    *,
    version_code: str,
    version_data: dict | None,
    billstatus_version: object | None,
 ) -> dict[str, object | None]:
    """Merge unitedstates/congress text-version metadata with official billstatus metadata."""
    version_name = None
    issued_on = None
    source_datetime_raw = None
    text_url_xml = None
    text_url_pdf = None
    text_url_html = None
    if version_data:
        version_name = version_data.get("version_name")
        issued_on = parse_date_like(version_data.get("issued_on"))
        urls = version_data.get("urls")
        if isinstance(urls, dict):
            text_url_xml = urls.get("xml") or urls.get("formatted_xml")
            text_url_pdf = urls.get("pdf")
            text_url_html = urls.get("html") or urls.get("formatted_text")
    if billstatus_version is not None:
        version_name = getattr(billstatus_version, "version_name", None) or version_name
        issued_on = getattr(billstatus_version, "version_date", None) or issued_on
        source_datetime_raw = (
            getattr(billstatus_version, "source_datetime_raw", None) or source_datetime_raw
        )
        text_url_xml = getattr(billstatus_version, "text_url_xml", None) or text_url_xml
        text_url_pdf = getattr(billstatus_version, "text_url_pdf", None) or text_url_pdf
        text_url_html = getattr(billstatus_version, "text_url_html", None) or text_url_html
    return {
        "version_code": version_code,
        "version_name": version_name,
        "date": issued_on,
        "source_datetime_raw": source_datetime_raw,
        "text_url_xml": text_url_xml,
        "text_url_pdf": text_url_pdf,
        "text_url_html": text_url_html,
    }
 def _coerce_iso_date(value: str | date) -> date:
    """Normalize YYYY-MM-DD strings from the source data into date objects."""
    if isinstance(value, date):
        return value
    return date.fromisoformat(value[:10])
 def ingest_bill_status_context_step(
    engine: Engine,
    congress_dirs: list[Path],
    ingest_run_id: int | None,
 ) -> None:
    """Rebuild official bill/amendment context from offline artifacts."""
    supported_congress_dirs = filter_context_supported_congress_dirs(congress_dirs)
    if not supported_congress_dirs:
        logger.info("No congress directories support offline BILLSTATUS context; skipping.")
        return
    require_billstatus_artifacts(supported_congress_dirs)
    bill_map = _build_bill_map_for_engine(engine)
    with Session(engine) as session:
        rebuild_bill_status_context(
            session,
            congress_dirs=supported_congress_dirs,
            bill_map=bill_map,
            ingest_run_id=ingest_run_id,
        )
 def vote_context_match_step(engine: Engine, congress_dirs: list[Path]) -> None:
    """Persist canonical vote->action matches from recorded vote tuples."""
    supported_congress_dirs = filter_context_supported_congress_dirs(congress_dirs)
    if not supported_congress_dirs:
        logger.info("No congress directories support offline BILLSTATUS context; skipping.")
        return
    require_billstatus_artifacts(supported_congress_dirs)
    congress_numbers = [int(path.name) for path in supported_congress_dirs]
    with Session(engine) as session:
        build_vote_action_matches(session, congress_numbers=congress_numbers)
 def vote_context_classify_step(engine: Engine, congress_dirs: list[Path]) -> None:
    """Classify votes and measure relationships after action matching."""
    supported_congress_dirs = filter_context_supported_congress_dirs(congress_dirs)
    if not supported_congress_dirs:
        logger.info("No congress directories support offline BILLSTATUS context; skipping.")
        return
    require_billstatus_artifacts(supported_congress_dirs)
    congress_numbers = [int(path.name) for path in supported_congress_dirs]
    bill_map = _build_bill_map_for_engine(engine)
    with Session(engine) as session:
        classify_votes(
            session,
            congress_numbers=congress_numbers,
            bill_map=bill_map,
        )
 def vote_text_resolve_step(engine: Engine, congress_dirs: list[Path]) -> None:
    """Resolve official text targets for direct legislative text votes."""
    supported_congress_dirs = filter_context_supported_congress_dirs(congress_dirs)
    if not supported_congress_dirs:
        logger.info("No congress directories support offline BILLSTATUS context; skipping.")
        return
    require_billstatus_artifacts(supported_congress_dirs)
    congress_numbers = [int(path.name) for path in supported_congress_dirs]
    with Session(engine) as session:
        resolve_vote_text_targets(session, congress_numbers=congress_numbers)
 def vote_polarity_resolve_step(engine: Engine, congress_dirs: list[Path]) -> None:
    """Resolve position polarity/effect metadata for all classified votes."""
    supported_congress_dirs = filter_context_supported_congress_dirs(congress_dirs)
    if not supported_congress_dirs:
        logger.info("No congress directories support offline BILLSTATUS context; skipping.")
        return
    require_billstatus_artifacts(supported_congress_dirs)
    congress_numbers = [int(path.name) for path in supported_congress_dirs]
    with Session(engine) as session:
        resolve_vote_position_meanings(session, congress_numbers=congress_numbers)
 def vote_context_diagnostics_step(engine: Engine, congress_dirs: list[Path]) -> None:
    """Log aggregate vote-context coverage and unresolved audit rows."""
    supported_congress_dirs = filter_context_supported_congress_dirs(congress_dirs)
    if not supported_congress_dirs:
        logger.info("No congress directories support offline BILLSTATUS context; skipping.")
        return
    congress_numbers = [int(path.name) for path in supported_congress_dirs]
    with Session(engine) as session:
        classification_counts = session.execute(
            select(
                VoteClassification.subject_type,
                VoteClassification.vote_relationship,
                func.count(VoteClassification.vote_id),
            )
            .join(Vote, Vote.id == VoteClassification.vote_id)
            .where(Vote.congress.in_(congress_numbers))
            .group_by(
                VoteClassification.subject_type,
                VoteClassification.vote_relationship,
            )
            .order_by(
                VoteClassification.subject_type,
                VoteClassification.vote_relationship,
            )
        ).all()
        match_counts = session.execute(
            select(VoteActionMatch.match_method, func.count(VoteActionMatch.id))
            .join(Vote, Vote.id == VoteActionMatch.vote_id)
            .where(Vote.congress.in_(congress_numbers), VoteActionMatch.is_selected.is_(True))
            .group_by(VoteActionMatch.match_method)
            .order_by(VoteActionMatch.match_method)
        ).all()
        unresolved_audits = session.scalar(
            select(func.count(VoteContextAudit.id))
            .join(Vote, Vote.id == VoteContextAudit.vote_id)
            .where(Vote.congress.in_(congress_numbers), VoteContextAudit.severity.in_(("warning", "error")))
        )
    for subject_type, vote_relationship, count in classification_counts:
        logger.info(
            "vote-context subject=%s relationship=%s count=%d",
            subject_type,
            vote_relationship,
            count,
        )
    for match_method, count in match_counts:
        logger.info("vote-context selected_match method=%s count=%d", match_method, count)
    logger.info("vote-context unresolved audit rows=%d", unresolved_audits or 0)
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -1,574 +0,0 @@
 """Calculate legislator topic scores from bill topics and roll-call votes."""
 from __future__ import annotations
 import logging
 from dataclasses import dataclass
 from typing import Annotated, Sequence
 import typer
 from sqlalchemy import (
    ColumnElement,
    Integer,
    Select,
    and_,
    case,
    cast,
    delete,
    extract,
    func,
    or_,
    select,
    tuple_,
 )
 from sqlalchemy.orm import Session
 from pipelines.jobs.congress_vote_context import create_score_run, finalize_score_run
 from pipelines.orm.common import get_postgres_engine
 from pipelines.orm.data_science_dev.congress import (
    BillTopic,
    BillTopicPosition,
    LegislatorScore,
    SubjectType,
    Vote,
    VoteClassification,
    VoteEffect,
    VoteMeasureLink,
    VoteMeasureRole,
    VotePositionMeaning,
    VoteRelationship,
    VoteRecord,
 )
 from pipelines.jobs.extract_bill_topics import normalize_topic_label
 from pipelines.web.scoring import (
    OPPOSE_POSITIONS,
    SUPPORT_POSITIONS,
    normalized_position_expression,
 )
 logger = logging.getLogger(__name__)
 DELETE_BATCH_SIZE = 5_000
@dataclass(frozen=True)
 class ScoreDiagnostics:
    """Counts for the input stages required to calculate legislator scores."""
    bill_topic_rows: int
    linked_vote_rows: int
    vote_record_rows: int
    topic_vote_links: int
    scorable_vote_records: int
@dataclass(frozen=True)
 class LegislatorScoreInput:
    """One aggregated score ready to store in legislator_score."""
    legislator_id: int
    year: int
    topic: str
    score: float
    supportive: int
    opposed: int
 def create_legislator_score_query(
    *,
    congress: int | None = None,
    bill_ids: Sequence[int] | None = None,
    topics: Sequence[str] | None = None,
 ) -> Select:
    """Build the aggregate score query from extracted bill topics and vote records."""
    normalized_vote = normalized_position_expression(VoteRecord.position)
    supportive_vote = _supportive_vote_expression(normalized_vote)
    opposed_vote = _opposed_vote_expression(normalized_vote)
    supportive_count = func.sum(supportive_vote)
    opposed_count = func.sum(opposed_vote)
    total_count = supportive_count + opposed_count
    vote_year = cast(extract("year", Vote.vote_date), Integer)
    score = (100.0 * supportive_count / func.nullif(total_count, 0)).label("score")
    stmt = (
        select(
            VoteRecord.legislator_id.label("legislator_id"),
            vote_year.label("year"),
            BillTopic.topic.label("topic"),
            score,
            supportive_count.label("supportive"),
            opposed_count.label("opposed"),
            total_count.label("total"),
        )
        .select_from(VoteRecord)
        .join(Vote, Vote.id == VoteRecord.vote_id)
        .join(VoteClassification, VoteClassification.vote_id == Vote.id)
        .join(VotePositionMeaning, VotePositionMeaning.vote_id == Vote.id)
        .join(
            VoteMeasureLink,
            and_(
                VoteMeasureLink.vote_id == Vote.id,
                VoteMeasureLink.role == VoteMeasureRole.VOTED_ON,
            ),
        )
        .join(BillTopic, BillTopic.bill_id == VoteMeasureLink.measure_id)
        .where(
            *_eligible_vote_filters(),
            _is_scorable_position(normalized_vote),
        )
        .group_by(VoteRecord.legislator_id, vote_year, BillTopic.topic)
        .having(total_count > 0)
        .order_by(VoteRecord.legislator_id, vote_year, BillTopic.topic)
    )
    if congress is not None:
        stmt = stmt.where(Vote.congress == congress)
    if bill_ids:
        stmt = stmt.where(VoteMeasureLink.measure_id.in_(list(bill_ids)))
    normalized_topics = _normalize_topics(topics)
    if normalized_topics:
        stmt = stmt.where(BillTopic.topic.in_(normalized_topics))
    return stmt
 def collect_legislator_scores(
    session: Session,
    *,
    congress: int | None = None,
    bill_ids: Sequence[int] | None = None,
    topics: Sequence[str] | None = None,
 ) -> list[LegislatorScoreInput]:
    """Run the aggregate query and return score rows."""
    rows = session.execute(
        create_legislator_score_query(
            congress=congress,
            bill_ids=bill_ids,
            topics=topics,
        )
    )
    return [
        LegislatorScoreInput(
            legislator_id=int(row.legislator_id),
            year=int(row.year),
            topic=str(row.topic),
            score=float(row.score),
            supportive=int(row.supportive),
            opposed=int(row.opposed),
        )
        for row in rows
        if row.score is not None
    ]
 def collect_score_diagnostics(
    session: Session,
    *,
    congress: int | None = None,
    bill_ids: Sequence[int] | None = None,
    topics: Sequence[str] | None = None,
 ) -> ScoreDiagnostics:
    """Count score pipeline inputs for explaining empty score runs."""
    normalized_topics = _normalize_topics(topics)
    vote_filters = _vote_scope_filters(congress=congress, bill_ids=bill_ids)
    topic_filters = _topic_scope_filters(bill_ids=bill_ids, topics=normalized_topics)
    normalized_vote = normalized_position_expression(VoteRecord.position)
    eligible_vote_filters = _eligible_vote_filters()
    bill_topic_rows = session.scalar(
        select(func.count(BillTopic.id)).where(*topic_filters)
    )
    linked_vote_rows = session.scalar(
        select(func.count(func.distinct(Vote.id)))
        .select_from(Vote)
        .join(VoteClassification, VoteClassification.vote_id == Vote.id)
        .join(
            VoteMeasureLink,
            and_(
                VoteMeasureLink.vote_id == Vote.id,
                VoteMeasureLink.role == VoteMeasureRole.VOTED_ON,
            ),
        )
        .where(*vote_filters, *eligible_vote_filters)
    )
    vote_record_rows = session.scalar(
        select(func.count())
        .select_from(VoteRecord)
        .join(Vote, Vote.id == VoteRecord.vote_id)
        .join(VoteClassification, VoteClassification.vote_id == Vote.id)
        .where(*vote_filters, *eligible_vote_filters)
    )
    topic_vote_links = session.scalar(
        select(func.count())
        .select_from(VoteRecord)
        .join(Vote, Vote.id == VoteRecord.vote_id)
        .join(VoteClassification, VoteClassification.vote_id == Vote.id)
        .join(VotePositionMeaning, VotePositionMeaning.vote_id == Vote.id)
        .join(
            VoteMeasureLink,
            and_(
                VoteMeasureLink.vote_id == Vote.id,
                VoteMeasureLink.role == VoteMeasureRole.VOTED_ON,
            ),
        )
        .join(BillTopic, BillTopic.bill_id == VoteMeasureLink.measure_id)
        .where(*vote_filters, *topic_filters, *eligible_vote_filters)
    )
    scorable_vote_records = session.scalar(
        select(func.count())
        .select_from(VoteRecord)
        .join(Vote, Vote.id == VoteRecord.vote_id)
        .join(VoteClassification, VoteClassification.vote_id == Vote.id)
        .join(VotePositionMeaning, VotePositionMeaning.vote_id == Vote.id)
        .join(
            VoteMeasureLink,
            and_(
                VoteMeasureLink.vote_id == Vote.id,
                VoteMeasureLink.role == VoteMeasureRole.VOTED_ON,
            ),
        )
        .join(BillTopic, BillTopic.bill_id == VoteMeasureLink.measure_id)
        .where(
            *vote_filters,
            *topic_filters,
            *eligible_vote_filters,
            _is_scorable_position(normalized_vote),
        )
    )
    return ScoreDiagnostics(
        bill_topic_rows=bill_topic_rows or 0,
        linked_vote_rows=linked_vote_rows or 0,
        vote_record_rows=vote_record_rows or 0,
        topic_vote_links=topic_vote_links or 0,
        scorable_vote_records=scorable_vote_records or 0,
    )
 def store_legislator_scores(
    session: Session,
    rows: Sequence[LegislatorScoreInput],
    *,
    score_run_id: int | None,
    replace_all: bool = False,
 ) -> int:
    """Replace matching score rows and insert the newly calculated scores."""
    if replace_all:
        session.execute(delete(LegislatorScore))
    elif rows:
        keys = [
            (row.legislator_id, row.year, row.topic)
            for row in rows
        ]
        for key_batch in _batched(keys, DELETE_BATCH_SIZE):
            session.execute(
                delete(LegislatorScore).where(
                    tuple_(
                        LegislatorScore.legislator_id,
                        LegislatorScore.year,
                        LegislatorScore.topic,
                    ).in_(key_batch)
                )
            )
    session.add_all(
        [
            LegislatorScore(
                legislator_id=row.legislator_id,
                year=row.year,
                topic=row.topic,
                score=row.score,
                score_run_id=score_run_id,
            )
            for row in rows
        ]
    )
    return len(rows)
 def _supportive_vote_expression(
    normalized_vote: ColumnElement[str | None],
 ) -> ColumnElement[int]:
    supports_text = _position_matches_effect(normalized_vote, VoteEffect.SUPPORTS_TEXT)
    opposes_text = _position_matches_effect(normalized_vote, VoteEffect.OPPOSES_TEXT)
    return case(
        (
            and_(
                BillTopic.support_position == BillTopicPosition.FOR,
                supports_text,
            ),
            1,
        ),
        (
            and_(
                BillTopic.support_position == BillTopicPosition.AGAINST,
                opposes_text,
            ),
            1,
        ),
        else_=0,
    )
 def _opposed_vote_expression(
    normalized_vote: ColumnElement[str | None],
 ) -> ColumnElement[int]:
    supports_text = _position_matches_effect(normalized_vote, VoteEffect.SUPPORTS_TEXT)
    opposes_text = _position_matches_effect(normalized_vote, VoteEffect.OPPOSES_TEXT)
    return case(
        (
            and_(
                BillTopic.support_position == BillTopicPosition.FOR,
                opposes_text,
            ),
            1,
        ),
        (
            and_(
                BillTopic.support_position == BillTopicPosition.AGAINST,
                supports_text,
            ),
            1,
        ),
        else_=0,
    )
 def _position_matches_effect(
    normalized_vote: ColumnElement[str | None],
    effect: VoteEffect,
 ) -> ColumnElement[bool]:
    return or_(
        and_(
            normalized_vote.in_(sorted(SUPPORT_POSITIONS)),
            VotePositionMeaning.yea_effect == effect,
        ),
        and_(
            normalized_vote.in_(sorted(OPPOSE_POSITIONS)),
            VotePositionMeaning.nay_effect == effect,
        ),
        and_(
            normalized_vote == "present",
            VotePositionMeaning.present_effect == effect,
        ),
    )
 def _is_scorable_position(normalized_vote: ColumnElement[str | None]) -> ColumnElement[bool]:
    return or_(
        _position_matches_effect(normalized_vote, VoteEffect.SUPPORTS_TEXT),
        _position_matches_effect(normalized_vote, VoteEffect.OPPOSES_TEXT),
    )
 def _normalize_topics(topics: Sequence[str] | None) -> list[str]:
    normalized: list[str] = []
    seen: set[str] = set()
    for topic in topics or []:
        value = normalize_topic_label(topic)
        if value and value not in seen:
            normalized.append(value)
            seen.add(value)
    return normalized
 def _batched[T](items: Sequence[T], batch_size: int) -> list[Sequence[T]]:
    return [
        items[index : index + batch_size]
        for index in range(0, len(items), batch_size)
    ]
 def _vote_scope_filters(
    *,
    congress: int | None,
    bill_ids: Sequence[int] | None,
 ) -> list[ColumnElement[bool]]:
    filters: list[ColumnElement[bool]] = []
    if congress is not None:
        filters.append(Vote.congress == congress)
    if bill_ids:
        filters.append(VoteMeasureLink.measure_id.in_(list(bill_ids)))
    return filters
 def _topic_scope_filters(
    *,
    bill_ids: Sequence[int] | None,
    topics: Sequence[str],
 ) -> list[ColumnElement[bool]]:
    filters: list[ColumnElement[bool]] = []
    if bill_ids:
        filters.append(BillTopic.bill_id.in_(list(bill_ids)))
    if topics:
        filters.append(BillTopic.topic.in_(list(topics)))
    return filters
 def _has_score_scope(
    *,
    congress: int | None,
    bill_ids: Sequence[int] | None,
    topics: Sequence[str] | None,
 ) -> bool:
    return congress is not None or bool(bill_ids) or bool(topics)
 def _eligible_vote_filters() -> list[ColumnElement[bool]]:
    return [
        VoteClassification.subject_type == SubjectType.MEASURE,
        VoteClassification.vote_relationship == VoteRelationship.DIRECT_TEXT_VOTE,
        VoteClassification.is_direct_vote_on_legislative_text.is_(True),
        VoteClassification.is_substantive_policy_vote.is_(True),
        VoteClassification.is_special_rule.is_(False),
    ]
 def main(
    congress: Annotated[
        int | None,
        typer.Option(help="Only score votes from one Congress."),
    ] = None,
    bill_ids: Annotated[
        list[int] | None,
        typer.Option(
            "--bill-id",
            help="Only score votes linked to one internal bill.id. Repeatable.",
        ),
    ] = None,
    topics: Annotated[
        list[str] | None,
        typer.Option("--topic", help="Only score one normalized topic. Repeatable."),
    ] = None,
    replace_all: Annotated[
        bool,
        typer.Option(
            help="Delete every existing legislator score before inserting. "
            "Unfiltered runs do this automatically."
        ),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(help="Calculate scores without writing to the database."),
    ] = False,
    log_level: Annotated[str, typer.Option(help="Log level.")] = "INFO",
    diagnose: Annotated[
        bool,
        typer.Option(help="Log input-stage counts even when rows are calculated."),
    ] = False,
 ) -> None:
    """CLI entrypoint for calculating and storing legislator topic scores."""
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
    with Session(engine) as session:
        rows = collect_legislator_scores(
            session,
            congress=congress,
            bill_ids=bill_ids,
            topics=topics,
        )
        logger.info("Calculated %d legislator topic score rows", len(rows))
        if diagnose or not rows:
            diagnostics = collect_score_diagnostics(
                session,
                congress=congress,
                bill_ids=bill_ids,
                topics=topics,
            )
            _log_diagnostics(diagnostics)
        if dry_run:
            session.rollback()
            return
        score_run = create_score_run(session)
        should_replace_all = replace_all or not _has_score_scope(
            congress=congress,
            bill_ids=bill_ids,
            topics=topics,
        )
        written = store_legislator_scores(
            session,
            rows,
            score_run_id=score_run.id,
            replace_all=should_replace_all,
        )
        included_vote_count = session.scalar(
            select(func.count(func.distinct(Vote.id)))
            .select_from(VoteRecord)
            .join(Vote, Vote.id == VoteRecord.vote_id)
            .join(VoteClassification, VoteClassification.vote_id == Vote.id)
            .join(VotePositionMeaning, VotePositionMeaning.vote_id == Vote.id)
            .join(
                VoteMeasureLink,
                and_(
                    VoteMeasureLink.vote_id == Vote.id,
                    VoteMeasureLink.role == VoteMeasureRole.VOTED_ON,
                ),
            )
            .join(BillTopic, BillTopic.bill_id == VoteMeasureLink.measure_id)
            .where(
                *_vote_scope_filters(congress=congress, bill_ids=bill_ids),
                *_topic_scope_filters(bill_ids=bill_ids, topics=_normalize_topics(topics)),
                *_eligible_vote_filters(),
                _is_scorable_position(normalized_position_expression(VoteRecord.position)),
            )
        ) or 0
        total_scoped_votes = session.scalar(
            select(func.count(func.distinct(Vote.id)))
            .select_from(Vote)
            .join(VoteClassification, VoteClassification.vote_id == Vote.id)
            .join(
                VoteMeasureLink,
                and_(
                    VoteMeasureLink.vote_id == Vote.id,
                    VoteMeasureLink.role == VoteMeasureRole.VOTED_ON,
                ),
            )
            .where(*_vote_scope_filters(congress=congress, bill_ids=bill_ids))
        ) or 0
        finalize_score_run(
            session,
            score_run=score_run,
            included_vote_count=included_vote_count,
            excluded_vote_count=max(total_scoped_votes - included_vote_count, 0),
        )
        session.commit()
        logger.info("Stored %d legislator topic score rows", written)
 def _log_diagnostics(diagnostics: ScoreDiagnostics) -> None:
    logger.info(
        "Score input diagnostics: bill_topic_rows=%d linked_vote_rows=%d "
        "vote_record_rows=%d topic_vote_links=%d scorable_vote_records=%d",
        diagnostics.bill_topic_rows,
        diagnostics.linked_vote_rows,
        diagnostics.vote_record_rows,
        diagnostics.topic_vote_links,
        diagnostics.scorable_vote_records,
    )
    if diagnostics.bill_topic_rows == 0:
        logger.warning(
            "No extracted bill topics matched the score scope. Run "
            "pipelines.tools.extract_bill_topics after bill summarization."
        )
    elif diagnostics.linked_vote_rows == 0:
        logger.warning("No direct substantive text votes matched the score scope.")
    elif diagnostics.vote_record_rows == 0:
        logger.warning("No individual vote records matched the score scope.")
    elif diagnostics.topic_vote_links == 0:
        logger.warning(
            "Bill topics exist, but none are attached to bills that have eligible scored votes."
        )
    elif diagnostics.scorable_vote_records == 0:
        logger.warning(
            "Topic-vote links exist, but no joined vote records had Yea/Aye/Yes/Nay/No positions."
        )
 if __name__ == "__main__":
    typer.run(main)
@@ -1,682 +0,0 @@
 """Extract bill topics from bill text using a configurable topic catalog."""
 from __future__ import annotations
 import json
 import logging
 import re
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Annotated, Any, Sequence
 import httpx
 import typer
 from sqlalchemy import ColumnElement, Select, delete, exists, func, select
 from sqlalchemy.orm import Session, selectinload
 from pipelines.config import OpenAIConfig, get_config_dir, get_openai_config
 from pipelines.orm.common import get_postgres_engine
 from pipelines.orm.data_science_dev.congress import (
    Bill,
    BillText,
    BillTopic,
    BillTopicPosition,
    SubjectType,
    VoteClassification,
    VoteRelationship,
    VoteTextTarget,
 )
 logger = logging.getLogger(__name__)
 OPENAI_PROJECT_ID = "proj_fQBPEXFgnS87Fk6wZwploFwE"
 OPENAI_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"
 REQUEST_TIMEOUT_SECONDS = 60
 DEFAULT_TOPICS_PATH = get_config_dir() / "congressional_issues_comprehensive.json"
 class TopicExtractionError(RuntimeError):
    """Raised when a topic extraction request or response is invalid."""
@dataclass(frozen=True)
 class TopicCatalog:
    """Loaded topic catalog with categories for prompting and flat candidates."""
    topics_by_category: dict[str, list[str]]
    candidate_topics: list[str]
@dataclass(frozen=True)
 class TopicExtractionDiagnostics:
    """Counts for the bill summary inputs needed by topic extraction."""
    bill_rows: int
    bill_text_rows: int
    summarized_bill_text_rows: int
    bills_with_summaries: int
    bill_topic_rows: int
    selected_bills: int
@dataclass(frozen=True)
 class ExtractedBillTopic:
    """One extracted bill topic and yes-vote stance."""
    topic: str
    support_position: BillTopicPosition
    confidence: float | None = None
    evidence: str | None = None
 def _select_bill_text_for_topic_extraction(bill: Bill) -> BillText | None:
    """Pick one summarized bill_text row from the already-loaded relationship."""
    for bill_text in bill.bill_texts:
        if bill_text.summary and bill_text.summary.strip():
            return bill_text
    return None
 def normalize_topic_label(value: str) -> str:
    """Normalize a topic label for storage, comparison, and de-duping."""
    normalized = value.strip().strip("\"'")
    normalized = normalized.strip().rstrip(".").strip()
    return re.sub(r"\s+", " ", normalized).lower()
 def load_topic_catalog(path: Path | None = None) -> TopicCatalog:
    """Load, validate, normalize, and flatten the bill topic catalog."""
    topics_path = path or DEFAULT_TOPICS_PATH
    try:
        raw = json.loads(topics_path.read_text())
    except FileNotFoundError as exc:
        msg = f"Topic catalog not found: {topics_path}"
        raise TopicExtractionError(msg) from exc
    except json.JSONDecodeError as exc:
        msg = f"Topic catalog is not valid JSON: {topics_path}: {exc}"
        raise TopicExtractionError(msg) from exc
    if not isinstance(raw, dict):
        msg = "Topic catalog root must be an object mapping category names to lists"
        raise TopicExtractionError(msg)
    topics_by_category: dict[str, list[str]] = {}
    candidate_topics: list[str] = []
    seen_topics: set[str] = set()
    for category, topics in raw.items():
        if not isinstance(category, str) or not category.strip():
            msg = "Topic catalog category names must be non-empty strings"
            raise TopicExtractionError(msg)
        if not isinstance(topics, list):
            msg = f"Topic catalog category {category!r} must contain a list"
            raise TopicExtractionError(msg)
        normalized_topics: list[str] = []
        for topic in topics:
            if not isinstance(topic, str):
                msg = f"Topic catalog category {category!r} contains a non-string topic"
                raise TopicExtractionError(msg)
            normalized_topic = normalize_topic_label(topic)
            if not normalized_topic:
                msg = f"Topic catalog category {category!r} contains a blank topic"
                raise TopicExtractionError(msg)
            if normalized_topic in seen_topics:
                continue
            seen_topics.add(normalized_topic)
            normalized_topics.append(normalized_topic)
            candidate_topics.append(normalized_topic)
        topics_by_category[category.strip()] = normalized_topics
    return TopicCatalog(
        topics_by_category=topics_by_category,
        candidate_topics=candidate_topics,
    )
 def build_topic_extraction_messages(
    *,
    bill: Bill,
    bill_text: str,
    candidate_topics: Sequence[str],
 ) -> list[dict[str, str]]:
    """Build GPT messages for extracting a bill's scored topics."""
    normalized_candidates = [normalize_topic_label(topic) for topic in candidate_topics]
    candidate_list = "\n".join(f"- {topic}" for topic in normalized_candidates)
    metadata = "\n".join(
        (
            f"Congress: {bill.congress}",
            f"Bill: {bill.bill_type} {bill.number}",
            f"Title: {bill.title_short or bill.title or bill.official_title or ''}",
            f"Top subject term: {bill.subjects_top_term or ''}",
        )
    )
    system_prompt = (
        "You extract policy topics from U.S. congressional bills.\n"
        'For each selected topic, decide whether a Yes/Yea vote on the bill is "for" or "against" that topic.\n'
        'Use "support_position": "for" when a Yes/Yea vote advances or supports the topic.\n'
        'Use "support_position": "against" when a Yes/Yea vote restricts, repeals, blocks, or opposes the topic.\n'
        "Select only topics from the provided candidate topic list.\n"
        "Omit topics that are not materially addressed by the bill.\n"
        "Return strict JSON only, with this shape:\n"
        '{"topics":[{"topic":"candidate topic","support_position":"for","confidence":0.0,"evidence":"short reason"}]}'
    )
    user_prompt = "\n\n".join(
        (
            "BILL METADATA:",
            metadata,
            "CANDIDATE TOPICS:",
            candidate_list,
            "BILL TEXT:",
            bill_text,
        )
    )
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
 def call_openai_topic_extraction(
    *,
    openai_config: OpenAIConfig,
    messages: list[dict[str, str]],
 ) -> str:
    """Call GPT and return the assistant message content."""
    response = httpx.post(
        openai_config.openai_chat_completions_url,
        headers={
            "Authorization": f"Bearer {openai_config.api_key}",
            "OpenAI-Project": openai_config.openai_project_id,
            "Content-Type": "application/json",
        },
        json={
            "model": "gpt-5.4-mini",
            "messages": messages,
        },
        timeout=openai_config.timeout_seconds,
    )
    response.raise_for_status()
    return extract_message_content(response.json())
 def extract_message_content(data: dict[str, Any]) -> str:
    """Extract message content from a chat-completions response body."""
    choices = data.get("choices")
    if not isinstance(choices, list) or not choices:
        msg = "Chat completion response did not contain choices"
        raise TopicExtractionError(msg)
    first = choices[0]
    if not isinstance(first, dict):
        msg = "Chat completion choice must be an object"
        raise TopicExtractionError(msg)
    message = first.get("message")
    if isinstance(message, dict) and isinstance(message.get("content"), str):
        return message["content"]
    if isinstance(first.get("text"), str):
        return first["text"]
    msg = "Chat completion response did not contain message content"
    raise TopicExtractionError(msg)
 def parse_topic_extraction_response(response_text: str) -> list[ExtractedBillTopic]:
    """Parse, normalize, validate, and de-dupe a topic extraction response."""
    payload = _load_json_response(response_text)
    topics = payload.get("topics")
    if not isinstance(topics, list):
        msg = "Topic extraction response must contain a topics list"
        raise TopicExtractionError(msg)
    deduped: dict[tuple[str, BillTopicPosition], ExtractedBillTopic] = {}
    for item in topics:
        if not isinstance(item, dict):
            msg = "Topic extraction response topics must be objects"
            raise TopicExtractionError(msg)
        raw_topic = _extract_topic_label(item)
        topic = normalize_topic_label(raw_topic)
        if not topic:
            msg = "Topic extraction response topic must not be blank"
            raise TopicExtractionError(msg)
        raw_position = item.get("support_position")
        try:
            support_position = BillTopicPosition(raw_position)
        except ValueError as exc:
            msg = f"Invalid support_position: {raw_position!r}"
            raise TopicExtractionError(msg) from exc
        confidence = _parse_confidence(item.get("confidence"))
        evidence = item.get("evidence")
        if evidence is not None and not isinstance(evidence, str):
            evidence = str(evidence)
        extracted = ExtractedBillTopic(
            topic=topic,
            support_position=support_position,
            confidence=confidence,
            evidence=evidence,
        )
        key = (topic, support_position)
        existing = deduped.get(key)
        if existing is None or _confidence_rank(extracted) > _confidence_rank(existing):
            deduped[key] = extracted
    return list(deduped.values())
 def extract_topics_for_bill_text(
    *,
    openai_config: OpenAIConfig,
    bill: Bill,
    text: str,
    candidate_topics: Sequence[str],
 ) -> list[ExtractedBillTopic]:
    """Extract accepted catalog topics for a bill text string."""
    normalized_candidates = {normalize_topic_label(topic) for topic in candidate_topics}
    messages = build_topic_extraction_messages(
        bill=bill,
        bill_text=text,
        candidate_topics=sorted(normalized_candidates),
    )
    response_text = call_openai_topic_extraction(
        openai_config=openai_config,
        messages=messages,
    )
    extracted_topics = parse_topic_extraction_response(response_text)
    return [topic for topic in extracted_topics if topic.topic in normalized_candidates]
 def store_bill_topic_result(
    *,
    session: Session,
    bill: Bill,
    topics: Sequence[ExtractedBillTopic],
    replace_existing: bool = True,
 ) -> None:
    """Store extracted topics for one bill."""
    if replace_existing:
        session.execute(delete(BillTopic).where(BillTopic.bill_id == bill.id))
    for topic in topics:
        session.add(
            BillTopic(
                bill_id=bill.id,
                topic=normalize_topic_label(topic.topic),
                support_position=topic.support_position,
            )
        )
 def create_select_bills_for_topic_extraction(
    congress: int | None = None,
    bill_ids: list[int] | None = None,
    bill_text_ids: list[int] | None = None,
    with_votes_only: bool = False,
    force: bool = False,
    limit: int | None = None,
 ) -> Select[tuple[Bill]]:
    """Select bill rows that have summarized bill_text rows for topic extraction."""
    has_summary = (BillText.summary.is_not(None), BillText.summary != "")
    summarized_text_filters: list[ColumnElement[bool]] = [
        BillText.bill_id == Bill.id,
        *has_summary,
    ]
    if with_votes_only:
        summarized_text_filters.append(
            exists(
                select(VoteTextTarget.vote_id)
                .join(
                    VoteClassification,
                    VoteClassification.vote_id == VoteTextTarget.vote_id,
                )
                .where(
                    VoteTextTarget.voted_text_version_id == BillText.id,
                    VoteClassification.subject_type == SubjectType.MEASURE,
                    VoteClassification.vote_relationship
                    == VoteRelationship.DIRECT_TEXT_VOTE,
                    VoteClassification.is_direct_vote_on_legislative_text.is_(True),
                    VoteClassification.is_substantive_policy_vote.is_(True),
                    VoteClassification.is_special_rule.is_(False),
                )
            )
        )
    summarized_text_exists = exists(select(BillText.id).where(*summarized_text_filters))
    stmt = (
        select(Bill)
        .where(summarized_text_exists)
        .options(selectinload(Bill.bill_texts.and_(*summarized_text_filters[1:])))
        .order_by(Bill.id)
    )
    if congress is not None:
        stmt = stmt.where(Bill.congress == congress)
    if bill_ids:
        stmt = stmt.where(Bill.id.in_(bill_ids))
    if bill_text_ids:
        selected_text_exists = exists(
            select(BillText.id).where(
                BillText.bill_id == Bill.id,
                BillText.id.in_(bill_text_ids),
                *summarized_text_filters[1:],
            )
        )
        stmt = stmt.where(selected_text_exists)
    if not force:
        stmt = stmt.where(
            ~exists(select(BillTopic.id).where(BillTopic.bill_id == Bill.id))
        )
    if limit is not None:
        stmt = stmt.limit(limit)
    return stmt
 def collect_topic_extraction_diagnostics(
    session: Session,
    *,
    congress: int | None = None,
    bill_ids: list[int] | None = None,
    bill_text_ids: list[int] | None = None,
    with_votes_only: bool = False,
    force: bool = False,
    limit: int | None = None,
 ) -> TopicExtractionDiagnostics:
    """Count topic extraction inputs for explaining empty selections."""
    bill_filters = []
    bill_text_filters: list[ColumnElement[bool]] = []
    if congress is not None:
        bill_filters.append(Bill.congress == congress)
    if bill_ids:
        bill_filters.append(Bill.id.in_(bill_ids))
        bill_text_filters.append(BillText.bill_id.in_(bill_ids))
    if bill_text_ids:
        bill_text_filters.append(BillText.id.in_(bill_text_ids))
    if with_votes_only:
        bill_text_filters.append(
            exists(
                select(VoteTextTarget.vote_id)
                .join(
                    VoteClassification,
                    VoteClassification.vote_id == VoteTextTarget.vote_id,
                )
                .where(
                    VoteTextTarget.voted_text_version_id == BillText.id,
                    VoteClassification.subject_type == SubjectType.MEASURE,
                    VoteClassification.vote_relationship
                    == VoteRelationship.DIRECT_TEXT_VOTE,
                    VoteClassification.is_direct_vote_on_legislative_text.is_(True),
                    VoteClassification.is_substantive_policy_vote.is_(True),
                    VoteClassification.is_special_rule.is_(False),
                )
            )
        )
    has_summary = (BillText.summary.is_not(None), BillText.summary != "")
    summary_filters = [*bill_text_filters, *has_summary]
    bills_with_summaries = session.scalar(
        select(func.count(func.distinct(Bill.id)))
        .select_from(Bill)
        .join(BillText, BillText.bill_id == Bill.id)
        .where(*bill_filters, *summary_filters)
    )
    selected_bills = session.scalar(
        select(func.count()).select_from(
            create_select_bills_for_topic_extraction(
                congress=congress,
                bill_ids=bill_ids,
                bill_text_ids=bill_text_ids,
                with_votes_only=with_votes_only,
                force=force,
                limit=limit,
            ).subquery()
        )
    )
    return TopicExtractionDiagnostics(
        bill_rows=session.scalar(select(func.count(Bill.id)).where(*bill_filters)) or 0,
        bill_text_rows=_count_bill_texts(
            session,
            bill_filters=bill_filters,
            bill_text_filters=bill_text_filters,
        ),
        summarized_bill_text_rows=_count_bill_texts(
            session,
            bill_filters=bill_filters,
            bill_text_filters=summary_filters,
        ),
        bills_with_summaries=bills_with_summaries or 0,
        bill_topic_rows=session.scalar(select(func.count(BillTopic.id))) or 0,
        selected_bills=selected_bills or 0,
    )
 def _load_json_response(response_text: str) -> dict[str, Any]:
    text = response_text.strip()
    fenced = re.fullmatch(r"```(?:json)?\s*(.*?)\s*```", text, flags=re.DOTALL)
    if fenced:
        text = fenced.group(1).strip()
    try:
        payload = json.loads(text)
    except json.JSONDecodeError as exc:
        msg = f"Topic extraction response is not valid JSON: {exc}"
        raise TopicExtractionError(msg) from exc
    if not isinstance(payload, dict):
        msg = "Topic extraction response must be a JSON object"
        raise TopicExtractionError(msg)
    return payload
 def _parse_confidence(raw: Any) -> float | None:
    if raw is None:
        return None
    try:
        return float(raw)
    except (TypeError, ValueError) as exc:
        msg = f"Invalid confidence: {raw!r}"
        raise TopicExtractionError(msg) from exc
 def _confidence_rank(topic: ExtractedBillTopic) -> tuple[int, float]:
    if topic.confidence is None:
        return (0, 0.0)
    return (1, topic.confidence)
 def _extract_topic_label(item: dict[str, Any]) -> str:
    raw_topic = item.get("topic")
    if isinstance(raw_topic, str):
        return raw_topic
    if isinstance(raw_topic, dict):
        for key in ("topic", "label", "name", "title"):
            value = raw_topic.get(key)
            if isinstance(value, str):
                return value
    msg = "Topic extraction response topic must be a string"
    raise TopicExtractionError(msg)
 def _count_bill_texts(
    session: Session,
    *,
    bill_filters: Sequence[ColumnElement[bool]],
    bill_text_filters: Sequence[ColumnElement[bool]],
 ) -> int:
    stmt = select(func.count(BillText.id))
    if bill_filters:
        stmt = stmt.join(Bill, Bill.id == BillText.bill_id).where(*bill_filters)
    return session.scalar(stmt.where(*bill_text_filters)) or 0
 def main(
    topics_path: Annotated[
        Path, typer.Option(help="Path to congressional issue topic JSON.")
    ] = DEFAULT_TOPICS_PATH,
    congress: Annotated[
        int | None, typer.Option(help="Only process one Congress.")
    ] = None,
    bill_ids: Annotated[
        list[int] | None,
        typer.Option(
            "--bill-id",
            help="Only process one internal bill.id. Repeat for multiple bills.",
        ),
    ] = None,
    bill_text_ids: Annotated[
        list[int] | None,
        typer.Option(
            "--bill-text-id",
            help="Only process one internal bill_text.id. Repeat for multiple rows.",
        ),
    ] = None,
    with_votes_only: Annotated[
        bool,
        typer.Option(
            "--with-votes-only",
            help="Only process summarized bill_text rows linked to at least one vote.",
        ),
    ] = True,
    limit: Annotated[int | None, typer.Option(help="Maximum rows to process.")] = None,
    force: Annotated[
        bool,
        typer.Option(help="Regenerate topics for bills that already have topics."),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option(help="Select bills and print diagnostics without calling OpenAI."),
    ] = False,
    diagnose: Annotated[
        bool,
        typer.Option(help="Log input-stage counts before processing."),
    ] = False,
    log_level: Annotated[str, typer.Option(help="Log level.")] = "INFO",
 ) -> None:
    """CLI entrypoint for generating and storing bill topics."""
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
    )
    topic_catalog = load_topic_catalog(topics_path)
    logger.info(
        "Loaded %d candidate topics from %s",
        len(topic_catalog.candidate_topics),
        topics_path,
    )
    engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
    with Session(engine) as session:
        if diagnose or dry_run:
            diagnostics = collect_topic_extraction_diagnostics(
                session,
                congress=congress,
                bill_ids=bill_ids,
                bill_text_ids=bill_text_ids,
                with_votes_only=with_votes_only,
                force=force,
                limit=limit,
            )
            _log_topic_extraction_diagnostics(diagnostics)
            if dry_run:
                return
        openai_config = get_openai_config()
        stmt = create_select_bills_for_topic_extraction(
            congress=congress,
            bill_ids=bill_ids,
            bill_text_ids=bill_text_ids,
            with_votes_only=with_votes_only,
            force=force,
            limit=limit,
        )
        bills = session.scalars(stmt).all()
        logger.info("Selected %d bills for topic extraction", len(bills))
        written = 0
        failed = 0
        for index, bill in enumerate(bills, 1):
            bill_text = _select_bill_text_for_topic_extraction(bill)
            if bill_text is None:
                logger.warning("Skipping bill id=%s: no usable summary", bill.id)
                continue
            summary = bill_text.summary.strip()
            try:
                extracted_topics = extract_topics_for_bill_text(
                    openai_config=openai_config,
                    bill=bill,
                    text=summary,
                    candidate_topics=topic_catalog.candidate_topics,
                )
            except (httpx.HTTPError, TopicExtractionError):
                failed += 1
                logger.exception(
                    "Skipping bill id=%s after topic extraction failure", bill.id
                )
                continue
            store_bill_topic_result(
                session=session,
                bill=bill,
                topics=extracted_topics,
                replace_existing=True,
            )
            written += 1
            if index % 100 == 0:
                session.commit()
            logger.info(
                "Stored %d topics for bill id=%s",
                len(extracted_topics),
                bill.id,
            )
        session.commit()
        logger.info(
            "Done: stored topic results for %d bills; failed %d bills",
            written,
            failed,
        )
 def _log_topic_extraction_diagnostics(
    diagnostics: TopicExtractionDiagnostics,
 ) -> None:
    logger.info(
        "Topic extraction diagnostics: bill_rows=%d bill_text_rows=%d "
        "summarized_bill_text_rows=%d bills_with_summaries=%d "
        "bill_topic_rows=%d selected_bills=%d",
        diagnostics.bill_rows,
        diagnostics.bill_text_rows,
        diagnostics.summarized_bill_text_rows,
        diagnostics.bills_with_summaries,
        diagnostics.bill_topic_rows,
        diagnostics.selected_bills,
    )
    if diagnostics.bill_rows == 0:
        logger.warning("No bills matched the topic extraction scope.")
    elif diagnostics.bill_text_rows == 0:
        logger.warning("No bill_text rows matched the topic extraction scope.")
    elif diagnostics.summarized_bill_text_rows == 0:
        logger.warning(
            "No summarized bill_text rows matched the topic extraction scope. "
            "Run pipelines.tools.summarize_bills first."
        )
    elif diagnostics.selected_bills == 0 and diagnostics.bill_topic_rows > 0:
        logger.warning(
            "No bills selected because matching bills already have topics. "
            "Use --force to regenerate them."
        )
    elif diagnostics.selected_bills == 0:
        logger.warning("No bills selected for topic extraction.")
 if __name__ == "__main__":
    typer.run(main)
@@ -2,85 +2,25 @@
 from pipelines.orm.data_science_dev.congress.bill import (
    Bill,
    BillAction,
    BillActionRecordedVote,
    BillRelation,
    BillText,
    BillTopic,
    BillTopicPosition,
 )
 from pipelines.orm.data_science_dev.congress.amendment import (
    Amendment,
    AmendmentAction,
    AmendmentActionRecordedVote,
 )
 from pipelines.orm.data_science_dev.congress.context import (
    ClassificationMethod,
    ConfidenceLevel,
    IngestRun,
    MeasureFunction,
    MeasureSubtype,
    ScoreRun,
    SourceArtifact,
    SubjectType,
    TextResolutionMethod,
    TextTargetBasis,
    TextTargetType,
    VoteActionMatch,
    VoteActionScope,
    VoteClassification,
    VoteContextAudit,
    VoteEffect,
    VoteMeasureLink,
    VoteMeasureRole,
    VotePositionMeaning,
    VoteRelationship,
    VoteTextTarget,
 )
 from pipelines.orm.data_science_dev.congress.legislator import (
    Legislator,
    LegislatorScore,
    LegislatorSocialMedia,
    LegislatorScoreFake,
 )
 from pipelines.orm.data_science_dev.congress.vote import Vote, VoteRecord
 __all__ = [
    "Amendment",
    "AmendmentAction",
    "AmendmentActionRecordedVote",
    "Bill",
    "BillAction",
    "BillActionRecordedVote",
    "BillRelation",
    "BillText",
    "BillTopic",
    "BillTopicPosition",
    "ClassificationMethod",
    "ConfidenceLevel",
    "IngestRun",
    "Legislator",
    "LegislatorScore",
    "LegislatorScoreFake",
    "LegislatorSocialMedia",
    "MeasureFunction",
    "MeasureSubtype",
    "ScoreRun",
    "SourceArtifact",
    "SubjectType",
    "TextResolutionMethod",
    "TextTargetBasis",
    "TextTargetType",
    "Vote",
    "VoteActionMatch",
    "VoteActionScope",
    "VoteClassification",
    "VoteContextAudit",
    "VoteEffect",
    "VoteMeasureLink",
    "VoteMeasureRole",
    "VotePositionMeaning",
    "VoteRelationship",
    "VoteRecord",
    "VoteTextTarget",
 ]
@@ -1,127 +0,0 @@
 """Amendment models and official action context."""
 from __future__ import annotations
 from datetime import date, datetime
 from sqlalchemy import DateTime, ForeignKey, Index, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
 class Amendment(DataScienceDevTableBase):
    """Congressional amendment linked to a bill or to another amendment."""
    __tablename__ = "amendment"
    __table_args__ = (
        UniqueConstraint(
            "congress",
            "amendment_type",
            "number",
            name="uq_amendment_congress_type_number",
        ),
    )
    congress: Mapped[int]
    amendment_type: Mapped[str]
    number: Mapped[int]
    chamber: Mapped[str]
    description: Mapped[str | None]
    purpose: Mapped[str | None]
    amended_bill_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill.id", ondelete="SET NULL")
    )
    amended_amendment_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="SET NULL")
    )
    source_path: Mapped[str | None]
    source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL")
    )
    actions: Mapped[list[AmendmentAction]] = relationship(
        "AmendmentAction",
        back_populates="amendment",
        cascade="all, delete-orphan",
    )
    amended_amendment: Mapped[Amendment | None] = relationship(
        "Amendment",
        remote_side="Amendment.id",
    )
 class AmendmentAction(DataScienceDevTableBase):
    """Official action row for an amendment."""
    __tablename__ = "amendment_action"
    __table_args__ = (
        UniqueConstraint(
            "amendment_id",
            "sequence",
            name="uq_amendment_action_amendment_id_sequence",
        ),
    )
    amendment_id: Mapped[int] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="CASCADE")
    )
    sequence: Mapped[int]
    action_date: Mapped[date]
    action_time: Mapped[str | None]
    action_text: Mapped[str]
    action_type: Mapped[str | None]
    action_code: Mapped[str | None]
    source_system_code: Mapped[str | None]
    source_system_name: Mapped[str | None]
    source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL")
    )
    amendment: Mapped[Amendment] = relationship(
        "Amendment",
        back_populates="actions",
    )
    recorded_votes: Mapped[list[AmendmentActionRecordedVote]] = relationship(
        "AmendmentActionRecordedVote",
        back_populates="amendment_action",
        cascade="all, delete-orphan",
    )
 class AmendmentActionRecordedVote(DataScienceDevTableBase):
    """Recorded vote nested under one official amendment action."""
    __tablename__ = "amendment_action_recorded_vote"
    __table_args__ = (
        UniqueConstraint(
            "amendment_action_id",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
            name="uq_amendment_action_recorded_vote_match_key",
        ),
        Index(
            "ix_amendment_action_recorded_vote_match_tuple",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
        ),
    )
    amendment_action_id: Mapped[int] = mapped_column(
        ForeignKey("main.amendment_action.id", ondelete="CASCADE")
    )
    congress: Mapped[int]
    chamber: Mapped[str]
    session_number: Mapped[int]
    roll_number: Mapped[int]
    vote_datetime: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    vote_url: Mapped[str | None]
    amendment_action: Mapped[AmendmentAction] = relationship(
        "AmendmentAction",
        back_populates="recorded_votes",
    )
@@ -1,4 +1,4 @@
-"""Bill models for legislation, official actions, text versions, and topic tags."""
+"""Bill model - legislation introduced in Congress."""
 from __future__ import annotations
@@ -12,7 +12,7 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
 from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
 if TYPE_CHECKING:
-    from pipelines.orm.data_science_dev.congress.context import VoteMeasureLink
+    from pipelines.orm.data_science_dev.congress.vote import Vote
 class BillTopicPosition(StrEnum):
@@ -22,17 +22,6 @@ class BillTopicPosition(StrEnum):
    AGAINST = "against"
 def _enum_column(enum_cls: type[StrEnum], *, name: str) -> Enum:
    """Build a portable SQLAlchemy enum column for StrEnum values."""
    return Enum(
        enum_cls,
        values_callable=lambda enum_type: [member.value for member in enum_type],
        native_enum=False,
        name=name,
    )
 class Bill(DataScienceDevTableBase):
    """Legislation with congress number, type, titles, status, and sponsor."""
@@ -60,6 +49,10 @@ class Bill(DataScienceDevTableBase):
    subjects_top_term: Mapped[str | None]
    score_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    votes: Mapped[list[Vote]] = relationship(
        "Vote",
        back_populates="bill",
    )
    bill_texts: Mapped[list[BillText]] = relationship(
        "BillText",
        back_populates="bill",
@@ -70,28 +63,6 @@ class Bill(DataScienceDevTableBase):
        back_populates="bill",
        cascade="all, delete-orphan",
    )
    bill_actions: Mapped[list[BillAction]] = relationship(
        "BillAction",
        back_populates="bill",
        cascade="all, delete-orphan",
    )
    outgoing_bill_relations: Mapped[list[BillRelation]] = relationship(
        "BillRelation",
        foreign_keys="BillRelation.bill_id",
        back_populates="bill",
        cascade="all, delete-orphan",
    )
    incoming_bill_relations: Mapped[list[BillRelation]] = relationship(
        "BillRelation",
        foreign_keys="BillRelation.related_bill_id",
        back_populates="related_bill",
        cascade="all, delete-orphan",
    )
    vote_measure_links: Mapped[list[VoteMeasureLink]] = relationship(
        "VoteMeasureLink",
        back_populates="measure",
        cascade="all, delete-orphan",
    )
 class BillText(DataScienceDevTableBase):
@@ -113,113 +84,10 @@ class BillText(DataScienceDevTableBase):
    summarization_user_prompt_version: Mapped[str | None]
    summarization_system_prompt_version: Mapped[str | None]
    date: Mapped[date | None]
    source_datetime_raw: Mapped[str | None]
    text_url_xml: Mapped[str | None]
    text_url_pdf: Mapped[str | None]
    text_url_html: Mapped[str | None]
    source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL")
    )
    bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
-
+# suport multipu summary prer bill
 class BillAction(DataScienceDevTableBase):
    """Official action row from Bill Status XML."""
    __tablename__ = "bill_action"
    __table_args__ = (
        UniqueConstraint("bill_id", "sequence", name="uq_bill_action_bill_id_sequence"),
    )
    bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
    sequence: Mapped[int]
    action_date: Mapped[date]
    action_time: Mapped[str | None]
    action_text: Mapped[str]
    action_type: Mapped[str | None]
    action_code: Mapped[str | None]
    source_system_code: Mapped[str | None]
    source_system_name: Mapped[str | None]
    source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL")
    )
    bill: Mapped[Bill] = relationship("Bill", back_populates="bill_actions")
    recorded_votes: Mapped[list[BillActionRecordedVote]] = relationship(
        "BillActionRecordedVote",
        back_populates="bill_action",
        cascade="all, delete-orphan",
    )
 class BillActionRecordedVote(DataScienceDevTableBase):
    """Recorded vote nested under one official bill action."""
    __tablename__ = "bill_action_recorded_vote"
    __table_args__ = (
        UniqueConstraint(
            "bill_action_id",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
            name="uq_bill_action_recorded_vote_match_key",
        ),
        Index(
            "ix_bill_action_recorded_vote_match_tuple",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
        ),
    )
    bill_action_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill_action.id", ondelete="CASCADE")
    )
    congress: Mapped[int]
    chamber: Mapped[str]
    session_number: Mapped[int]
    roll_number: Mapped[int]
    vote_datetime: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    vote_url: Mapped[str | None]
    bill_action: Mapped[BillAction] = relationship(
        "BillAction",
        back_populates="recorded_votes",
    )
 class BillRelation(DataScienceDevTableBase):
    """Relationship between one bill/resolution and another."""
    __tablename__ = "bill_relation"
    __table_args__ = (
        Index("ix_bill_relation_bill_id", "bill_id"),
        Index("ix_bill_relation_related_bill_id", "related_bill_id"),
    )
    bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
    related_bill_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE")
    )
    relationship_type: Mapped[str]
    identified_by: Mapped[str | None]
    latest_action_date: Mapped[date | None]
    latest_action_text: Mapped[str | None]
    bill: Mapped[Bill] = relationship(
        "Bill",
        foreign_keys=[bill_id],
        back_populates="outgoing_bill_relations",
    )
    related_bill: Mapped[Bill] = relationship(
        "Bill",
        foreign_keys=[related_bill_id],
        back_populates="incoming_bill_relations",
    )
 class BillTopic(DataScienceDevTableBase):
    """One bill stance on one topic used to score roll-call votes."""
@@ -238,7 +106,12 @@ class BillTopic(DataScienceDevTableBase):
    bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
    topic: Mapped[str]
    support_position: Mapped[BillTopicPosition] = mapped_column(
-        _enum_column(BillTopicPosition, name="bill_topic_position")
+        Enum(
            BillTopicPosition,
            values_callable=lambda enum_cls: [member.value for member in enum_cls],
            native_enum=False,
            name="bill_topic_position",
        )
    )
    bill: Mapped[Bill] = relationship("Bill", back_populates="topics")
@@ -1,462 +0,0 @@
 """Canonical vote context, artifact tracking, and run metadata models."""
 from __future__ import annotations
 from datetime import datetime
 from enum import StrEnum
 from typing import TYPE_CHECKING
 from sqlalchemy import DateTime, Enum, ForeignKey, Index, func, text
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
 if TYPE_CHECKING:
    from pipelines.orm.data_science_dev.congress.amendment import Amendment, AmendmentAction
    from pipelines.orm.data_science_dev.congress.bill import Bill, BillAction, BillText
    from pipelines.orm.data_science_dev.congress.legislator import LegislatorScore
    from pipelines.orm.data_science_dev.congress.vote import Vote
 def _enum_column(enum_cls: type[StrEnum], *, name: str) -> Enum:
    """Build a portable SQLAlchemy enum column for StrEnum values."""
    return Enum(
        enum_cls,
        values_callable=lambda enum_type: [member.value for member in enum_type],
        native_enum=False,
        name=name,
    )
 class ConfidenceLevel(StrEnum):
    """Low/medium/high confidence buckets."""
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
 class VoteActionScope(StrEnum):
    """Whether a matched action came from bill or amendment context."""
    BILL = "bill"
    AMENDMENT = "amendment"
 class SubjectType(StrEnum):
    """The direct legal/procedural subject of the vote."""
    MEASURE = "measure"
    AMENDMENT = "amendment"
    NOMINATION = "nomination"
    TREATY = "treaty"
    QUORUM = "quorum"
    CHAMBER_ADMIN = "chamber_admin"
    UNKNOWN = "unknown"
 class MeasureSubtype(StrEnum):
    """Formal congressional measure subtype."""
    BILL = "bill"
    JOINT_RESOLUTION = "joint_resolution"
    CONCURRENT_RESOLUTION = "concurrent_resolution"
    SIMPLE_RESOLUTION = "simple_resolution"
 class MeasureFunction(StrEnum):
    """Semantic function of a measure beyond its formal subtype."""
    SUBSTANTIVE_MEASURE = "substantive_measure"
    SPECIAL_RULE = "special_rule"
    BUDGET_RESOLUTION = "budget_resolution"
    CHAMBER_INTERNAL = "chamber_internal"
    COMMEMORATIVE_OR_SENSE_OF = "commemorative_or_sense_of"
    UNKNOWN = "unknown"
 class VoteRelationship(StrEnum):
    """The vote's relationship to the direct subject and its text."""
    DIRECT_TEXT_VOTE = "direct_text_vote"
    AMENDMENT_TEXT_VOTE = "amendment_text_vote"
    PROCEDURAL_RELATED_TO_MEASURE = "procedural_related_to_measure"
    PROCEDURAL_RELATED_TO_AMENDMENT = "procedural_related_to_amendment"
    NON_LEGISLATIVE = "non_legislative"
    UNKNOWN = "unknown"
 class ClassificationMethod(StrEnum):
    """How the final classification was derived."""
    RECORDED_VOTE_ACTION_EXACT = "recorded_vote_action_exact"
    RECORDED_VOTE_ACTION_DUPLICATE_SOURCE_DEDUPED = (
        "recorded_vote_action_duplicate_source_deduped"
    )
    VOTE_XML_ONLY = "vote_xml_only"
    QUESTION_TEXT_ONLY = "question_text_only"
    MANUAL_REVIEW = "manual_review"
 class VoteMeasureRole(StrEnum):
    """How one measure relates to one classified vote."""
    VOTED_ON = "voted_on"
    RULE_FOR = "rule_for"
    UNDERLYING_BILL = "underlying_bill"
    PROCEDURAL_TARGET = "procedural_target"
    AMENDS = "amends"
    AMENDED_BY = "amended_by"
    CONFERENCE_REPORT_FOR = "conference_report_for"
    RELATED_ONLY = "related_only"
 class TextTargetType(StrEnum):
    """Which kind of legislative text was the object of a vote."""
    BILL_TEXT = "bill_text"
    RESOLUTION_TEXT = "resolution_text"
    AMENDMENT_TEXT = "amendment_text"
    NONE = "none"
    UNKNOWN = "unknown"
 class TextTargetBasis(StrEnum):
    """How the text target should be interpreted."""
    EXACT_ACTION_TEXT_VERSION = "exact_action_text_version"
    RESULTING_ENGROSSED_VERSION = "resulting_engrossed_version"
    RECEIVED_PRIOR_CHAMBER_VERSION = "received_prior_chamber_version"
    AMENDMENT_TEXT = "amendment_text"
    RULE_RESOLUTION_TEXT = "rule_resolution_text"
    NO_TEXT_TARGET = "no_text_target"
    UNKNOWN = "unknown"
 class TextResolutionMethod(StrEnum):
    """How the official text target was resolved."""
    TEXT_EXACT_ACTION_DATE_AND_CODE = "text_exact_action_date_and_code"
    TEXT_EXACT_ACTION_DATE_WRONG_CODE = "text_exact_action_date_wrong_code"
    TEXT_PRIOR_VERSION_CODE_MATCH = "text_prior_version_code_match"
    TEXT_RECEIVED_PRIOR_CHAMBER_VERSION = "text_received_prior_chamber_version"
    TEXT_RESULTING_ENROLLED_ONLY = "text_resulting_enrolled_only"
    AMENDMENT_TEXT_UNMODELED_PHASE1 = "amendment_text_unmodeled_phase1"
    NO_TEXT_TARGET = "no_text_target"
    UNKNOWN = "unknown"
 class VoteEffect(StrEnum):
    """Meaning of one member position relative to the target text/procedure."""
    SUPPORTS_TEXT = "supports_text"
    OPPOSES_TEXT = "opposes_text"
    ADVANCES_PROCEDURE = "advances_procedure"
    BLOCKS_PROCEDURE = "blocks_procedure"
    UNKNOWN = "unknown"
 class IngestRun(DataScienceDevTableBase):
    """One full ingestion or context rebuild run."""
    __tablename__ = "ingest_run"
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    git_sha: Mapped[str | None]
    classifier_version: Mapped[str | None]
    source_snapshot_label: Mapped[str | None]
    status: Mapped[str]
    source_artifacts: Mapped[list[SourceArtifact]] = relationship(
        "SourceArtifact",
        back_populates="ingest_run",
        cascade="all, delete-orphan",
    )
    score_runs: Mapped[list[ScoreRun]] = relationship(
        "ScoreRun",
        back_populates="ingest_run",
        cascade="all, delete-orphan",
    )
 class SourceArtifact(DataScienceDevTableBase):
    """Local artifact manifest entry for reproducibility."""
    __tablename__ = "source_artifact"
    __table_args__ = (
        Index("ix_source_artifact_source_kind", "source_kind"),
        Index("ix_source_artifact_congress", "congress"),
        Index(
            "uq_source_artifact_ingest_identity",
            "ingest_run_id",
            "local_path",
            "sha256",
            unique=True,
        ),
    )
    source_kind: Mapped[str]
    congress: Mapped[int]
    chamber: Mapped[str | None]
    local_path: Mapped[str]
    source_url: Mapped[str | None]
    sha256: Mapped[str]
    byte_size: Mapped[int]
    modified_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    ingested_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL")
    )
    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun",
        back_populates="source_artifacts",
    )
 class ScoreRun(DataScienceDevTableBase):
    """One full score recomputation tied to one ingest snapshot."""
    __tablename__ = "score_run"
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL")
    )
    classifier_version: Mapped[str | None]
    scoring_version: Mapped[str | None]
    included_vote_count: Mapped[int]
    excluded_vote_count: Mapped[int]
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun",
        back_populates="score_runs",
    )
    scores: Mapped[list[LegislatorScore]] = relationship(
        "LegislatorScore",
        back_populates="score_run",
        cascade="all, delete-orphan",
    )
 class VoteActionMatch(DataScienceDevTableBase):
    """A candidate or selected official action match for one raw vote."""
    __tablename__ = "vote_action_match"
    __table_args__ = (
        Index("ix_vote_action_match_vote_id", "vote_id"),
        Index(
            "uq_vote_action_match_selected_vote_id",
            "vote_id",
            unique=True,
            postgresql_where=text("is_selected"),
        ),
    )
    vote_id: Mapped[int] = mapped_column(ForeignKey("main.vote.id", ondelete="CASCADE"))
    action_scope: Mapped[VoteActionScope] = mapped_column(
        _enum_column(VoteActionScope, name="vote_action_scope")
    )
    bill_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_action.id", ondelete="CASCADE")
    )
    amendment_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment_action.id", ondelete="CASCADE")
    )
    is_selected: Mapped[bool]
    match_method: Mapped[str]
    match_reason: Mapped[str | None]
    match_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_action_match_confidence")
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    vote: Mapped[Vote] = relationship("Vote", back_populates="action_matches")
    bill_action: Mapped[BillAction | None] = relationship("BillAction")
    amendment_action: Mapped[AmendmentAction | None] = relationship("AmendmentAction")
 class VoteClassification(DataScienceDevTableBase):
    """Normalized classification for what a vote was legally/procedurally on."""
    __tablename__ = "vote_classification"
    __table_args__ = (
        Index("ix_vote_classification_subject_type", "subject_type"),
        Index(
            "ix_vote_classification_eligible_vote_id",
            "vote_id",
            postgresql_where=text(
                "subject_type = 'measure' "
                "AND vote_relationship = 'direct_text_vote' "
                "AND is_direct_vote_on_legislative_text "
                "AND is_substantive_policy_vote "
                "AND NOT is_special_rule"
            ),
        ),
    )
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )
    subject_type: Mapped[SubjectType] = mapped_column(
        _enum_column(SubjectType, name="vote_subject_type")
    )
    measure_type: Mapped[str | None]
    measure_subtype: Mapped[MeasureSubtype | None] = mapped_column(
        _enum_column(MeasureSubtype, name="vote_measure_subtype")
    )
    measure_function: Mapped[MeasureFunction | None] = mapped_column(
        _enum_column(MeasureFunction, name="vote_measure_function")
    )
    vote_relationship: Mapped[VoteRelationship] = mapped_column(
        _enum_column(VoteRelationship, name="vote_relationship")
    )
    is_legislation_related: Mapped[bool]
    is_direct_vote_on_legislative_text: Mapped[bool]
    is_substantive_policy_vote: Mapped[bool]
    is_lawmaking_vehicle: Mapped[bool]
    is_special_rule: Mapped[bool]
    classification_method: Mapped[ClassificationMethod] = mapped_column(
        _enum_column(ClassificationMethod, name="vote_classification_method")
    )
    classification_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_classification_confidence")
    )
    classified_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    classification_version: Mapped[str]
    vote: Mapped[Vote] = relationship("Vote", back_populates="classification")
 class VoteMeasureLink(DataScienceDevTableBase):
    """Relationship between a classified vote and one bill/resolution measure."""
    __tablename__ = "vote_measure_link"
    __table_args__ = (
        Index("ix_vote_measure_link_vote_id", "vote_id"),
        Index("ix_vote_measure_link_vote_id_role", "vote_id", "role"),
        Index("ix_vote_measure_link_measure_id_role", "measure_id", "role"),
    )
    vote_id: Mapped[int] = mapped_column(ForeignKey("main.vote.id", ondelete="CASCADE"))
    measure_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
    role: Mapped[VoteMeasureRole] = mapped_column(
        _enum_column(VoteMeasureRole, name="vote_measure_role")
    )
    source: Mapped[str]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_measure_link_confidence")
    )
    notes: Mapped[str | None]
    vote: Mapped[Vote] = relationship("Vote", back_populates="vote_measure_links")
    measure: Mapped[Bill] = relationship("Bill", back_populates="vote_measure_links")
 class VoteTextTarget(DataScienceDevTableBase):
    """Official text target, if any, resolved for one classified vote."""
    __tablename__ = "vote_text_target"
    __table_args__ = (
        Index(
            "ix_vote_text_target_voted_text_version_id",
            "voted_text_version_id",
            postgresql_where=text("voted_text_version_id IS NOT NULL"),
        ),
    )
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )
    text_target_type: Mapped[TextTargetType] = mapped_column(
        _enum_column(TextTargetType, name="vote_text_target_type")
    )
    voted_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL")
    )
    resulting_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL")
    )
    related_amendment_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="SET NULL")
    )
    text_target_basis: Mapped[TextTargetBasis] = mapped_column(
        _enum_column(TextTargetBasis, name="vote_text_target_basis")
    )
    text_resolution_method: Mapped[TextResolutionMethod] = mapped_column(
        _enum_column(TextResolutionMethod, name="vote_text_resolution_method")
    )
    text_resolution_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_text_target_confidence")
    )
    notes: Mapped[str | None]
    vote: Mapped[Vote] = relationship("Vote", back_populates="text_target")
    voted_text_version: Mapped[BillText | None] = relationship(
        "BillText",
        foreign_keys=[voted_text_version_id],
    )
    resulting_text_version: Mapped[BillText | None] = relationship(
        "BillText",
        foreign_keys=[resulting_text_version_id],
    )
    related_amendment: Mapped[Amendment | None] = relationship("Amendment")
 class VotePositionMeaning(DataScienceDevTableBase):
    """Meaning of Yea/Nay/Present positions for one classified vote."""
    __tablename__ = "vote_position_meaning"
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )
    yea_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_yea_effect")
    )
    nay_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_nay_effect")
    )
    present_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_present_effect")
    )
    polarity_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_polarity_confidence")
    )
    polarity_method: Mapped[str]
    notes: Mapped[str | None]
    vote: Mapped[Vote] = relationship("Vote", back_populates="position_meaning")
 class VoteContextAudit(DataScienceDevTableBase):
    """Audit/event row for ambiguous or noteworthy vote-context decisions."""
    __tablename__ = "vote_context_audit"
    __table_args__ = (
        Index("ix_vote_context_audit_vote_id", "vote_id"),
        Index("ix_vote_context_audit_severity_vote_id", "severity", "vote_id"),
    )
    vote_id: Mapped[int] = mapped_column(ForeignKey("main.vote.id", ondelete="CASCADE"))
    step: Mapped[str]
    message: Mapped[str]
    severity: Mapped[str]
    source_path: Mapped[str | None]
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    vote: Mapped[Vote] = relationship("Vote", back_populates="context_audit_rows")
@@ -11,7 +11,6 @@ from sqlalchemy.orm import Mapped, mapped_column, relationship
 from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
 if TYPE_CHECKING:
    from pipelines.orm.data_science_dev.congress.context import ScoreRun
    from pipelines.orm.data_science_dev.congress.vote import VoteRecord
@@ -19,7 +18,6 @@ class Legislator(DataScienceDevTableBase):
    """Members of Congress with identification and current term info."""
    __tablename__ = "legislator"
    __table_args__ = (Index("ix_legislator_current_chamber", "current_chamber"),)
    bioguide_id: Mapped[str] = mapped_column(Text, unique=True, index=True)
@@ -93,39 +91,8 @@ class LegislatorScore(DataScienceDevTableBase):
        ForeignKey("main.legislator.id", ondelete="CASCADE"),
        index=True,
    )
    score_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.score_run.id", ondelete="CASCADE"),
        index=True,
    )
    year: Mapped[int]
    topic: Mapped[str]
    score: Mapped[float]
    legislator: Mapped[Legislator] = relationship(back_populates="scores")
    score_run: Mapped[ScoreRun | None] = relationship(
        "ScoreRun",
        back_populates="scores",
    )
 class LegislatorScoreFake(DataScienceDevTableBase):
    """Computed topic score for a legislator in one calendar year."""
    __tablename__ = "legislator_score_fake"
    __table_args__ = (
        UniqueConstraint(
            "legislator_id",
            "year",
            "topic",
            name="uq_legislator_score_fake_legislator_id_year_topic",
        ),
        Index("ix_legislator_score_fake_year_topic", "year", "topic"),
    )
    legislator_id: Mapped[int] = mapped_column(
        ForeignKey("main.legislator.id", ondelete="CASCADE"),
        index=True,
    )
    year: Mapped[int]
    topic: Mapped[str]
    score: Mapped[float]
@@ -1,12 +1,11 @@
-"""Vote models for raw roll-call data and member positions."""
+"""Vote model - roll call votes in Congress."""
 from __future__ import annotations
-from datetime import date, datetime
+from datetime import date
 from typing import TYPE_CHECKING
-from sqlalchemy import DateTime, ForeignKey, Index, UniqueConstraint
+from sqlalchemy import ForeignKey, Index, UniqueConstraint
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 from pipelines.orm.data_science_dev.base import (
@@ -15,15 +14,9 @@ from pipelines.orm.data_science_dev.base import (
 )
 if TYPE_CHECKING:
-    from pipelines.orm.data_science_dev.congress.context import (
+    from pipelines.orm.data_science_dev.congress.bill import Bill
        VoteActionMatch,
        VoteClassification,
        VoteContextAudit,
        VoteMeasureLink,
        VotePositionMeaning,
        VoteTextTarget,
    )
    from pipelines.orm.data_science_dev.congress.legislator import Legislator
    from pipelines.orm.data_science_dev.congress.vote import Vote
 class VoteRecord(DataScienceDevBase):
@@ -48,16 +41,16 @@ class VoteRecord(DataScienceDevBase):
 class Vote(DataScienceDevTableBase):
-    """Raw roll call vote facts from House or Senate vote sources."""
+    """Roll call votes with counts and optional bill linkage."""
    __tablename__ = "vote"
    __table_args__ = (
        UniqueConstraint(
            "congress",
            "chamber",
-            "session_number",
+            "session",
-            "roll_number",
+            "number",
-            name="uq_vote_congress_chamber_session_number_roll_number",
+            name="uq_vote_congress_chamber_session_number",
        ),
        Index("ix_vote_date", "vote_date"),
        Index("ix_vote_congress_chamber", "congress", "chamber"),
@@ -65,9 +58,8 @@ class Vote(DataScienceDevTableBase):
    congress: Mapped[int]
    chamber: Mapped[str]
-    session_year: Mapped[int]
+    session: Mapped[int]
-    session_number: Mapped[int]
+    number: Mapped[int]
    roll_number: Mapped[int]
    vote_type: Mapped[str | None]
    question: Mapped[str | None]
@@ -75,57 +67,18 @@ class Vote(DataScienceDevTableBase):
    result_text: Mapped[str | None]
    vote_date: Mapped[date]
    vote_datetime: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    raw_vote_source_url: Mapped[str | None]
    yea_count: Mapped[int | None]
    nay_count: Mapped[int | None]
    not_voting_count: Mapped[int | None]
    present_count: Mapped[int | None]
-    raw_bill_ref: Mapped[dict | None] = mapped_column(JSONB)
+    bill_id: Mapped[int | None] = mapped_column(ForeignKey("main.bill.id"))
    raw_amendment_ref: Mapped[dict | None] = mapped_column(JSONB)
    raw_nomination_ref: Mapped[dict | None] = mapped_column(JSONB)
    raw_treaty_ref: Mapped[dict | None] = mapped_column(JSONB)
    raw_vote_source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL")
    )
    bill: Mapped[Bill | None] = relationship("Bill", back_populates="votes")
    vote_records: Mapped[list[VoteRecord]] = relationship(
        "VoteRecord",
        back_populates="vote",
        cascade="all, delete-orphan",
    )
-    action_matches: Mapped[list[VoteActionMatch]] = relationship(
+
        "VoteActionMatch",
        back_populates="vote",
        cascade="all, delete-orphan",
    )
    classification: Mapped[VoteClassification | None] = relationship(
        "VoteClassification",
        back_populates="vote",
        cascade="all, delete-orphan",
        uselist=False,
    )
    vote_measure_links: Mapped[list[VoteMeasureLink]] = relationship(
        "VoteMeasureLink",
        back_populates="vote",
        cascade="all, delete-orphan",
    )
    text_target: Mapped[VoteTextTarget | None] = relationship(
        "VoteTextTarget",
        back_populates="vote",
        cascade="all, delete-orphan",
        uselist=False,
    )
    position_meaning: Mapped[VotePositionMeaning | None] = relationship(
        "VotePositionMeaning",
        back_populates="vote",
        cascade="all, delete-orphan",
        uselist=False,
    )
    context_audit_rows: Mapped[list[VoteContextAudit]] = relationship(
        "VoteContextAudit",
        back_populates="vote",
        cascade="all, delete-orphan",
    )
@@ -3,80 +3,26 @@
 from __future__ import annotations
 from pipelines.orm.data_science_dev.congress import (
    Amendment,
    AmendmentAction,
    AmendmentActionRecordedVote,
    Bill,
    BillAction,
    BillActionRecordedVote,
    BillRelation,
    BillText,
    BillTopic,
    BillTopicPosition,
    ClassificationMethod,
    ConfidenceLevel,
    IngestRun,
    Legislator,
    LegislatorScore,
    MeasureFunction,
    MeasureSubtype,
    ScoreRun,
    SourceArtifact,
    SubjectType,
    TextResolutionMethod,
    TextTargetBasis,
    TextTargetType,
    Vote,
    VoteActionMatch,
    VoteActionScope,
    VoteClassification,
    VoteContextAudit,
    VoteEffect,
    VoteMeasureLink,
    VoteMeasureRole,
    VotePositionMeaning,
    VoteRelationship,
    VoteRecord,
    VoteTextTarget,
 )
 from pipelines.orm.data_science_dev.posts import partitions  # noqa: F401 — registers partition classes in metadata
 from pipelines.orm.data_science_dev.posts.tables import Posts
 __all__ = [
    "Amendment",
    "AmendmentAction",
    "AmendmentActionRecordedVote",
    "Bill",
    "BillAction",
    "BillActionRecordedVote",
    "BillRelation",
    "BillText",
    "BillTopic",
    "BillTopicPosition",
    "ClassificationMethod",
    "ConfidenceLevel",
    "IngestRun",
    "Legislator",
    "LegislatorScore",
    "MeasureFunction",
    "MeasureSubtype",
    "Posts",
    "ScoreRun",
    "SourceArtifact",
    "SubjectType",
    "TextResolutionMethod",
    "TextTargetBasis",
    "TextTargetType",
    "Vote",
    "VoteActionMatch",
    "VoteActionScope",
    "VoteClassification",
    "VoteContextAudit",
    "VoteEffect",
    "VoteMeasureLink",
    "VoteMeasureRole",
    "VotePositionMeaning",
    "VoteRelationship",
    "VoteRecord",
    "VoteTextTarget",
 ]
@@ -3,10 +3,9 @@
 from __future__ import annotations
 from pipelines.orm.data_science_dev.posts.failed_ingestion import FailedIngestion
-from pipelines.orm.data_science_dev.posts.tables import Posts, PostTopic
+from pipelines.orm.data_science_dev.posts.tables import Posts
 __all__ = [
    "FailedIngestion",
    "Posts",
    "PostTopic",
 ]
@@ -1,195 +0,0 @@
 """Shared language filter constants for post sampling queries."""
 from __future__ import annotations
 ENGLISH_LANGS = (
    '["", "", ""]',
    '[""]',
    "[]",
    '["", "eng"]',
    '["eng", "", ""]',
    '["eng", ""]',
    '["eng"]',
    '["eng", "aar"]',
    '["eng", "abk", "afr"]',
    '["eng", "afr"]',
    '["eng", "afr", "abk"]',
    '["eng", "afr", "anp"]',
    '["eng", "afr", "ber"]',
    '["eng", "afr", "dan"]',
    '["eng", "afr", "deu"]',
    '["eng", "afr", "est"]',
    '["eng", "afr", "fra"]',
    '["eng", "afr", "ind"]',
    '["eng", "afr", "lat"]',
    '["eng", "afr", "nld"]',
    '["eng", "afr", "nor"]',
    '["eng", "afr", "pol"]',
    '["eng", "afr", "por"]',
    '["eng", "afr", "ron"]',
    '["eng", "afr", "slk"]',
    '["eng", "afr", "spa"]',
    '["eng", "afr", "tgl"]',
    '["eng", "afr", "tuk"]',
    '["eng", "afr", "tur"]',
    '["eng", "afr", "ukr"]',
    '["eng", "afr", "vol"]',
    '["eng", "agq"]',
    '["eng", "ain"]',
    '["eng", "ain", "amh"]',
    '["eng", "ain", "jpn"]',
    '["eng", "aka"]',
    '["eng", "amh"]',
    '["eng", "amh", "afr"]',
    '["eng", "amh", "ara"]',
    '["eng", "amh", "fra"]',
    '["eng", "anp"]',
    '["eng", "anp", "hye"]',
    '["eng", "anp", "sqi"]',
    '["eng", "", "ara"]',
    '["eng", "ara", ""]',
    '["eng", "ara"]',
    '["eng", "ara", "afr"]',
    '["eng", "ara", "anp"]',
    '["eng", "ara", "ars"]',
    '["eng", "ara", "bul"]',
    '["eng", "ara", "cat"]',
    '["eng", "ara", "deu"]',
    '["eng", "ara", "ell"]',
    '["eng", "ara", "fas"]',
    '["eng", "ara", "fra"]',
    '["eng", "ara", "heb"]',
    '["eng", "ara", "hin"]',
    '["eng", "ara", "ind"]',
    '["eng", "ara", "ita"]',
    '["eng", "ara", "jpn"]',
    '["eng", "ara", "kas"]',
    '["eng", "ara", "kor"]',
    '["eng", "ara", "nob"]',
    '["eng", "ara", "nor"]',
    '["eng", "ara", "rus"]',
    '["eng", "ara", "spa"]',
    '["eng", "ara", "swe"]',
    '["eng", "ara", "tam"]',
    '["eng", "ara", "tur"]',
    '["eng", "ara", "urd"]',
    '["eng", "ara", "zho"]',
    '["eng", "arg"]',
    '["eng", "arg", "amh"]',
    '["eng", "arg", "aze"]',
    '["eng", "ars"]',
    '["eng", "ars", "ara"]',
    '["eng", "asm"]',
    '["eng", "ava", "sqi"]',
    '["eng", "ave"]',
    '["eng", "aze"]',
    '["eng", "aze", "deu"]',
    '["eng", "aze", "hye"]',
    '["eng", "aze", "ita"]',
    '["eng", "aze", "rus"]',
    '["eng", "bam", ""]',
    '["eng", "bel"]',
    '["eng", "bel", "rus"]',
    '["eng", "ben"]',
    '["eng", "ben", "deu"]',
    '["eng", "ben", "fra"]',
    '["eng", "ben", "hin"]',
    '["eng", "ben", "mya"]',
    '["eng", "ber"]',
    '["eng", "ber", "afr"]',
    '["eng", "ber", "deu"]',
    '["eng", "ber", "est"]',
    '["eng", "ber", "hun"]',
    '["eng", "ber", "isl"]',
    '["eng", "ber", "jpn"]',
    '["eng", "ber", "lat"]',
    '["eng", "ber", "nor"]',
    '["eng", "ber", "pol"]',
    '["eng", "ber", "por"]',
    '["eng", "ber", "ron"]',
    '["eng", "ber", "run"]',
    '["eng", "ber", "slk"]',
    '["eng", "ber", "spa"]',
    '["eng", "ber", "tgl"]',
    '["eng", "ber", "tlh"]',
    '["eng", "ber", "tuk"]',
    '["eng", "bod"]',
    '["eng", "bod", "nep"]',
    '["eng", "bos", "hrv"]',
    '["eng", "bos", "srp"]',
    '["eng", "bul"]',
    '["eng", "bul", "deu"]',
    '["eng", "bul", "fra"]',
    '["eng", "bul", "jpn"]',
    '["eng", "bul", "mkd"]',
    '["eng", "bul", "mri"]',
    '["eng", "bul", "nld"]',
    '["eng", "bul", "rus"]',
    '["eng", "bul", "srp"]',
    '["eng", "cat"]',
    '["eng", "cat", "fra"]',
    '["eng", "cat", "ind"]',
    '["eng", "cat", "isl"]',
    '["eng", "cat", "jpn"]',
    '["eng", "cat", "nld"]',
    '["eng", "cat", "spa"]',
    '["eng", "ces"]',
    '["eng", "ces", "deu"]',
    '["eng", "ces", "ell"]',
    '["eng", "ces", "haw"]',
    '["eng", "ces", "ind"]',
    '["eng", "ces", "ita"]',
    '["eng", "ces", "jpn"]',
    '["eng", "ces", "por"]',
    '["eng", "ces", "rus"]',
    '["eng", "ces", "slk"]',
    '["eng", "ces", "spa"]',
    '["eng", "ces", "tuk"]',
    '["eng", "cha"]',
    '["eng", "chr"]',
    '["eng", "chr", "ara"]',
    '["eng", "chr", "deu"]',
    '["eng", "chr", "ell"]',
    '["eng", "chr", "fil"]',
    '["eng", "chr", "isl"]',
    '["eng", "chr", "kor"]',
    '["eng", "chr", "rus"]',
    '["eng", "chr", "spa"]',
    '["eng", "chr", "zho"]',
    '["eng", "chu", "oci"]',
    '["eng", "cor"]',
    '["eng", "", "cos"]',
    '["eng", "cos"]',
    '["eng", "cym"]',
    '["eng", "cym", "deu"]',
    '["eng", "cym", "fra"]',
    '["eng", "cym", "jpn"]',
    '["eng", "cym", "spa"]',
    '["eng", "cym", "zho"]',
    '["eng", "dan"]',
    '["eng", "dan", "ber"]',
    '["eng", "dan", "deu"]',
    '["eng", "dan", "ell"]',
    '["eng", "dan", "est"]',
    '["eng", "dan", "fas"]',
    '["eng", "dan", "fin"]',
    '["eng", "dan", "fra"]',
    '["eng", "dan", "gle"]',
    '["eng", "dan", "hun"]',
    '["eng", "dan", "isl"]',
    '["eng", "dan", "ita"]',
    '["eng", "dan", "jpn"]',
    '["eng", "dan", "lat"]',
    '["eng", "dan", "nld"]',
    '["eng", "dan", "nob"]',
    '["eng", "dan", "nor"]',
    '["eng", "dan", "por"]',
    '["eng", "dan", "rus"]',
    '["eng", "dan", "slk"]',
    '["eng", "dan", "spa"]',
    '["eng", "dan", "swe"]',
    '["eng", "dan", "tuk"]',
    '["eng", "dan", "zho"]',
    '["eng", "deu", ""]',
    '["eng", "deu"]',
 )
@@ -1,36 +1,13 @@
-"""Posts parent table and PostTopic table for the data_science_dev database."""
+"""Posts parent table with PostgreSQL weekly range partitioning on date column."""
 from __future__ import annotations
-from pipelines.orm.data_science_dev.base import (
+from pipelines.orm.data_science_dev.base import DataScienceDevBase
    DataScienceDevBase,
    DataScienceDevTableBase,
 )
 from pipelines.orm.data_science_dev.posts.columns import PostsColumns
 from sqlalchemy import BigInteger, Index, SmallInteger
 from sqlalchemy.orm import Mapped, mapped_column
 class Posts(PostsColumns, DataScienceDevBase):
    """Parent partitioned table for posts, partitioned by week on `date`."""
    __tablename__ = "posts"
    __table_args__ = ({"postgresql_partition_by": "RANGE (date)"},)
 class PostTopic(DataScienceDevTableBase):
    """Stores BERTopic topic assignments for posts.
    post_id references main.posts but without a FK constraint
    since posts is a partitioned table.
    """
    __tablename__ = "post_topic"
    __table_args__ = (Index("ix_post_topic_post_id", "post_id"),)
    post_id: Mapped[int] = mapped_column(BigInteger)
    topic_id: Mapped[int] = mapped_column(SmallInteger)
    topic_label: Mapped[str | None]
    model_version: Mapped[str | None]
@@ -0,0 +1,25 @@
 # Unsloth fine-tuning container for Qwen 3.5 4B on RTX 3090.
 #
 # Build:
 #   docker build -f python/prompt_bench/Dockerfile.finetune -t bill-finetune .
 #
 # Run:
 #   docker run --rm --device=nvidia.com/gpu=all --ipc=host \
 #     -v $(pwd)/output:/workspace/output \
 #     -v $(pwd)/output/finetune_dataset.jsonl:/workspace/dataset.jsonl:ro \
 #     -v /zfs/models/hf:/models \
 #     bill-finetune \
 #     --dataset /workspace/dataset.jsonl \
 #     --output-dir /workspace/output/qwen-bill-summarizer
 FROM ghcr.io/unslothai/unsloth:latest
 RUN pip install --no-cache-dir typer
 WORKDIR /workspace
 COPY python/prompt_bench/finetune.py python/prompt_bench/finetune.py
 COPY config/prompts/summarization_prompts.toml config/prompts/summarization_prompts.toml
 COPY python/prompt_bench/__init__.py python/prompt_bench/__init__.py
 COPY python/__init__.py python/__init__.py
 ENTRYPOINT ["python", "-m", "pipelines.prompt_bench.finetune"]
@@ -9,7 +9,7 @@ from typing import Annotated
 import typer
-from pipelines.pipelines.containers.lib import check_gpu_free
+from pipelines.tools.containers.lib import check_gpu_free
 logger = logging.getLogger(__name__)
@@ -27,7 +27,7 @@ def build_image() -> None:
            "docker",
            "build",
            "-f",
-            str(REPO_DIR / "pipelines/containers/docker_files/Dockerfile.finetune"),
+            str(REPO_DIR / "python/prompt_bench/Dockerfile.finetune"),
            "-t",
            FINETUNE_IMAGE,
            ".",
@@ -11,8 +11,8 @@ from typing import Annotated
 import typer
-from pipelines.containers.lib import check_gpu_free
+from pipelines.tools.containers.lib import check_gpu_free
-from pipelines.containers.vllm import start_vllm, stop_vllm
+from pipelines.tools.containers.vllm import start_vllm, stop_vllm
 from pipelines.tools.downloader import is_model_present
 from pipelines.tools.models import BenchmarkConfig
 from pipelines.tools.vllm_client import VLLMClient
@@ -5,41 +5,33 @@ from __future__ import annotations
 import logging
 import tomllib
 from os import getenv
 from pathlib import Path
 from typing import Annotated, Any
 import httpx
 import typer
-from sqlalchemy import Select, exists, or_, select
+from sqlalchemy import Select, or_, select
 from sqlalchemy.orm import Session, selectinload
 from tiktoken import get_encoding
 from pipelines.config import get_config_dir
 from pipelines.orm.common import get_postgres_engine
-from pipelines.orm.data_science_dev.congress import (
+from pipelines.orm.data_science_dev.congress import Bill, BillText
    Bill,
    BillText,
    SubjectType,
    VoteClassification,
    VoteRelationship,
    VoteTextTarget,
 )
 from pipelines.tools.bill_token_compression import compress_bill_text
 logger = logging.getLogger(__name__)
 OPENAI_CHAT_COMPLETIONS_URL = "https://api.openai.com/v1/chat/completions"
 OPENAI_PROJECT_ID = "proj_fQBPEXFgnS87Fk6wZwploFwE"
-REQUEST_TIMEOUT_SECONDS = 60
+
 def _find_prompts_path() -> Path:
    return get_config_dir() / "prompts" / "summarization_prompts.toml"
 def load_summarization_prompts(
    section: str = "summarization",
 ) -> dict[str, str]:
-    summarization_prompts = get_config_dir() / "prompts" / "summarization_prompts.toml"
+    return tomllib.loads(_find_prompts_path().read_text())[section]
    return tomllib.loads(summarization_prompts.read_text())[section]
 class BillSummaryError(RuntimeError):
@@ -58,7 +50,7 @@ def call_openai_summary(
        raise BillSummaryError(msg)
    response = httpx.post(
-        OPENAI_CHAT_COMPLETIONS_URL,
+        "https://api.openai.com/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {api_key}",
            "OpenAI-Project": OPENAI_PROJECT_ID,
@@ -68,9 +60,8 @@ def call_openai_summary(
            "model": model,
            "messages": messages,
        },
-        timeout=REQUEST_TIMEOUT_SECONDS,
+        timeout=60,
    )
    logger.info(f"{response.text=}")
    response.raise_for_status()
    return extract_message_content(response.json())
@@ -78,52 +69,53 @@ def call_openai_summary(
 def build_bill_summary_messages(
    *,
    bill_text: BillText,
    summarization_prompts: dict[str, str],
 ) -> list[dict[str, str]]:
-    """Build the GPT prompt messages plus compressed text and user prompt."""
+    """Build the GPT prompt messages for one bill text row."""
    if not bill_text.text_content:
        msg = f"bill_text id={bill_text.id} has no text_content"
        raise BillSummaryError(msg)
    bill = bill_text.bill
    compressed_text = compress_bill_text(bill_text.text_content)
    if not compressed_text:
        msg = f"bill_text id={bill_text.id} has no summarizable text_content"
        raise BillSummaryError(msg)
-    user_prompt = summarization_prompts["user_template"].format(
+    metadata = "\n".join(
-        text_content=compressed_text
+        (
            f"Congress: {bill.congress}",
            f"Bill: {bill.bill_type} {bill.number}",
            f"Title: {bill.title_short or bill.title or bill.official_title or ''}",
            f"Top subject term: {bill.subjects_top_term or ''}",
            f"Text version: {bill_text.version_code}"
            + (f" ({bill_text.version_name})" if bill_text.version_name else ""),
        )
    )
    summarization_prompts = load_summarization_prompts()
    user_prompt = "\n\n".join(
        (
            "BILL METADATA:",
            metadata,
            summarization_prompts["user_template"].format(text_content=compressed_text),
        )
    )
-    user_prompt_tokens = len(get_encoding("o200k_base").encode(user_prompt))
+    return [
    logger.info(f"{user_prompt_tokens=}")
    messages = [
        {"role": "system", "content": summarization_prompts["system_prompt"]},
        {
            "role": "user",
            "content": user_prompt,
        },
    ]
    return messages, user_prompt_tokens
 def summarize_bill_text(
    *,
    model: str,
    bill_text: BillText,
    summarization_prompts: dict[str, str],
 ) -> str:
    """Generate and return a summary for one bill_text row."""
-    messages, user_prompt_tokens = build_bill_summary_messages(
+    messages = build_bill_summary_messages(bill_text=bill_text)
        bill_text=bill_text,
        summarization_prompts=summarization_prompts,
    )
    # This may only be for gpt-5.4 mini I need to read the docs
    if user_prompt_tokens > 272000:
        msg = f"Compressed bill_text id={bill_text.id} is too long for summarization ({user_prompt_tokens} tokens)"
        logger.warning(msg)
        return None
    summary = call_openai_summary(
        model=model,
        messages=messages,
@@ -143,7 +135,7 @@ def store_bill_summary_result(
    """Store a generated summary and the prompt/model metadata that produced it."""
    bill_text.summary = summary
    bill_text.summarization_model = model
-    bill_text.summarization_system_prompt_version = "v1.2"
+    bill_text.summarization_system_prompt_version = "v1"
    bill_text.summarization_user_prompt_version = "v1"
@@ -151,7 +143,6 @@ def create_select_bill_texts_for_summarization(
    congress: int | None = None,
    bill_ids: list[int] | None = None,
    bill_text_ids: list[int] | None = None,
    with_votes_only: bool = False,
    force: bool = False,
    limit: int | None = None,
 ) -> Select:
@@ -169,25 +160,6 @@ def create_select_bill_texts_for_summarization(
        stmt = stmt.where(BillText.bill_id.in_(bill_ids))
    if bill_text_ids:
        stmt = stmt.where(BillText.id.in_(bill_text_ids))
    if with_votes_only:
        stmt = stmt.where(
            exists(
                select(VoteTextTarget.vote_id)
                .join(
                    VoteClassification,
                    VoteClassification.vote_id == VoteTextTarget.vote_id,
                )
                .where(
                    VoteTextTarget.voted_text_version_id == BillText.id,
                    VoteClassification.subject_type == SubjectType.MEASURE,
                    VoteClassification.vote_relationship
                    == VoteRelationship.DIRECT_TEXT_VOTE,
                    VoteClassification.is_direct_vote_on_legislative_text.is_(True),
                    VoteClassification.is_substantive_policy_vote.is_(True),
                    VoteClassification.is_special_rule.is_(False),
                )
            )
        )
    if not force:
        stmt = stmt.where(or_(BillText.summary.is_(None), BillText.summary == ""))
    if limit is not None:
@@ -236,13 +208,6 @@ def main(
            help="Only process one internal bill_text.id. Repeat for multiple rows.",
        ),
    ] = None,
    with_votes_only: Annotated[
        bool,
        typer.Option(
            "--with-votes-only",
            help="Only process bill_text rows linked to at least one vote.",
        ),
    ] = False,
    limit: Annotated[int | None, typer.Option(help="Maximum rows to process.")] = None,
    force: Annotated[
        bool,
@@ -262,14 +227,12 @@ def main(
        message = "CLOSEDAI_TOKEN is required"
        raise typer.BadParameter(message)
    summarization_prompts = load_summarization_prompts()
    engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
    with Session(engine) as session:
        stmt = create_select_bill_texts_for_summarization(
            congress=congress,
            bill_ids=bill_ids,
            bill_text_ids=bill_text_ids,
            with_votes_only=with_votes_only,
            force=force,
            limit=limit,
        )
@@ -281,11 +244,7 @@ def main(
            summary = summarize_bill_text(
                model=model,
                bill_text=bill_text,
                summarization_prompts=summarization_prompts,
            )
            if summary is None:
                logger.warning("Skipping bill_text id=%s", bill_text.id)
                continue
            store_bill_summary_result(
                bill_text=bill_text,
                summary=summary,
@@ -1 +0,0 @@
 """FastAPI HTMX front end for the legislative database."""
@@ -1,31 +0,0 @@
 """Database access for the FastAPI web app."""
 from __future__ import annotations
 from collections.abc import Iterator
 from contextlib import contextmanager
 from functools import lru_cache
 from sqlalchemy.engine import Engine
 from sqlalchemy.orm import Session
 from pipelines.orm.common import get_postgres_engine
@lru_cache(maxsize=1)
 def get_engine() -> Engine:
    """Return the lazily-created DATA_SCIENCE_DEV SQLAlchemy engine."""
    return get_postgres_engine(name="DATA_SCIENCE_DEV")
 def validate_database_connection() -> None:
    """Fail fast if the configured DATA_SCIENCE_DEV database is unavailable."""
    with get_engine().connect():
        pass
@contextmanager
 def session_scope() -> Iterator[Session]:
    """Yield a SQLAlchemy session for a read-only request."""
    with Session(get_engine()) as session:
        yield session
@@ -1,589 +0,0 @@
 """FastAPI app for the HTMX legislative dashboard."""
 from __future__ import annotations
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
 import hashlib
 import hmac
 from os import getenv
 from pathlib import Path
 import secrets
 from typing import Any
 from urllib.parse import parse_qs
 from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
 from fastapi.responses import HTMLResponse, PlainTextResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from pipelines.web import repository
 from pipelines.web.db import session_scope, validate_database_connection
 from pipelines.web.repository import Chamber, RankingResult
 from pipelines.web.scoring import normalize_issues
 from pipelines.web.svg import render_compare_radar_svg, render_score_history_svg
 BASE_DIR = Path(__file__).resolve().parent
 TEMPLATES_DIR = BASE_DIR / "templates"
 STATIC_DIR = BASE_DIR / "static"
 templates = Jinja2Templates(directory=TEMPLATES_DIR)
 ADMIN_USERNAME = "admin"
 ADMIN_PASSWORD = "admin"
 SESSION_COOKIE = "nornsight_admin"
 SESSION_SECRET = "nornsight-local-dev-session-secret"
@asynccontextmanager
 async def lifespan(_: FastAPI):
    """Validate database access when the CLI starts the web server."""
    if getenv("PYTEST_CURRENT_TEST") is None:
        validate_database_connection()
    yield
 app = FastAPI(title="Nornsight Legislative Dashboard", lifespan=lifespan)
 app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
@dataclass(frozen=True)
 class DashboardState:
    """Dashboard query-string state."""
    issues: list[str]
    chamber: Chamber
    congress: int | None
    compare: list[int]
@app.get("/healthz", response_class=PlainTextResponse)
 def healthz() -> str:
    """Return a simple liveness response."""
    return "ok"
@app.get("/login", response_class=HTMLResponse)
 def login(request: Request) -> Response:
    """Render the integrated login page."""
    next_path = _safe_next_path(request.query_params.get("next"))
    if _authenticated_user(request) is not None:
        return RedirectResponse(next_path, status_code=status.HTTP_303_SEE_OTHER)
    return templates.TemplateResponse(
        request,
        "login.html",
        {
            "error": "",
            "is_authenticated": False,
            "show_primary_nav": False,
            "next_path": next_path,
            "username": "",
        },
    )
@app.post("/login", response_class=HTMLResponse)
 async def login_submit(request: Request) -> Response:
    """Authenticate the hard-coded admin user and set a session cookie."""
    form = parse_qs((await request.body()).decode())
    username = form.get("username", [""])[0]
    password = form.get("password", [""])[0]
    next_path = _safe_next_path(form.get("next", [request.query_params.get("next")])[0])
    username_ok = secrets.compare_digest(username, ADMIN_USERNAME)
    password_ok = secrets.compare_digest(password, ADMIN_PASSWORD)
    if not (username_ok and password_ok):
        return templates.TemplateResponse(
            request,
            "login.html",
            {
                "error": "Invalid username or password.",
                "is_authenticated": False,
                "show_primary_nav": False,
                "next_path": next_path,
                "username": username,
            },
            status_code=status.HTTP_401_UNAUTHORIZED,
        )
    response = RedirectResponse(next_path, status_code=status.HTTP_303_SEE_OTHER)
    response.set_cookie(
        SESSION_COOKIE,
        _sign_session(username),
        httponly=True,
        samesite="lax",
    )
    return response
@app.get("/logout")
 def logout() -> Response:
    """Clear the local admin session."""
    response = RedirectResponse("/login", status_code=status.HTTP_303_SEE_OTHER)
    response.delete_cookie(SESSION_COOKIE)
    return response
 def require_admin(request: Request) -> str:
    """Redirect unauthenticated users to the in-site login page."""
    username = _authenticated_user(request)
    if username is not None:
        return username
    next_path = request.url.path
    if request.url.query:
        next_path = f"{next_path}?{request.url.query}"
    login_url = request.url_for("login").include_query_params(next=next_path)
    raise HTTPException(
        status_code=status.HTTP_303_SEE_OTHER,
        headers={"Location": str(login_url)},
    )
 def _authenticated_user(request: Request) -> str | None:
    token = request.cookies.get(SESSION_COOKIE)
    if token is None:
        return None
    try:
        username, signature = token.split(":", 1)
    except ValueError:
        return None
    if username != ADMIN_USERNAME:
        return None
    expected = _session_signature(username)
    if secrets.compare_digest(signature, expected):
        return username
    return None
 def _sign_session(username: str) -> str:
    return f"{username}:{_session_signature(username)}"
 def _session_signature(username: str) -> str:
    return hmac.new(
        SESSION_SECRET.encode(),
        username.encode(),
        hashlib.sha256,
    ).hexdigest()
 def _safe_next_path(value: str | None) -> str:
    if value and value.startswith("/") and not value.startswith("//"):
        return value
    return "/"
@app.get("/", response_class=HTMLResponse)
 def dashboard(request: Request, _: str = Depends(require_admin)) -> Response:
    """Render the full dashboard page."""
    context = _dashboard_context(request)
    if request.headers.get("hx-request") == "true":
        return templates.TemplateResponse(request, "partials/_dashboard.html", context)
    return templates.TemplateResponse(request, "dashboard.html", context)
@app.get("/partials/dashboard", response_class=HTMLResponse)
 def dashboard_partial(request: Request, _: str = Depends(require_admin)) -> Response:
    """Render the filter-dependent dashboard body."""
    context = _dashboard_context(request)
    return templates.TemplateResponse(request, "partials/_dashboard.html", context)
@app.get("/partials/issues", response_class=HTMLResponse)
 def issues_partial(request: Request, _: str = Depends(require_admin)) -> Response:
    """Render only issue filters."""
    context = _dashboard_context(request)
    return templates.TemplateResponse(request, "partials/_issue_filters.html", context)
@app.get("/partials/rankings", response_class=HTMLResponse)
 def rankings_partial(request: Request, _: str = Depends(require_admin)) -> Response:
    """Render only ranking panels."""
    context = _dashboard_context(request)
    return templates.TemplateResponse(request, "partials/_rankings.html", context)
@app.get("/partials/chart", response_class=HTMLResponse)
 def chart_partial(request: Request, _: str = Depends(require_admin)) -> Response:
    """Render only the SVG chart panel."""
    context = _dashboard_context(request)
    return templates.TemplateResponse(request, "partials/_chart.html", context)
@app.get("/legislators", response_class=HTMLResponse)
 def legislators(request: Request, _: str = Depends(require_admin)) -> Response:
    """Render the legislator profile/search page."""
    context = _legislators_context(request)
    return templates.TemplateResponse(request, "legislators.html", context)
@app.get("/partials/legislator-suggestions", response_class=HTMLResponse)
 def legislator_suggestions_partial(
    request: Request, _: str = Depends(require_admin)
 ) -> Response:
    """Render legislator search suggestions for the HTMX typeahead."""
    query = request.query_params.get("q", "").strip()
    context: dict[str, Any] = {
        "q": query if len(query) >= 2 else "",
        "matches": [],
        "build_legislator_url": _build_legislator_url,
    }
    if len(query) >= 2:
        with session_scope() as session:
            context["matches"] = repository.search_legislators(
                session, query=query, limit=8
            )
    return templates.TemplateResponse(
        request, "partials/_legislator_suggestions.html", context
    )
@app.get("/compare", response_class=HTMLResponse)
 def compare(request: Request, _: str = Depends(require_admin)) -> Response:
    """Render the legislator radar comparison page."""
    context = _compare_context(request)
    return templates.TemplateResponse(request, "compare.html", context)
 def _dashboard_context(request: Request) -> dict[str, Any]:
    state = _parse_state(request)
    base_context: dict[str, Any] = {
        "state": state,
        "issues": state.issues,
        "selected_issue_label": " + ".join(state.issues) if state.issues else "",
        "chamber": state.chamber,
        "congress": state.congress,
        "latest_score_year": None,
        "last_updated": None,
        "suggestions": [],
        "rankings": RankingResult(supportive=[], opposed=[]),
        "compare": [],
        "chart_svg": render_score_history_svg([]),
        "chart_series": [],
        "has_votes": False,
        "has_scores": False,
        "empty_message": "",
        "build_url": _build_url,
        "toggle_compare": _toggle_compare,
    }
    with session_scope() as session:
        congress = state.congress or repository.latest_congress(session)
        base_context["congress"] = congress
        base_context["has_scores"] = repository.has_scores(session)
        base_context["latest_score_year"] = repository.latest_score_year(session)
        base_context["last_updated"] = repository.latest_vote_date(session, congress)
        base_context["suggestions"] = repository.issue_suggestions(
            session, congress=congress
        )
        if not base_context["has_scores"]:
            base_context["empty_message"] = (
                "No legislator scores are loaded yet. Run the score calculator first."
            )
            return base_context
        if congress is None:
            base_context["congress"] = "Computed"
        if not state.issues:
            base_context["empty_message"] = (
                "Choose one or more issue areas to calculate roll-call support scores."
            )
            return base_context
        rankings = repository.get_rankings(
            session,
            issues=state.issues,
            chamber=state.chamber,
            congress=congress,
        )
        base_context["rankings"] = rankings
        compare = state.compare or [row.legislator_id for row in rankings.supportive[:2]]
        base_context["compare"] = compare
        if not rankings.supportive and not rankings.opposed:
            base_context["empty_message"] = "No matching roll-call votes."
            return base_context
        history = repository.get_score_history(
            session,
            issues=state.issues,
            chamber=state.chamber,
            congress=congress,
            legislator_ids=compare,
        )
        base_context["chart_series"] = history
        base_context["chart_svg"] = render_score_history_svg(history)
    return base_context
 def _parse_state(request: Request) -> DashboardState:
    query = request.query_params
    chamber = query.get("chamber", "senate").lower()
    if chamber not in {"house", "senate", "all"}:
        chamber = "senate"
    congress = _parse_int(query.get("congress"))
    compare = [
        value
        for value in (_parse_int(raw) for raw in query.getlist("compare"))
        if value is not None
    ]
    return DashboardState(
        issues=normalize_issues(query.getlist("issues")),
        chamber=chamber,  # type: ignore[arg-type]
        congress=congress,
        compare=compare,
    )
 def _legislators_context(request: Request) -> dict[str, Any]:
    query = request.query_params.get("q", "").strip()
    legislator_id = _parse_int(request.query_params.get("legislator_id"))
    selected_topic = request.query_params.get("topic", "").strip()
    per_page = _parse_per_page(request.query_params.get("per_page"))
    page = max(_parse_int(request.query_params.get("page")) or 1, 1)
    base_context: dict[str, Any] = {
        "q": query,
        "profile": None,
        "matches": [],
        "result_count": 0,
        "page": page,
        "per_page": per_page,
        "per_page_options": [10, 25, 50],
        "total_pages": 1,
        "previous_page": None,
        "next_page": None,
        "selected_topic": selected_topic,
        "history_svg": render_score_history_svg([]),
        "history_series": [],
        "build_legislator_url": _build_legislator_url,
        "build_legislator_search_url": _build_legislator_search_url,
    }
    with session_scope() as session:
        result_count = repository.count_legislators(session, query=query) if query else 0
        total_pages = max((result_count + per_page - 1) // per_page, 1)
        if page > total_pages:
            page = total_pages
            base_context["page"] = page
        matches = (
            repository.search_legislators(
                session,
                query=query,
                limit=per_page,
                offset=(page - 1) * per_page,
            )
            if query
            else []
        )
        profile = repository.get_legislator_profile(
            session, legislator_id=legislator_id, query=None
        )
        base_context["profile"] = profile
        base_context["matches"] = matches
        base_context["result_count"] = result_count
        base_context["total_pages"] = total_pages
        base_context["previous_page"] = page - 1 if page > 1 else None
        base_context["next_page"] = page + 1 if page < total_pages else None
        if profile is None:
            return base_context
        if not selected_topic:
            if profile.bottom_topics:
                selected_topic = profile.bottom_topics[0].topic
            elif profile.top_topics:
                selected_topic = profile.top_topics[0].topic
        base_context["selected_topic"] = selected_topic
        if selected_topic:
            history = repository.get_single_legislator_history(
                session,
                legislator_id=profile.legislator.legislator_id,
                topic=selected_topic,
            )
            base_context["history_series"] = history
            base_context["history_svg"] = render_score_history_svg(history)
    return base_context
 def _compare_context(request: Request) -> dict[str, Any]:
    selected_legislators = _parse_int_list(
        request.query_params.getlist("legislator_id")
        or request.query_params.getlist("compare")
    )[:4]
    topics = normalize_issues(
        request.query_params.getlist("topic") or request.query_params.getlist("issues")
    )[:8]
    query = request.query_params.get("q", "").strip()
    base_context: dict[str, Any] = {
        "selected_legislators": selected_legislators,
        "selected_legislator_options": [],
        "topics": topics,
        "q": query,
        "series": [],
        "radar_svg": render_compare_radar_svg([], []),
        "legislator_options": [],
        "topic_options": [],
        "build_compare_url": _build_compare_url,
    }
    with session_scope() as session:
        default_legislators, default_topics = repository.get_compare_defaults(session)
        if not selected_legislators and not query:
            selected_legislators = default_legislators[:3]
        if not topics:
            topics = default_topics[:6]
        selected_legislator_options = repository.get_legislator_options(
            session, selected_legislators
        )
        series = repository.get_compare_radar_series(
            session, legislator_ids=selected_legislators, topics=topics
        )
        base_context.update(
            {
                "selected_legislators": selected_legislators,
                "selected_legislator_options": selected_legislator_options,
                "topics": topics,
                "q": query,
                "series": series,
                "radar_svg": render_compare_radar_svg(topics, series),
                "legislator_options": repository.search_legislators(
                    session, query=query or None, limit=12
                ),
                "topic_options": repository.issue_suggestions(
                    session, congress=None, limit=12
                ),
            }
        )
    return base_context
 def _parse_int(value: str | None) -> int | None:
    if value is None or value == "":
        return None
    try:
        return int(value)
    except ValueError:
        return None
 def _parse_int_list(values: list[str]) -> list[int]:
    parsed: list[int] = []
    seen: set[int] = set()
    for value in values:
        integer = _parse_int(value)
        if integer is not None and integer not in seen:
            parsed.append(integer)
            seen.add(integer)
    return parsed
 def _parse_per_page(value: str | None) -> int:
    parsed = _parse_int(value)
    return parsed if parsed in {10, 25, 50} else 10
 def _build_url(
    request: Request,
    *,
    issues: list[str] | None = None,
    chamber: str | None = None,
    congress: int | None = None,
    compare: list[int] | None = None,
 ) -> str:
    params: list[tuple[str, str]] = []
    chosen_issues = (
        issues
        if issues is not None
        else normalize_issues(request.query_params.getlist("issues"))
    )
    chosen_chamber = (
        chamber
        if chamber is not None
        else request.query_params.get("chamber", "senate")
    )
    chosen_congress = (
        congress
        if congress is not None
        else _parse_int(request.query_params.get("congress"))
    )
    chosen_compare = (
        compare
        if compare is not None
        else [
            value
            for value in (
                _parse_int(raw) for raw in request.query_params.getlist("compare")
            )
            if value is not None
        ]
    )
    for issue in chosen_issues:
        params.append(("issues", issue))
    params.append(("chamber", chosen_chamber))
    if chosen_congress is not None:
        params.append(("congress", str(chosen_congress)))
    for legislator_id in chosen_compare:
        params.append(("compare", str(legislator_id)))
    if not params:
        return "/"
    from urllib.parse import urlencode
    return f"/?{urlencode(params, doseq=True)}"
 def _toggle_compare(compare: list[int], legislator_id: int) -> list[int]:
    """Return compare IDs with the legislator added or removed."""
    if legislator_id in compare:
        return [value for value in compare if value != legislator_id]
    return [*compare, legislator_id]
 def _build_legislator_url(
    *,
    legislator_id: int | None = None,
    q: str | None = None,
    topic: str | None = None,
    per_page: int | None = None,
 ) -> str:
    from urllib.parse import urlencode
    params: list[tuple[str, str]] = []
    if legislator_id is not None:
        params.append(("legislator_id", str(legislator_id)))
    if q:
        params.append(("q", q))
    if topic:
        params.append(("topic", topic))
    if per_page in {10, 25, 50} and per_page != 10:
        params.append(("per_page", str(per_page)))
    return f"/legislators?{urlencode(params)}" if params else "/legislators"
 def _build_legislator_search_url(
    *,
    q: str,
    per_page: int,
    page: int = 1,
 ) -> str:
    from urllib.parse import urlencode
    params: list[tuple[str, str]] = []
    if q:
        params.append(("q", q))
    params.append(("per_page", str(per_page)))
    if page > 1:
        params.append(("page", str(page)))
    return f"/legislators?{urlencode(params)}" if params else "/legislators"
 def _build_compare_url(
    *,
    legislator_ids: list[int],
    topics: list[str],
    q: str | None = None,
 ) -> str:
    from urllib.parse import urlencode
    params: list[tuple[str, str]] = []
    for legislator_id in legislator_ids[:4]:
        params.append(("legislator_id", str(legislator_id)))
    for topic in topics[:8]:
        params.append(("topic", topic))
    if q:
        params.append(("q", q))
    return f"/compare?{urlencode(params, doseq=True)}" if params else "/compare"
@@ -1,670 +0,0 @@
 """Congress database queries for the web dashboard."""
 from __future__ import annotations
 from dataclasses import dataclass
 from datetime import date
 from typing import Literal
 from sqlalchemy import ColumnElement, Select, case, desc, false, func, or_, select, true
 from sqlalchemy.orm import Session
 from pipelines.orm.data_science_dev.congress import (
    BillTopic,
    Legislator,
    LegislatorScore,
    Vote,
 )
 from pipelines.web.scoring import normalize_issues
 Chamber = Literal["house", "senate", "all"]
 STATE_ALIASES = {
    "alabama": "AL",
    "alaska": "AK",
    "arizona": "AZ",
    "arkansas": "AR",
    "california": "CA",
    "colorado": "CO",
    "connecticut": "CT",
    "delaware": "DE",
    "florida": "FL",
    "georgia": "GA",
    "hawaii": "HI",
    "idaho": "ID",
    "illinois": "IL",
    "indiana": "IN",
    "iowa": "IA",
    "kansas": "KS",
    "kentucky": "KY",
    "louisiana": "LA",
    "maine": "ME",
    "maryland": "MD",
    "massachusetts": "MA",
    "michigan": "MI",
    "minnesota": "MN",
    "mississippi": "MS",
    "missouri": "MO",
    "montana": "MT",
    "nebraska": "NE",
    "nevada": "NV",
    "new hampshire": "NH",
    "new jersey": "NJ",
    "new mexico": "NM",
    "new york": "NY",
    "north carolina": "NC",
    "north dakota": "ND",
    "ohio": "OH",
    "oklahoma": "OK",
    "oregon": "OR",
    "pennsylvania": "PA",
    "rhode island": "RI",
    "south carolina": "SC",
    "south dakota": "SD",
    "tennessee": "TN",
    "texas": "TX",
    "utah": "UT",
    "vermont": "VT",
    "virginia": "VA",
    "washington": "WA",
    "west virginia": "WV",
    "wisconsin": "WI",
    "wyoming": "WY",
    "district of columbia": "DC",
 }
@dataclass(frozen=True)
 class RankingRow:
    """A legislator support score row."""
    legislator_id: int
    display_name: str
    party: str | None
    state: str | None
    chamber: str | None
    score: float | None
    supportive: int
    opposed: int
    @property
    def total(self) -> int:
        return self.supportive + self.opposed
@dataclass(frozen=True)
 class RankingResult:
    """Supportive and opposed ranking lists."""
    supportive: list[RankingRow]
    opposed: list[RankingRow]
@dataclass(frozen=True)
 class TimePoint:
    """One yearly chart point."""
    year: int
    score: float
@dataclass(frozen=True)
 class ChartSeries:
    """One legislator score-history series."""
    legislator_id: int
    label: str
    party: str | None
    state: str | None
    points: list[TimePoint]
@dataclass(frozen=True)
 class TopicScore:
    """Average score for one topic."""
    topic: str
    score: float
    count: int
@dataclass(frozen=True)
 class LegislatorOption:
    """Compact legislator metadata for search and comparison controls."""
    legislator_id: int
    display_name: str
    party: str | None
    state: str | None
    chamber: str | None
@dataclass(frozen=True)
 class LegislatorProfile:
    """Legislator metadata plus issue score summary."""
    legislator: LegislatorOption
    overall_score: float | None
    serving_since: int | None
    top_topics: list[TopicScore]
    bottom_topics: list[TopicScore]
@dataclass(frozen=True)
 class RadarSeries:
    """One legislator polygon for the compare radar chart."""
    legislator: LegislatorOption
    average_score: float | None
    scores_by_topic: dict[str, float]
 def latest_congress(session: Session) -> int | None:
    """Return the latest congress number in the vote table."""
    return session.scalar(select(func.max(Vote.congress)))
 def latest_vote_date(session: Session, congress: int | None = None) -> date | None:
    """Return the most recent vote date, optionally scoped to a congress."""
    stmt = select(func.max(Vote.vote_date))
    if congress is not None:
        stmt = stmt.where(Vote.congress == congress)
    return session.scalar(stmt)
 def latest_score_year(session: Session) -> int | None:
    """Return the latest year in the precomputed legislator score table."""
    return session.scalar(select(func.max(LegislatorScore.year)))
 def has_scores(session: Session) -> bool:
    """Return True when the database has at least one precomputed score."""
    return session.scalar(select(LegislatorScore.id).limit(1)) is not None
 def issue_suggestions(
    session: Session,
    *,
    congress: int | None,
    limit: int = 12,
 ) -> list[str]:
    """Return common precomputed score topics for issue filter suggestions."""
    stmt = (
        select(LegislatorScore.topic, func.count(LegislatorScore.id).label("score_count"))
        .where(LegislatorScore.topic != "")
        .group_by(LegislatorScore.topic)
        .order_by(desc("score_count"), LegislatorScore.topic)
        .limit(limit)
    )
    suggestions = [row[0] for row in session.execute(stmt).all()]
    if suggestions:
        return suggestions
    fallback = (
        select(BillTopic.topic, func.count(BillTopic.id).label("topic_count"))
        .where(BillTopic.topic != "")
        .group_by(BillTopic.topic)
        .order_by(desc("topic_count"), BillTopic.topic)
        .limit(limit)
    )
    return [row[0] for row in session.execute(fallback).all()]
 def ranking_query(
    *,
    issues: list[str],
    chamber: Chamber,
    congress: int,
 ) -> Select:
    """Build the aggregate ranking query from precomputed scores."""
    average_score = func.avg(LegislatorScore.score).label("score")
    supportive = func.sum(case((LegislatorScore.score >= 50, 1), else_=0)).label(
        "supportive"
    )
    opposed = func.sum(case((LegislatorScore.score < 50, 1), else_=0)).label("opposed")
    stmt = (
        select(
            Legislator.id,
            Legislator.official_full_name,
            Legislator.last_name,
            Legislator.current_party,
            Legislator.current_state,
            Legislator.current_chamber,
            average_score,
            supportive,
            opposed,
        )
        .join(LegislatorScore, LegislatorScore.legislator_id == Legislator.id)
        .where(_score_topic_match_condition(issues))
        .group_by(
            Legislator.id,
            Legislator.official_full_name,
            Legislator.last_name,
            Legislator.current_party,
            Legislator.current_state,
            Legislator.current_chamber,
        )
    )
    if chamber != "all":
        stmt = stmt.where(Legislator.current_chamber == _db_chamber(chamber))
    return stmt
 def get_rankings(
    session: Session,
    *,
    issues: list[str],
    chamber: Chamber,
    congress: int,
    limit: int = 10,
 ) -> RankingResult:
    """Return top supportive and opposed legislators from precomputed scores."""
    rows = [
        _ranking_row(row)
        for row in session.execute(
            ranking_query(issues=issues, chamber=chamber, congress=congress)
        )
    ]
    scored = [row for row in rows if row.score is not None]
    supportive = sorted(
        scored, key=lambda row: (-float(row.score), -row.total, row.display_name)
    )[:limit]
    opposed = sorted(
        scored, key=lambda row: (float(row.score), -row.total, row.display_name)
    )[:limit]
    return RankingResult(supportive=supportive, opposed=opposed)
 def get_score_history(
    session: Session,
    *,
    issues: list[str],
    chamber: Chamber,
    congress: int,
    legislator_ids: list[int],
 ) -> list[ChartSeries]:
    """Return yearly score history from precomputed scores."""
    if not legislator_ids:
        return []
    average_score = func.avg(LegislatorScore.score).label("score")
    stmt = (
        select(
            Legislator.id,
            Legislator.official_full_name,
            Legislator.last_name,
            Legislator.current_party,
            Legislator.current_state,
            LegislatorScore.year,
            average_score,
        )
        .join(LegislatorScore, LegislatorScore.legislator_id == Legislator.id)
        .where(
            Legislator.id.in_(legislator_ids),
            _score_topic_match_condition(issues),
        )
        .group_by(
            Legislator.id,
            Legislator.official_full_name,
            Legislator.last_name,
            Legislator.current_party,
            Legislator.current_state,
            LegislatorScore.year,
        )
        .order_by(Legislator.id, LegislatorScore.year)
    )
    if chamber != "all":
        stmt = stmt.where(Legislator.current_chamber == _db_chamber(chamber))
    by_legislator: dict[int, ChartSeries] = {}
    for row in session.execute(stmt):
        if row.score is None:
            continue
        series = by_legislator.setdefault(
            row.id,
            ChartSeries(
                legislator_id=row.id,
                label=_display_name(row.official_full_name, row.last_name),
                party=row.current_party,
                state=row.current_state,
                points=[],
            ),
        )
        series.points.append(TimePoint(year=int(row.year), score=float(row.score)))
    return list(by_legislator.values())
 def _ranking_row(row: object) -> RankingRow:
    return RankingRow(
        legislator_id=row.id,
        display_name=_display_name(row.official_full_name, row.last_name),
        party=row.current_party,
        state=row.current_state,
        chamber=row.current_chamber,
        score=float(row.score) if row.score is not None else None,
        supportive=row.supportive or 0,
        opposed=row.opposed or 0,
    )
 def _score_topic_match_condition(
    issues: list[str] | tuple[str, ...],
 ) -> ColumnElement[bool]:
    normalized = normalize_issues(list(issues))
    if not normalized:
        return false()
    return or_(*(LegislatorScore.topic.ilike(f"%{issue}%") for issue in normalized))
 def search_legislators(
    session: Session,
    *,
    query: str | None = None,
    limit: int = 12,
    offset: int = 0,
 ) -> list[LegislatorOption]:
    """Search ingested legislators, preferring those with computed scores."""
    return [
        _legislator_option(row)
        for row in session.execute(
            legislator_search_query(query=query, limit=limit, offset=offset)
        )
    ]
 def count_legislators(session: Session, *, query: str | None = None) -> int:
    """Return the total number of legislators matching a search query."""
    return int(
        session.scalar(
            select(func.count(Legislator.id)).where(_legislator_search_condition(query))
        )
        or 0
    )
 def get_legislator_options(
    session: Session, legislator_ids: list[int]
 ) -> list[LegislatorOption]:
    """Return legislator options in the same order as the selected IDs."""
    options = {
        option.legislator_id: option
        for option in (
            _get_legislator_option(session, legislator_id)
            for legislator_id in legislator_ids
        )
        if option is not None
    }
    return [
        options[legislator_id]
        for legislator_id in legislator_ids
        if legislator_id in options
    ]
 def legislator_search_query(
    *,
    query: str | None = None,
    limit: int = 12,
    offset: int = 0,
 ) -> Select:
    """Build the legislator search query used by profile and compare controls."""
    score_count = func.count(LegislatorScore.id).label("score_count")
    stmt = (
        select(
            Legislator.id,
            Legislator.official_full_name,
            Legislator.last_name,
            Legislator.current_party,
            Legislator.current_state,
            Legislator.current_chamber,
            score_count,
        )
        .outerjoin(LegislatorScore, LegislatorScore.legislator_id == Legislator.id)
        .group_by(
            Legislator.id,
            Legislator.official_full_name,
            Legislator.first_name,
            Legislator.last_name,
            Legislator.current_party,
            Legislator.current_state,
            Legislator.current_chamber,
            Legislator.bioguide_id,
        )
        .order_by(desc("score_count"), Legislator.last_name, Legislator.first_name)
        .limit(limit)
        .offset(offset)
    )
    return stmt.where(_legislator_search_condition(query))
 def _legislator_search_condition(query: str | None) -> ColumnElement[bool]:
    cleaned_query = query.strip() if query else ""
    if not cleaned_query:
        return true()
    pattern = f"%{cleaned_query}%"
    state_alias = _state_alias(cleaned_query)
    conditions: list[ColumnElement[bool]] = [
        Legislator.official_full_name.ilike(pattern),
        Legislator.first_name.ilike(pattern),
        Legislator.last_name.ilike(pattern),
        Legislator.current_state.ilike(pattern),
        Legislator.bioguide_id.ilike(pattern),
    ]
    if state_alias is not None:
        conditions.append(Legislator.current_state == state_alias)
    return or_(*conditions)
 def _state_alias(query: str) -> str | None:
    normalized = " ".join(query.lower().replace(".", "").split())
    if len(normalized) == 2 and normalized.isalpha():
        return normalized.upper()
    return STATE_ALIASES.get(normalized)
 def get_legislator_profile(
    session: Session,
    *,
    legislator_id: int | None = None,
    query: str | None = None,
 ) -> LegislatorProfile | None:
    """Return the selected legislator profile and top/bottom topic scores."""
    selected = _get_legislator_option(session, legislator_id)
    cleaned_query = query.strip() if query else ""
    if selected is None and cleaned_query:
        matches = search_legislators(session, query=query, limit=1)
        selected = matches[0] if matches else None
    if selected is None:
        return None
    topic_scores = get_legislator_topic_scores(
        session, legislator_id=selected.legislator_id
    )
    top_topics = sorted(topic_scores, key=lambda item: (-item.score, item.topic))[:3]
    bottom_topics = sorted(topic_scores, key=lambda item: (item.score, item.topic))[:3]
    overall_score = session.scalar(
        select(func.avg(LegislatorScore.score)).where(
            LegislatorScore.legislator_id == selected.legislator_id
        )
    )
    serving_since = session.scalar(
        select(func.min(LegislatorScore.year)).where(
            LegislatorScore.legislator_id == selected.legislator_id
        )
    )
    return LegislatorProfile(
        legislator=selected,
        overall_score=float(overall_score) if overall_score is not None else None,
        serving_since=int(serving_since) if serving_since is not None else None,
        top_topics=top_topics,
        bottom_topics=bottom_topics,
    )
 def get_legislator_topic_scores(
    session: Session,
    *,
    legislator_id: int,
 ) -> list[TopicScore]:
    """Return all average topic scores for one legislator."""
    rows = session.execute(
        select(
            LegislatorScore.topic,
            func.avg(LegislatorScore.score).label("score"),
            func.count(LegislatorScore.id).label("count"),
        )
        .where(LegislatorScore.legislator_id == legislator_id)
        .group_by(LegislatorScore.topic)
        .order_by(LegislatorScore.topic)
    )
    return [
        TopicScore(topic=row.topic, score=float(row.score), count=row.count)
        for row in rows
        if row.score is not None
    ]
 def get_single_legislator_history(
    session: Session,
    *,
    legislator_id: int,
    topic: str,
 ) -> list[ChartSeries]:
    """Return score history for one legislator/topic pair."""
    option = _get_legislator_option(session, legislator_id)
    if option is None:
        return []
    rows = session.execute(
        select(
            LegislatorScore.year,
            func.avg(LegislatorScore.score).label("score"),
        )
        .where(
            LegislatorScore.legislator_id == legislator_id,
            LegislatorScore.topic == topic,
        )
        .group_by(LegislatorScore.year)
        .order_by(LegislatorScore.year)
    )
    points = [
        TimePoint(year=int(row.year), score=float(row.score))
        for row in rows
        if row.score is not None
    ]
    return [
        ChartSeries(
            legislator_id=option.legislator_id,
            label=option.display_name,
            party=option.party,
            state=option.state,
            points=points,
        )
    ]
 def get_compare_defaults(session: Session) -> tuple[list[int], list[str]]:
    """Return default compare legislators and topics."""
    legislators = search_legislators(session, limit=3)
    topics = issue_suggestions(session, congress=None, limit=6)
    return [item.legislator_id for item in legislators], topics
 def get_compare_radar_series(
    session: Session,
    *,
    legislator_ids: list[int],
    topics: list[str],
 ) -> list[RadarSeries]:
    """Return radar chart scores for selected legislators and topics."""
    if not legislator_ids:
        return []
    options = {
        option.legislator_id: option
        for option in (
            _get_legislator_option(session, legislator_id)
            for legislator_id in legislator_ids
        )
        if option is not None
    }
    if not options:
        return []
    scores: dict[int, dict[str, float]] = {
        legislator_id: {} for legislator_id in options
    }
    if topics:
        rows = session.execute(
            select(
                LegislatorScore.legislator_id,
                LegislatorScore.topic,
                func.avg(LegislatorScore.score).label("score"),
            )
            .where(
                LegislatorScore.legislator_id.in_(list(options)),
                LegislatorScore.topic.in_(topics),
            )
            .group_by(LegislatorScore.legislator_id, LegislatorScore.topic)
        )
        for row in rows:
            scores[row.legislator_id][row.topic] = float(row.score)
    series: list[RadarSeries] = []
    for legislator_id in legislator_ids:
        option = options.get(legislator_id)
        if option is None:
            continue
        topic_scores = scores.get(legislator_id, {})
        values = list(topic_scores.values())
        series.append(
            RadarSeries(
                legislator=option,
                average_score=sum(values) / len(values) if values else None,
                scores_by_topic=topic_scores,
            )
        )
    return series
 def _display_name(official_full_name: str | None, last_name: str | None) -> str:
    if official_full_name:
        parts = official_full_name.split()
        if len(parts) > 1:
            return f"{parts[-1]}, {' '.join(parts[:-1])}"
        return official_full_name
    return last_name or "Unknown"
 def _legislator_option(row: object) -> LegislatorOption:
    return LegislatorOption(
        legislator_id=row.id,
        display_name=_display_name(row.official_full_name, row.last_name),
        party=row.current_party,
        state=row.current_state,
        chamber=row.current_chamber,
    )
 def _get_legislator_option(
    session: Session, legislator_id: int | None
 ) -> LegislatorOption | None:
    if legislator_id is None:
        return None
    row = session.execute(
        select(
            Legislator.id,
            Legislator.official_full_name,
            Legislator.last_name,
            Legislator.current_party,
            Legislator.current_state,
            Legislator.current_chamber,
        ).where(Legislator.id == legislator_id)
    ).first()
    return _legislator_option(row) if row is not None else None
 def _db_chamber(chamber: Chamber) -> str:
    return {"house": "House", "senate": "Senate", "all": "all"}[chamber]
@@ -1,100 +0,0 @@
 """Issue matching and voting score helpers."""
 from __future__ import annotations
 from dataclasses import dataclass
 from sqlalchemy import ColumnElement, false, func, or_
 from sqlalchemy.sql.elements import BinaryExpression
 from pipelines.orm.data_science_dev.congress import Bill, BillTopicPosition, Vote
 SUPPORT_POSITIONS = frozenset({"yea", "aye", "yes"})
 OPPOSE_POSITIONS = frozenset({"nay", "no"})
@dataclass(frozen=True)
 class ScoreCounts:
    """Support/opposition counts for one legislator or time bucket."""
    supportive: int
    opposed: int
    @property
    def total(self) -> int:
        return self.supportive + self.opposed
 def normalize_position(position: str | None) -> str | None:
    """Normalize a raw roll-call position into support/oppose/ignore buckets."""
    if position is None:
        return None
    value = position.strip().lower()
    if value in SUPPORT_POSITIONS:
        return "support"
    if value in OPPOSE_POSITIONS:
        return "oppose"
    return None
 def score_vote_position(
    position: str | None,
    support_position: BillTopicPosition | str,
 ) -> str | None:
    """Score a raw vote as support/opposition for an extracted bill topic."""
    normalized_vote = normalize_position(position)
    if normalized_vote is None:
        return None
    topic_position = BillTopicPosition(support_position)
    if topic_position is BillTopicPosition.FOR:
        return normalized_vote
    if normalized_vote == "support":
        return "oppose"
    return "support"
 def calculate_score(counts: ScoreCounts) -> int | None:
    """Calculate the 0-100 support score, or None when there are no scored votes."""
    if counts.total == 0:
        return None
    return round(100 * counts.supportive / counts.total)
 def normalize_issues(issues: list[str] | tuple[str, ...]) -> list[str]:
    """Trim, de-duplicate, and preserve issue order for display and queries."""
    normalized: list[str] = []
    seen: set[str] = set()
    for issue in issues:
        value = issue.strip()
        key = value.casefold()
        if value and key not in seen:
            normalized.append(value)
            seen.add(key)
    return normalized
 def issue_match_condition(issues: list[str] | tuple[str, ...]) -> ColumnElement[bool]:
    """Build the SQLAlchemy condition for issue text matching."""
    normalized = normalize_issues(list(issues))
    if not normalized:
        return false()
    fields: tuple[ColumnElement[str | None], ...] = (
        Bill.subjects_top_term,
        Bill.title,
        Bill.title_short,
        Bill.official_title,
        Vote.question,
        Vote.result_text,
    )
    terms: list[BinaryExpression[bool]] = []
    for issue in normalized:
        pattern = f"%{issue}%"
        terms.extend(field.ilike(pattern) for field in fields)
    return or_(*terms)
 def normalized_position_expression(column: ColumnElement[str]) -> ColumnElement[str | None]:
    """Lowercase and trim a SQL column containing raw vote positions."""
    return func.lower(func.trim(column))
@@ -1,231 +0,0 @@
 """Inline SVG rendering helpers."""
 from __future__ import annotations
 from html import escape
 from math import cos, pi, sin
 from pipelines.web.repository import ChartSeries, RadarSeries
 SERIES_STYLES = (
    {
        "color": "#009e73",
        "dasharray": None,
        "marker": "circle",
    },
    {
        "color": "#0072b2",
        "dasharray": "10 6",
        "marker": "square",
    },
    {
        "color": "#e69f00",
        "dasharray": "4 5",
        "marker": "diamond",
    },
    {
        "color": "#cc79a7",
        "dasharray": "14 5 3 5",
        "marker": "triangle",
    },
 )
 def render_score_history_svg(series: list[ChartSeries]) -> str:
    """Render a responsive inline SVG score history chart."""
    width = 880
    height = 300
    margin_left = 70
    margin_right = 28
    margin_top = 24
    margin_bottom = 48
    plot_width = width - margin_left - margin_right
    plot_height = height - margin_top - margin_bottom
    all_years = sorted({point.year for item in series for point in item.points})
    if not all_years:
        return _empty_svg(width, height, "No score history for this selection")
    min_year = min(all_years)
    max_year = max(all_years)
    year_span = max(max_year - min_year, 1)
    def x_for(year: int) -> float:
        return margin_left + ((year - min_year) / year_span) * plot_width
    def y_for(score: int) -> float:
        return margin_top + ((100 - score) / 100) * plot_height
    parts: list[str] = [
        f'<svg viewBox="0 0 {width} {height}" role="img" aria-label="Score history chart" class="score-chart">',
        '<rect width="100%" height="100%" fill="transparent" />',
    ]
    for score in (0, 25, 50, 75, 100):
        y = y_for(score)
        parts.append(
            f'<line x1="{margin_left}" y1="{y:.2f}" x2="{width - margin_right}" y2="{y:.2f}" class="chart-grid" />'
        )
        parts.append(
            f'<text x="{margin_left - 16}" y="{y + 4:.2f}" text-anchor="end" class="chart-axis-label">{score}</text>'
        )
    tick_years = _tick_years(all_years)
    for year in tick_years:
        x = x_for(year)
        parts.append(
            f'<line x1="{x:.2f}" y1="{margin_top}" x2="{x:.2f}" y2="{height - margin_bottom}" class="chart-year-line" />'
        )
        parts.append(
            f'<text x="{x:.2f}" y="{height - 18}" text-anchor="middle" class="chart-axis-label">{year}</text>'
        )
    parts.append(
        f'<line x1="{margin_left}" y1="{height - margin_bottom}" x2="{width - margin_right}" y2="{height - margin_bottom}" class="chart-axis" />'
    )
    parts.append(
        f'<line x1="{margin_left}" y1="{margin_top}" x2="{margin_left}" y2="{height - margin_bottom}" class="chart-axis" />'
    )
    for index, item in enumerate(series):
        points = sorted(item.points, key=lambda point: point.year)
        if not points:
            continue
        style = SERIES_STYLES[index % len(SERIES_STYLES)]
        color = style["color"]
        path = " ".join(
            f"{'M' if point_index == 0 else 'L'} {x_for(point.year):.2f} {y_for(point.score):.2f}"
            for point_index, point in enumerate(points)
        )
        label = escape(item.label)
        dash_attr = (
            f' stroke-dasharray="{style["dasharray"]}"'
            if style["dasharray"]
            else ""
        )
        parts.append(
            f'<path d="{path}" fill="none" stroke="{color}" stroke-width="3.5" stroke-linecap="round" stroke-linejoin="round"{dash_attr}>'
            f"<title>{label}</title></path>"
        )
        for point in points:
            parts.append(
                _point_marker(
                    marker=style["marker"],
                    x=x_for(point.year),
                    y=y_for(point.score),
                    color=color,
                    label=f"{label}: {point.score:.0f} in {point.year}",
                )
            )
        last = points[-1]
        parts.append(
            f'<text x="{x_for(last.year) - 10:.2f}" y="{y_for(last.score) + 4:.2f}" text-anchor="end" class="chart-series-label" fill="{color}">'
            f"{last.score:.0f}</text>"
        )
    parts.append("</svg>")
    return "".join(parts)
 def _empty_svg(width: int, height: int, message: str) -> str:
    return (
        f'<svg viewBox="0 0 {width} {height}" role="img" aria-label="{escape(message)}" class="score-chart">'
        '<rect width="100%" height="100%" fill="transparent" />'
        f'<text x="{width / 2}" y="{height / 2}" text-anchor="middle" class="chart-empty">{escape(message)}</text>'
        "</svg>"
    )
 def _tick_years(years: list[int]) -> list[int]:
    first = years[0]
    last = years[-1]
    start = first - (first % 5)
    tick_years = {year for year in range(start, last + 1, 5) if first <= year <= last}
    tick_years.add(first)
    tick_years.add(last)
    return sorted(tick_years)
 def render_compare_radar_svg(topics: list[str], series: list[RadarSeries]) -> str:
    """Render a server-side radar chart for legislator comparison."""
    width = 720
    height = 560
    center_x = 285
    center_y = 280
    radius = 200
    if len(topics) < 3 or not series:
        return _empty_svg(width, height, "Choose at least 3 axes and 1 legislator")
    axis_count = len(topics)
    def point_for(index: int, score: float) -> tuple[float, float]:
        angle = -pi / 2 + (2 * pi * index / axis_count)
        distance = radius * max(0, min(score, 100)) / 100
        return center_x + cos(angle) * distance, center_y + sin(angle) * distance
    def ring_points(score: float) -> str:
        return " ".join(
            f"{point_for(index, score)[0]:.2f},{point_for(index, score)[1]:.2f}"
            for index in range(axis_count)
        )
    parts: list[str] = [
        f'<svg viewBox="0 0 {width} {height}" role="img" aria-label="Compare legislators radar chart" class="radar-chart">',
        '<rect width="100%" height="100%" fill="transparent" />',
    ]
    for ring in (25, 50, 75, 100):
        parts.append(f'<polygon points="{ring_points(ring)}" class="radar-ring" />')
    for index, topic in enumerate(topics):
        outer_x, outer_y = point_for(index, 100)
        label_x, label_y = point_for(index, 113)
        parts.append(
            f'<line x1="{center_x}" y1="{center_y}" x2="{outer_x:.2f}" y2="{outer_y:.2f}" class="radar-axis" />'
        )
        anchor = "middle"
        if label_x < center_x - 24:
            anchor = "end"
        elif label_x > center_x + 24:
            anchor = "start"
        parts.append(
            f'<text x="{label_x:.2f}" y="{label_y:.2f}" text-anchor="{anchor}" class="radar-label">{escape(topic)}</text>'
        )
    for index, item in enumerate(series):
        color = SERIES_STYLES[index % len(SERIES_STYLES)]["color"]
        points = " ".join(
            f"{point_for(topic_index, item.scores_by_topic.get(topic, 50.0))[0]:.2f},"
            f"{point_for(topic_index, item.scores_by_topic.get(topic, 50.0))[1]:.2f}"
            for topic_index, topic in enumerate(topics)
        )
        label = escape(item.legislator.display_name)
        parts.append(
            f'<polygon points="{points}" fill="{color}" fill-opacity="0.14" stroke="{color}" stroke-width="3" class="radar-series">'
            f"<title>{label}</title></polygon>"
        )
    parts.append("</svg>")
    return "".join(parts)
 def _point_marker(*, marker: str, x: float, y: float, color: str, label: str) -> str:
    title = f"<title>{escape(label)}</title>"
    if marker == "square":
        return (
            f'<rect x="{x - 4.25:.2f}" y="{y - 4.25:.2f}" width="8.5" height="8.5" '
            f'fill="{color}" rx="1.5" ry="1.5">{title}</rect>'
        )
    if marker == "diamond":
        points = (
            f"{x:.2f},{y - 5.2:.2f} "
            f"{x + 5.2:.2f},{y:.2f} "
            f"{x:.2f},{y + 5.2:.2f} "
            f"{x - 5.2:.2f},{y:.2f}"
        )
        return f'<polygon points="{points}" fill="{color}">{title}</polygon>'
    if marker == "triangle":
        points = (
            f"{x:.2f},{y - 5.5:.2f} "
            f"{x + 5.5:.2f},{y + 4.5:.2f} "
            f"{x - 5.5:.2f},{y + 4.5:.2f}"
        )
        return f'<polygon points="{points}" fill="{color}">{title}</polygon>'
    return f'<circle cx="{x:.2f}" cy="{y:.2f}" r="4.5" fill="{color}">{title}</circle>'
@@ -1,40 +0,0 @@
 <!doctype html>
 <html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>{% block title %}Nornsight{% endblock %}</title>
    <link rel="stylesheet" href="{{ url_for('static', path='styles.css') }}">
    <script src="{{ url_for('static', path='vendor/htmx.min.js') }}" defer></script>
  </head>
  <body>
    <header class="topbar">
      <a class="brand" href="/">
        <span class="brand-mark">N</span>
        <span>Nornsight</span>
      </a>
      {% if show_primary_nav|default(true) %}
      <nav class="primary-nav" aria-label="Primary">
        <a href="/">Issues</a>
        <a href="/legislators">Legislators</a>
        <a href="/compare">Compare</a>
      </nav>
      {% endif %}
      <nav class="account-nav" aria-label="Account">
        <a href="#" aria-disabled="true">Help</a>
        {% if is_authenticated|default(true) %}
        <details class="account-menu">
          <summary>My account</summary>
          <div class="account-menu-panel">
            <a href="#" aria-disabled="true">Account settings</a>
            <a class="sign-out" href="/logout">Sign out</a>
          </div>
        </details>
        {% else %}
        <a class="sign-in" href="/login">Sign in</a>
        {% endif %}
      </nav>
    </header>
    {% block body %}{% endblock %}
  </body>
 </html>
@@ -1,87 +0,0 @@
 {% extends "base.html" %}
 {% block title %}Compare Legislators{% endblock %}
 {% block body %}
  <main class="shell">
    <section class="page-heading stacked-heading">
      <div>
        <h1>Compare legislators</h1>
        <p>Up to 4 legislators · up to 8 issue axes · each polygon = one legislator's full issue profile</p>
      </div>
    </section>
    <section class="compare-controls">
      <form class="wide-search compare-search" action="/compare" method="get">
        <label class="sr-only" for="compare-legislator-search">Search legislators</label>
        {% for legislator_id in selected_legislators %}
          <input type="hidden" name="legislator_id" value="{{ legislator_id }}">
        {% endfor %}
        {% for topic in topics %}
          <input type="hidden" name="topic" value="{{ topic }}">
        {% endfor %}
        <input
          id="compare-legislator-search"
          type="search"
          name="q"
          value="{{ q }}"
          placeholder="Search legislators to add"
          autocomplete="off">
        <button type="submit">Search</button>
      </form>
      <h2>Legislators ({{ selected_legislator_options|length }} / 4)</h2>
      <div class="result-chips">
        {% for legislator in selected_legislator_options %}
          {% set without = selected_legislators | reject('equalto', legislator.legislator_id) | list %}
          <a href="{{ build_compare_url(legislator_ids=without, topics=topics, q=q) }}"><span class="legend-dot dot-{{ loop.index0 }}"></span>{{ legislator.display_name }}{% if legislator.state %} — {{ legislator.state }}{% endif %} ×</a>
        {% endfor %}
        {% if selected_legislator_options|length < 4 %}
          {% for option in legislator_options %}
            {% if option.legislator_id not in selected_legislators %}
              <a class="dashed-chip" href="{{ build_compare_url(legislator_ids=selected_legislators + [option.legislator_id], topics=topics, q=q) }}">+ {{ option.display_name }}</a>
            {% endif %}
          {% endfor %}
        {% endif %}
      </div>
      <h2>Issue axes ({{ topics|length }} / 8)</h2>
      <div class="axis-chips">
        {% for topic in topics %}
          {% set without_topic = topics[:loop.index0] + topics[loop.index:] %}
          <a href="{{ build_compare_url(legislator_ids=selected_legislators, topics=without_topic, q=q) }}">{{ topic }} ×</a>
        {% endfor %}
        {% if topics|length < 8 %}
          {% for topic in topic_options %}
            {% if topic not in topics %}
              <a href="{{ build_compare_url(legislator_ids=selected_legislators, topics=topics + [topic], q=q) }}">{{ topic }}</a>
            {% endif %}
          {% endfor %}
        {% endif %}
      </div>
    </section>
    <section class="compare-card">
      <div class="radar-frame">{{ radar_svg | safe }}</div>
      <aside class="compare-legend">
        <h2>Legend</h2>
        {% for item in series %}
          <div class="legend-row">
            <span class="legend-line line-{{ loop.index0 }}"></span>
            <div>
              <strong>{{ item.legislator.display_name }}</strong>
              <small>{{ item.legislator.state or "US" }} · {{ item.legislator.party or "—" }} · avg {{ item.average_score|round(0) if item.average_score is not none else "—" }}</small>
            </div>
          </div>
        {% endfor %}
        <p>Outer ring = 100% support. Each axis is scored independently against full roll-call record.</p>
        <p><em>Max 4 legislators · max 8 axes</em></p>
      </aside>
    </section>
  </main>
  <footer class="footer">
    <span>Actual record, not rhetoric</span>
    <span>Source: congressional roll-call votes</span>
    <span>Not affiliated with any political party or organization</span>
  </footer>
 {% endblock %}
@@ -1,30 +0,0 @@
 {% extends "base.html" %}
 {% block title %}Legislative Accountability{% endblock %}
 {% block body %}
  <main class="shell">
    <section class="page-heading">
      <div>
        <h1>Legislative accountability</h1>
        <p>US legislative accountability · precomputed legislator topic scores{% if latest_score_year %} through {{ latest_score_year }}{% endif %}</p>
      </div>
      <div class="heading-actions">
        <a href="#" aria-disabled="true">Methodology</a>
        <a href="#" aria-disabled="true">Data sources</a>
        <span>Last updated: {{ last_updated.strftime("%b %Y") if last_updated else "Unavailable" }}</span>
      </div>
    </section>
    <div class="notice">Choose one or more score topics, then select lawmakers to compare computed records over time.</div>
    <div id="dashboard-body">
      {% include "partials/_dashboard.html" %}
    </div>
  </main>
  <footer class="footer">
    <span>Actual record, not rhetoric</span>
    <span>Source: congressional roll-call votes</span>
    <span>Not affiliated with any political party or organization</span>
  </footer>
 {% endblock %}
@@ -1,148 +0,0 @@
 {% extends "base.html" %}
 {% block title %}Legislator Profiles{% endblock %}
 {% block body %}
  <main class="shell">
    <section class="page-heading stacked-heading">
      <div>
        <h1>Legislator profiles</h1>
        <p>Full issue taxonomy · search any current legislator</p>
      </div>
    </section>
    <form class="wide-search legislator-search-form" action="/legislators" method="get">
      <label class="sr-only" for="legislator-search">Search legislators</label>
      <input
        id="legislator-search"
        type="search"
        name="q"
        value="{{ q }}"
        placeholder="Search by name or state"
        autocomplete="off"
        hx-get="/partials/legislator-suggestions"
        hx-trigger="input changed delay:200ms, search"
        hx-target="#legislator-suggestions"
        hx-swap="innerHTML">
      <label class="sr-only" for="legislator-page-size">Results per page</label>
      <select id="legislator-page-size" name="per_page" aria-label="Results per page">
        {% for option in per_page_options %}
          <option value="{{ option }}" {{ "selected" if option == per_page else "" }}>{{ option }}</option>
        {% endfor %}
      </select>
      <button type="submit">Search</button>
    </form>
    <div id="legislator-suggestions" aria-live="polite"></div>
    {% if q %}
      <section class="phonebook-results" aria-label="Matching legislators">
        <header>
          <h2>Matching legislators</h2>
          <span>{{ result_count }} result{{ "" if result_count == 1 else "s" }}</span>
        </header>
        {% if matches %}
          <ol class="phonebook-list" start="{{ ((page - 1) * per_page) + 1 }}">
            {% for option in matches %}
              <li>
                <a href="{{ build_legislator_url(legislator_id=option.legislator_id, q=q, per_page=per_page) }}">
                  <span class="phonebook-name">{{ option.display_name }}</span>
                  <span class="phonebook-meta">
                    {{ option.state or "US" }}{% if option.party %} · {{ option.party }}{% endif %}{% if option.chamber %} · {{ option.chamber }}{% endif %}
                  </span>
                </a>
              </li>
            {% endfor %}
          </ol>
          <nav class="pagination" aria-label="Legislator results pages">
            {% if previous_page %}
              <a href="{{ build_legislator_search_url(q=q, per_page=per_page, page=previous_page) }}">Previous</a>
            {% else %}
              <span>Previous</span>
            {% endif %}
            <strong>Page {{ page }} of {{ total_pages }}</strong>
            {% if next_page %}
              <a href="{{ build_legislator_search_url(q=q, per_page=per_page, page=next_page) }}">Next</a>
            {% else %}
              <span>Next</span>
            {% endif %}
          </nav>
        {% else %}
          <p class="empty-state">No legislators matched this search.</p>
        {% endif %}
      </section>
    {% endif %}
    {% if profile %}
      <section class="profile-card">
        <header class="profile-header">
          <div class="profile-identity">
            <span class="avatar">{{ profile.legislator.display_name.split(',')[0][:1] }}{{ profile.legislator.display_name.split(',')[-1].strip()[:1] }}</span>
            <div>
              <h2>{{ profile.legislator.display_name }} <span class="party-pill">{{ profile.legislator.chamber or "LEG" }}</span></h2>
              <p>{{ profile.legislator.state or "US" }} · {{ profile.legislator.party or "Unaffiliated" }}{% if profile.serving_since %} · Serving since {{ profile.serving_since }}{% endif %}</p>
            </div>
          </div>
          <div class="overall-score">
            <span>Overall avg</span>
            <strong>{{ profile.overall_score|round(0) if profile.overall_score is not none else "—" }}</strong>
            <small>/ 100</small>
          </div>
        </header>
        {% if profile.top_topics or profile.bottom_topics %}
          <div class="topic-panels">
            <article>
              <h3>Most important issues for</h3>
              {% for item in profile.top_topics %}
                <a class="topic-row" href="{{ build_legislator_url(legislator_id=profile.legislator.legislator_id, topic=item.topic) }}">
                  <strong class="score positive">{{ item.score|round(0) }}</strong>
                  <span>{{ item.topic }}</span>
                  <i style="width: {{ item.score }}%"></i>
                </a>
              {% endfor %}
            </article>
            <article>
              <h3 class="opposed-heading">Most important issues against</h3>
              {% for item in profile.bottom_topics %}
                <a class="topic-row {{ 'active' if item.topic == selected_topic else '' }}" href="{{ build_legislator_url(legislator_id=profile.legislator.legislator_id, topic=item.topic) }}">
                  <strong class="score negative">{{ item.score|round(0) }}</strong>
                  <span>{{ item.topic }}</span>
                  <i style="width: {{ item.score }}%"></i>
                </a>
              {% endfor %}
            </article>
          </div>
          <section class="profile-history">
            <h3>{{ selected_topic or "Topic" }} — score history</h3>
            <div class="chart-frame">{{ history_svg | safe }}</div>
            {% if history_series %}
              <div class="chart-legend compact" aria-label="Chart legend">
                {% for item in history_series %}
                  <div class="chart-legend-row">
                    <span class="chart-legend-line line-0"></span>
                    <span class="chart-legend-marker marker-0"></span>
                    <div class="chart-legend-copy">
                      <span class="chart-legend-label">{{ item.label }}</span>
                      <span class="chart-legend-meta">
                        {% if item.party %}{{ item.party }}{% endif %}{% if item.party and item.state %} · {% endif %}{% if item.state %}{{ item.state }}{% endif %}
                      </span>
                    </div>
                  </div>
                {% endfor %}
              </div>
            {% endif %}
          </section>
        {% else %}
          <p class="empty-state">No issue scores are available for this legislator yet.</p>
        {% endif %}
      </section>
    {% endif %}
  </main>
  <footer class="footer">
    <span>Actual record, not rhetoric</span>
    <span>Source: congressional roll-call votes</span>
    <span>Not affiliated with any political party or organization</span>
  </footer>
 {% endblock %}
@@ -1,45 +0,0 @@
 {% extends "base.html" %}
 {% block title %}Sign in | Nornsight{% endblock %}
 {% block body %}
 <main class="login-shell">
  <section class="login-panel" aria-labelledby="login-title">
    <div class="login-copy">
      <p class="eyebrow">Admin access</p>
      <h1 id="login-title">Sign in to Nornsight</h1>
      <p>Use the dashboard account to review rankings, profiles, and legislator comparisons.</p>
    </div>
    <form class="login-form" action="/login" method="post">
      <input type="hidden" name="next" value="{{ next_path }}">
      {% if error %}
      <p class="form-error" role="alert">{{ error }}</p>
      {% endif %}
      <label for="username">Username</label>
      <input
        id="username"
        name="username"
        type="text"
        autocomplete="username"
        value="{{ username }}"
        required
        autofocus
      >
      <label for="password">Password</label>
      <input
        id="password"
        name="password"
        type="password"
        autocomplete="current-password"
        required
      >
      <button type="submit">Sign in</button>
    </form>
  </section>
 </main>
 {% endblock %}
@@ -1,30 +0,0 @@
 <section class="chart-card">
  <header>
    <h2>Score history{% if selected_issue_label %} — {{ selected_issue_label }}{% endif %}</h2>
    <a href="{{ build_url(request, compare=[]) }}"
       hx-get="/partials/dashboard{{ build_url(request, compare=[])|replace('/', '', 1) }}"
       hx-target="#dashboard-body"
       hx-push-url="{{ build_url(request, compare=[]) }}">Clear comparison</a>
  </header>
  <div class="chart-frame">
    {{ chart_svg | safe }}
  </div>
  {% if chart_series %}
    <div class="chart-legend" aria-label="Chart legend">
      {% for item in chart_series %}
        {% set style_index = loop.index0 % 4 %}
        <div class="chart-legend-row">
          <span class="chart-legend-line line-{{ style_index }}"></span>
          <span class="chart-legend-marker marker-{{ style_index }}"></span>
          <div class="chart-legend-copy">
            <span class="chart-legend-label">{{ item.label }}</span>
            <span class="chart-legend-meta">
              {% if item.party %}{{ item.party }}{% endif %}{% if item.party and item.state %} · {% endif %}{% if item.state %}{{ item.state }}{% endif %}
            </span>
          </div>
        </div>
      {% endfor %}
    </div>
  {% endif %}
  <p class="score-note">Scores reflect averaged precomputed topic rows per year. Sparse years are omitted rather than plotted as zero.</p>
 </section>
@@ -1,25 +0,0 @@
 <section class="controls-grid">
  {% include "partials/_issue_filters.html" %}
  <div class="chamber-card">
    <a class="segment {{ 'active' if chamber == 'house' else '' }}"
       href="{{ build_url(request, chamber='house') }}"
       hx-get="/partials/dashboard{{ build_url(request, chamber='house')|replace('/', '', 1) }}"
       hx-target="#dashboard-body"
       hx-push-url="{{ build_url(request, chamber='house') }}">House</a>
    <a class="segment {{ 'active' if chamber == 'senate' else '' }}"
       href="{{ build_url(request, chamber='senate') }}"
       hx-get="/partials/dashboard{{ build_url(request, chamber='senate')|replace('/', '', 1) }}"
       hx-target="#dashboard-body"
       hx-push-url="{{ build_url(request, chamber='senate') }}">Senate</a>
    <a class="segment {{ 'active' if chamber == 'all' else '' }}"
       href="{{ build_url(request, chamber='all') }}"
       hx-get="/partials/dashboard{{ build_url(request, chamber='all')|replace('/', '', 1) }}"
       hx-target="#dashboard-body"
       hx-push-url="{{ build_url(request, chamber='all') }}">All</a>
  </div>
 </section>
 <p class="score-note">Support score: 1-100 precomputed from bill topic stance and roll-call votes. Higher means more aligned with the topic.</p>
 {% include "partials/_rankings.html" %}
 {% include "partials/_chart.html" %}
@@ -1,46 +0,0 @@
 <section class="filter-card">
  <h2>Issue filters</h2>
  <form class="issue-form"
        method="get"
        action="/"
        hx-get="/"
        hx-target="#dashboard-body"
        hx-push-url="true">
    <input type="hidden" name="chamber" value="{{ chamber }}">
    {% if congress %}
      <input type="hidden" name="congress" value="{{ congress }}">
    {% endif %}
    {% for legislator_id in compare %}
      <input type="hidden" name="compare" value="{{ legislator_id }}">
    {% endfor %}
    {% for issue in issues %}
      <span class="chip">
        {{ issue }}
        <a href="{{ build_url(request, issues=issues[:loop.index0] + issues[loop.index:]) }}"
           hx-get="/partials/dashboard{{ build_url(request, issues=issues[:loop.index0] + issues[loop.index:])|replace('/', '', 1) }}"
           hx-target="#dashboard-body"
           hx-push-url="{{ build_url(request, issues=issues[:loop.index0] + issues[loop.index:]) }}"
           aria-label="Remove {{ issue }}">×</a>
      </span>
      <input type="hidden" name="issues" value="{{ issue }}">
    {% endfor %}
    <label class="search-box">
      <span class="sr-only">Search issue areas</span>
      <input type="search" name="issues" placeholder="Search issue areas" autocomplete="off">
    </label>
    <button type="submit">Apply</button>
  </form>
  {% if suggestions %}
    <div class="suggestions" aria-label="Issue suggestions">
      {% for suggestion in suggestions %}
        {% if suggestion not in issues %}
          <a href="{{ build_url(request, issues=issues + [suggestion]) }}"
             hx-get="/partials/dashboard{{ build_url(request, issues=issues + [suggestion])|replace('/', '', 1) }}"
             hx-target="#dashboard-body"
             hx-push-url="{{ build_url(request, issues=issues + [suggestion]) }}">{{ suggestion }}</a>
        {% endif %}
      {% endfor %}
    </div>
  {% endif %}
 </section>
@@ -1,11 +0,0 @@
 {% if matches %}
  <div class="result-chips" aria-label="Search suggestions">
    {% for option in matches %}
      <a href="{{ build_legislator_url(legislator_id=option.legislator_id) }}">
        {{ option.display_name }}{% if option.state %} · {{ option.state }}{% endif %}
      </a>
    {% endfor %}
  </div>
 {% elif q %}
  <p class="suggestion-empty">No matches</p>
 {% endif %}
@@ -1,61 +0,0 @@
 <section class="rankings-grid">
  <article class="ranking-card">
    <header>
      <h2>Most supportive</h2>
      <span>Top 10</span>
    </header>
    {% if rankings.supportive %}
      <ol class="ranking-list">
        {% for row in rankings.supportive %}
          {% set next_compare = toggle_compare(compare, row.legislator_id) %}
          <li class="{{ 'selected' if row.legislator_id in compare else '' }}">
            <a href="{{ build_url(request, compare=next_compare) }}"
               hx-get="/partials/dashboard{{ build_url(request, compare=next_compare)|replace('/', '', 1) }}"
               hx-target="#dashboard-body"
               hx-push-url="{{ build_url(request, compare=next_compare) }}">
              <span class="rank">{{ loop.index }}</span>
              <strong class="score positive">{{ row.score|round(1) }}</strong>
              <span class="member">
                <strong>{{ row.display_name }}</strong>
                <small>{{ row.state or "US" }}{% if row.party %} · {{ row.party[:1] }}{% endif %}</small>
              </span>
              <span class="votes">{{ row.total }} rows</span>
            </a>
          </li>
        {% endfor %}
      </ol>
    {% else %}
      <p class="empty-state">{{ empty_message }}</p>
    {% endif %}
  </article>
  <article class="ranking-card">
    <header>
      <h2>Most opposed</h2>
      <span>Bottom 10</span>
    </header>
    {% if rankings.opposed %}
      <ol class="ranking-list">
        {% for row in rankings.opposed %}
          {% set next_compare = toggle_compare(compare, row.legislator_id) %}
          <li class="{{ 'selected' if row.legislator_id in compare else '' }}">
            <a href="{{ build_url(request, compare=next_compare) }}"
               hx-get="/partials/dashboard{{ build_url(request, compare=next_compare)|replace('/', '', 1) }}"
               hx-target="#dashboard-body"
               hx-push-url="{{ build_url(request, compare=next_compare) }}">
              <span class="rank">{{ loop.index }}</span>
              <strong class="score negative">{{ row.score|round(1) }}</strong>
              <span class="member">
                <strong>{{ row.display_name }}</strong>
                <small>{{ row.state or "US" }}{% if row.party %} · {{ row.party[:1] }}{% endif %}</small>
              </span>
              <span class="votes">{{ row.total }} rows</span>
            </a>
          </li>
        {% endfor %}
      </ol>
    {% else %}
      <p class="empty-state">{{ empty_message }}</p>
    {% endif %}
  </article>
 </section>
@@ -1,15 +0,0 @@
 {% extends "base.html" %}
 {% block title %}Database Setup Required{% endblock %}
 {% block body %}
  <main class="shell">
    <section class="page-heading stacked-heading">
      <div>
        <h1>Database setup required</h1>
        <p>Configure DATA_SCIENCE_DEV before opening the dashboard.</p>
      </div>
    </section>
    <pre class="setup-error">{{ error }}</pre>
  </main>
 {% endblock %}
Author	SHA1	Message	Date
Richie	9a4f71f471	updated dependencies	2026-04-21 22:57:54 -04:00
Richie	c8adf6914e	removed LegislatorBillScore	2026-04-21 22:51:29 -04:00
Richie	87a7f5312f	converted default_config_path and get_config_dir	2026-04-21 22:51:10 -04:00
Richie	51d6240690	removed LegislatorBillScore	2026-04-21 22:48:13 -04:00
Richie	1426b797e5	added summarize_bills.py	2026-04-21 22:44:52 -04:00
Richie	4b768049c0	added summarization metadata to the DB	2026-04-21 21:42:13 -04:00
Richie	674edafe94	created scoring tables and basic logic	2026-04-21 11:44:53 -04:00
Richie	be4b473a3c	got alembic working	2026-04-21 11:36:58 -04:00
Richie	e5ba089479	adding database_cli.py and alembic	2026-04-21 11:08:00 -04:00
Richie	0c99a63347	added DatabaseSetupError	2026-04-21 10:54:30 -04:00
Richie	07cd231609	adding common.py code and parallelize.py	2026-04-21 10:09:01 -04:00
Richie	c8b61fc3c0	adding ingest_congress.py and ingest_posts.py	2026-04-21 09:59:26 -04:00
		`@@ -1 +0,0 @@`
			`"""FastAPI HTMX front end for the legislative database."""`