"""Canonical vote context, artifact tracking, and run metadata models."""

from __future__ import annotations

from datetime import datetime
from enum import StrEnum
from typing import TYPE_CHECKING

from sqlalchemy import DateTime, Enum, ForeignKey, Index, func, text
from sqlalchemy.orm import Mapped, mapped_column, relationship

from pipelines.orm.data_science_dev.base import DataScienceDevTableBase

# These names are only needed for annotations; importing them under
# TYPE_CHECKING keeps them out of the runtime import graph.
if TYPE_CHECKING:
    from pipelines.orm.data_science_dev.congress.amendment import Amendment, AmendmentAction
    from pipelines.orm.data_science_dev.congress.bill import Bill, BillAction, BillText
    from pipelines.orm.data_science_dev.congress.legislator import LegislatorScore
    from pipelines.orm.data_science_dev.congress.vote import Vote


def _enum_column(enum_cls: type[StrEnum], *, name: str) -> Enum:
    """Build a portable SQLAlchemy enum column for StrEnum values.

    Despite the name, this returns the SQLAlchemy ``Enum`` *type* object; every
    caller in this module passes the result to ``mapped_column(...)``.

    Args:
        enum_cls: The ``StrEnum`` subclass whose members define the allowed values.
        name: Name handed to the SQLAlchemy ``Enum`` type (keyword-only).

    Returns:
        An ``Enum`` type configured with ``native_enum=False`` and a
        ``values_callable`` that lists each member's ``.value``.
    """
    return Enum(
        enum_cls,
        # Use each member's ``.value`` (the lowercase string) as the enum value
        # list instead of SQLAlchemy's default of the member names.
        values_callable=lambda enum_type: [member.value for member in enum_type],
        # Do not rely on a database-native ENUM type.
        native_enum=False,
        name=name,
    )


class ConfidenceLevel(StrEnum):
    """Low/medium/high confidence buckets."""

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"


class VoteActionScope(StrEnum):
    """Whether a matched action came from bill or amendment context."""

    BILL = "bill"
    AMENDMENT = "amendment"


class SubjectType(StrEnum):
    """The direct legal/procedural subject of the vote."""

    MEASURE = "measure"
    AMENDMENT = "amendment"
    NOMINATION = "nomination"
    TREATY = "treaty"
    QUORUM = "quorum"
    CHAMBER_ADMIN = "chamber_admin"
    UNKNOWN = "unknown"


class MeasureSubtype(StrEnum):
    """Formal congressional measure subtype."""

    BILL = "bill"
    JOINT_RESOLUTION = "joint_resolution"
    CONCURRENT_RESOLUTION = "concurrent_resolution"
    SIMPLE_RESOLUTION = "simple_resolution"


class MeasureFunction(StrEnum):
    """Semantic function of a measure beyond its formal subtype."""

    SUBSTANTIVE_MEASURE = "substantive_measure"
    SPECIAL_RULE = "special_rule"
    BUDGET_RESOLUTION = "budget_resolution"
    CHAMBER_INTERNAL = "chamber_internal"
    COMMEMORATIVE_OR_SENSE_OF = "commemorative_or_sense_of"
    UNKNOWN = "unknown"


class VoteRelationship(StrEnum):
    """The vote's relationship to the direct subject and its text."""

    DIRECT_TEXT_VOTE = "direct_text_vote"
    AMENDMENT_TEXT_VOTE = "amendment_text_vote"
    PROCEDURAL_RELATED_TO_MEASURE = "procedural_related_to_measure"
    PROCEDURAL_RELATED_TO_AMENDMENT = "procedural_related_to_amendment"
    NON_LEGISLATIVE = "non_legislative"
    UNKNOWN = "unknown"


class ClassificationMethod(StrEnum):
    """How the final classification was derived."""

    RECORDED_VOTE_ACTION_EXACT = "recorded_vote_action_exact"
    RECORDED_VOTE_ACTION_DUPLICATE_SOURCE_DEDUPED = (
        "recorded_vote_action_duplicate_source_deduped"
    )
    VOTE_XML_ONLY = "vote_xml_only"
    QUESTION_TEXT_ONLY = "question_text_only"
    MANUAL_REVIEW = "manual_review"


class VoteMeasureRole(StrEnum):
    """How one measure relates to one classified vote."""

    VOTED_ON = "voted_on"
    RULE_FOR = "rule_for"
    UNDERLYING_BILL = "underlying_bill"
    PROCEDURAL_TARGET = "procedural_target"
    AMENDS = "amends"
    AMENDED_BY = "amended_by"
    CONFERENCE_REPORT_FOR = "conference_report_for"
    RELATED_ONLY = "related_only"


class TextTargetType(StrEnum):
    """Which kind of legislative text was the object of a vote."""

    BILL_TEXT = "bill_text"
    RESOLUTION_TEXT = "resolution_text"
    AMENDMENT_TEXT = "amendment_text"
    NONE = "none"
    UNKNOWN = "unknown"


class TextTargetBasis(StrEnum):
    """How the text target should be interpreted."""

    EXACT_ACTION_TEXT_VERSION = "exact_action_text_version"
    RESULTING_ENGROSSED_VERSION = "resulting_engrossed_version"
    RECEIVED_PRIOR_CHAMBER_VERSION = "received_prior_chamber_version"
    AMENDMENT_TEXT = "amendment_text"
    RULE_RESOLUTION_TEXT = "rule_resolution_text"
    NO_TEXT_TARGET = "no_text_target"
    UNKNOWN = "unknown"


class TextResolutionMethod(StrEnum):
    """How the official text target was resolved."""

    TEXT_EXACT_ACTION_DATE_AND_CODE = "text_exact_action_date_and_code"
    TEXT_EXACT_ACTION_DATE_WRONG_CODE = "text_exact_action_date_wrong_code"
    TEXT_PRIOR_VERSION_CODE_MATCH = "text_prior_version_code_match"
    TEXT_RECEIVED_PRIOR_CHAMBER_VERSION = "text_received_prior_chamber_version"
    TEXT_RESULTING_ENROLLED_ONLY = "text_resulting_enrolled_only"
    AMENDMENT_TEXT_UNMODELED_PHASE1 = "amendment_text_unmodeled_phase1"
    NO_TEXT_TARGET = "no_text_target"
    UNKNOWN = "unknown"


class VoteEffect(StrEnum):
    """Meaning of one member position relative to the target text/procedure."""

    SUPPORTS_TEXT = "supports_text"
    OPPOSES_TEXT = "opposes_text"
    ADVANCES_PROCEDURE = "advances_procedure"
    BLOCKS_PROCEDURE = "blocks_procedure"
    UNKNOWN = "unknown"


class IngestRun(DataScienceDevTableBase):
    """One full ingestion or context rebuild run.

    Parent of ``SourceArtifact`` and ``ScoreRun`` rows via the
    ``source_artifacts`` and ``score_runs`` relationships.
    """

    __tablename__ = "ingest_run"

    # Both timestamps are timezone-aware and optional.
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    git_sha: Mapped[str | None]
    classifier_version: Mapped[str | None]
    source_snapshot_label: Mapped[str | None]
    # Required free-form string; the set of allowed values is not defined here.
    status: Mapped[str]

    # NOTE(review): both child relationships use the ORM cascade
    # "all, delete-orphan", while the child foreign keys are nullable with
    # ON DELETE SET NULL -- confirm which deletion behaviour is intended.
    source_artifacts: Mapped[list[SourceArtifact]] = relationship(
        "SourceArtifact",
        back_populates="ingest_run",
        cascade="all, delete-orphan",
    )
    score_runs: Mapped[list[ScoreRun]] = relationship(
        "ScoreRun",
        back_populates="ingest_run",
        cascade="all, delete-orphan",
    )


class SourceArtifact(DataScienceDevTableBase):
    """Local artifact manifest entry for reproducibility.

    Indexed by ``source_kind`` and by ``congress``; a unique index covers the
    combination ``(ingest_run_id, local_path, sha256)``.
    """

    __tablename__ = "source_artifact"
    __table_args__ = (
        Index("ix_source_artifact_source_kind", "source_kind"),
        Index("ix_source_artifact_congress", "congress"),
        # NOTE(review): ``ingest_run_id`` is nullable; confirm how the target
        # database treats NULLs inside this unique index.
        Index(
            "uq_source_artifact_ingest_identity",
            "ingest_run_id",
            "local_path",
            "sha256",
            unique=True,
        ),
    )

    source_kind: Mapped[str]
    congress: Mapped[int]
    chamber: Mapped[str | None]
    local_path: Mapped[str]
    source_url: Mapped[str | None]
    sha256: Mapped[str]
    byte_size: Mapped[int]
    modified_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    ingested_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    # Optional link to the owning run; set to NULL by the database if that
    # run row is deleted.
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL")
    )

    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun",
        back_populates="source_artifacts",
    )


class ScoreRun(DataScienceDevTableBase):
    """One full score recomputation tied to one ingest snapshot.

    Parent of ``LegislatorScore`` rows via the ``scores`` relationship.
    """

    __tablename__ = "score_run"

    # Optional link to the ingest run; set to NULL by the database if that
    # run row is deleted.
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL")
    )
    classifier_version: Mapped[str | None]
    scoring_version: Mapped[str | None]
    included_vote_count: Mapped[int]
    excluded_vote_count: Mapped[int]
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun",
        back_populates="score_runs",
    )
    scores: Mapped[list[LegislatorScore]] = relationship(
        "LegislatorScore",
        back_populates="score_run",
        cascade="all, delete-orphan",
    )


class VoteActionMatch(DataScienceDevTableBase):
    """A candidate or selected official action match for one raw vote.

    A vote may have several rows here; the partial unique index below limits
    rows with ``is_selected`` true to one per ``vote_id``.
    """

    __tablename__ = "vote_action_match"
    __table_args__ = (
        Index("ix_vote_action_match_vote_id", "vote_id"),
        # Unique only over rows where ``is_selected`` is true.
        # NOTE(review): the predicate is supplied only via
        # ``postgresql_where``; confirm behaviour if another dialect is used.
        Index(
            "uq_vote_action_match_selected_vote_id",
            "vote_id",
            unique=True,
            postgresql_where=text("is_selected"),
        ),
    )

    vote_id: Mapped[int] = mapped_column(ForeignKey("main.vote.id", ondelete="CASCADE"))
    action_scope: Mapped[VoteActionScope] = mapped_column(
        _enum_column(VoteActionScope, name="vote_action_scope")
    )
    # Both action foreign keys are optional; nothing in this model enforces
    # that the populated one agrees with ``action_scope``.
    bill_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_action.id", ondelete="CASCADE")
    )
    amendment_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment_action.id", ondelete="CASCADE")
    )
    is_selected: Mapped[bool]
    match_method: Mapped[str]
    match_reason: Mapped[str | None]
    match_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_action_match_confidence")
    )
    # Filled in by the database (``now()``) when no value is supplied.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    vote: Mapped[Vote] = relationship("Vote", back_populates="action_matches")
    # One-directional relationships: no ``back_populates`` is declared here.
    bill_action: Mapped[BillAction | None] = relationship("BillAction")
    amendment_action: Mapped[AmendmentAction | None] = relationship("AmendmentAction")


class VoteClassification(DataScienceDevTableBase):
    """Normalized classification for what a vote was legally/procedurally on.

    At most one row per vote (``vote_id`` is unique).
    """

    __tablename__ = "vote_classification"
    __table_args__ = (
        Index("ix_vote_classification_subject_type", "subject_type"),
        # Partial index on ``vote_id`` restricted to rows matching the
        # predicate below (direct, substantive, non-special-rule measure votes).
        # NOTE(review): predicate is PostgreSQL-specific (``postgresql_where``).
        Index(
            "ix_vote_classification_eligible_vote_id",
            "vote_id",
            postgresql_where=text(
                "subject_type = 'measure' "
                "AND vote_relationship = 'direct_text_vote' "
                "AND is_direct_vote_on_legislative_text "
                "AND is_substantive_policy_vote "
                "AND NOT is_special_rule"
            ),
        ),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )
    subject_type: Mapped[SubjectType] = mapped_column(
        _enum_column(SubjectType, name="vote_subject_type")
    )
    # Plain optional string, distinct from the enum-typed subtype below.
    measure_type: Mapped[str | None]
    measure_subtype: Mapped[MeasureSubtype | None] = mapped_column(
        _enum_column(MeasureSubtype, name="vote_measure_subtype")
    )
    measure_function: Mapped[MeasureFunction | None] = mapped_column(
        _enum_column(MeasureFunction, name="vote_measure_function")
    )
    vote_relationship: Mapped[VoteRelationship] = mapped_column(
        _enum_column(VoteRelationship, name="vote_relationship")
    )
    # Required boolean flags; three of them appear in the partial-index
    # predicate above.
    is_legislation_related: Mapped[bool]
    is_direct_vote_on_legislative_text: Mapped[bool]
    is_substantive_policy_vote: Mapped[bool]
    is_lawmaking_vehicle: Mapped[bool]
    is_special_rule: Mapped[bool]
    classification_method: Mapped[ClassificationMethod] = mapped_column(
        _enum_column(ClassificationMethod, name="vote_classification_method")
    )
    classification_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_classification_confidence")
    )
    classified_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    classification_version: Mapped[str]

    vote: Mapped[Vote] = relationship("Vote", back_populates="classification")


class VoteMeasureLink(DataScienceDevTableBase):
    """Relationship between a classified vote and one bill/resolution measure.

    ``measure_id`` references the ``bill`` table; ``role`` records how the
    measure relates to the vote.
    """

    __tablename__ = "vote_measure_link"
    __table_args__ = (
        Index("ix_vote_measure_link_vote_id", "vote_id"),
        Index("ix_vote_measure_link_vote_id_role", "vote_id", "role"),
        Index("ix_vote_measure_link_measure_id_role", "measure_id", "role"),
    )

    vote_id: Mapped[int] = mapped_column(ForeignKey("main.vote.id", ondelete="CASCADE"))
    measure_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
    role: Mapped[VoteMeasureRole] = mapped_column(
        _enum_column(VoteMeasureRole, name="vote_measure_role")
    )
    source: Mapped[str]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_measure_link_confidence")
    )
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship("Vote", back_populates="vote_measure_links")
    measure: Mapped[Bill] = relationship("Bill", back_populates="vote_measure_links")


class VoteTextTarget(DataScienceDevTableBase):
    """Official text target, if any, resolved for one classified vote.

    At most one row per vote (``vote_id`` is unique). The references to bill
    text and amendment rows are optional and become NULL if the referenced
    row is deleted.
    """

    __tablename__ = "vote_text_target"
    __table_args__ = (
        # Partial index that skips rows without a voted text version.
        # NOTE(review): predicate is PostgreSQL-specific (``postgresql_where``).
        Index(
            "ix_vote_text_target_voted_text_version_id",
            "voted_text_version_id",
            postgresql_where=text("voted_text_version_id IS NOT NULL"),
        ),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )
    text_target_type: Mapped[TextTargetType] = mapped_column(
        _enum_column(TextTargetType, name="vote_text_target_type")
    )
    voted_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL")
    )
    resulting_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL")
    )
    related_amendment_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="SET NULL")
    )
    text_target_basis: Mapped[TextTargetBasis] = mapped_column(
        _enum_column(TextTargetBasis, name="vote_text_target_basis")
    )
    text_resolution_method: Mapped[TextResolutionMethod] = mapped_column(
        _enum_column(TextResolutionMethod, name="vote_text_resolution_method")
    )
    text_resolution_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_text_target_confidence")
    )
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship("Vote", back_populates="text_target")
    # Two foreign keys point at ``bill_text``, so each relationship names the
    # column it follows explicitly.
    voted_text_version: Mapped[BillText | None] = relationship(
        "BillText",
        foreign_keys=[voted_text_version_id],
    )
    resulting_text_version: Mapped[BillText | None] = relationship(
        "BillText",
        foreign_keys=[resulting_text_version_id],
    )
    related_amendment: Mapped[Amendment | None] = relationship("Amendment")


class VotePositionMeaning(DataScienceDevTableBase):
    """Meaning of Yea/Nay/Present positions for one classified vote.

    At most one row per vote (``vote_id`` is unique).
    """

    __tablename__ = "vote_position_meaning"

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )
    # One required ``VoteEffect`` per position.
    yea_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_yea_effect")
    )
    nay_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_nay_effect")
    )
    present_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_present_effect")
    )
    polarity_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_polarity_confidence")
    )
    polarity_method: Mapped[str]
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship("Vote", back_populates="position_meaning")


class VoteContextAudit(DataScienceDevTableBase):
    """Audit/event row for ambiguous or noteworthy vote-context decisions.

    A vote may have many audit rows; they are indexed by ``vote_id`` and by
    ``(severity, vote_id)``.
    """

    __tablename__ = "vote_context_audit"
    __table_args__ = (
        Index("ix_vote_context_audit_vote_id", "vote_id"),
        Index("ix_vote_context_audit_severity_vote_id", "severity", "vote_id"),
    )

    vote_id: Mapped[int] = mapped_column(ForeignKey("main.vote.id", ondelete="CASCADE"))
    step: Mapped[str]
    message: Mapped[str]
    # Plain required string; no enum constrains the allowed values here.
    severity: Mapped[str]
    source_path: Mapped[str | None]
    # Filled in by the database (``now()``) when no value is supplied.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    vote: Mapped[Vote] = relationship("Vote", back_populates="context_audit_rows")