463 lines
16 KiB
Python
463 lines
16 KiB
Python
"""Canonical vote context, artifact tracking, and run metadata models."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime
|
|
from enum import StrEnum
|
|
from typing import TYPE_CHECKING
|
|
|
|
from sqlalchemy import DateTime, Enum, ForeignKey, Index, func, text
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
|
|
|
|
if TYPE_CHECKING:
|
|
from pipelines.orm.data_science_dev.congress.amendment import Amendment, AmendmentAction
|
|
from pipelines.orm.data_science_dev.congress.bill import Bill, BillAction, BillText
|
|
from pipelines.orm.data_science_dev.congress.legislator import LegislatorScore
|
|
from pipelines.orm.data_science_dev.congress.vote import Vote
|
|
|
|
|
|
def _enum_column(enum_cls: type[StrEnum], *, name: str) -> Enum:
    """Return a non-native SQLAlchemy ``Enum`` type backed by a ``StrEnum``.

    Args:
        enum_cls: The ``StrEnum`` subclass whose members define the allowed values.
        name: Name forwarded to the SQLAlchemy ``Enum`` type.

    Returns:
        An ``Enum`` type configured with ``native_enum=False`` whose persisted
        strings are the members' ``.value`` attributes (supplied through
        ``values_callable``).
    """

    def _member_values(enum_type: type[StrEnum]) -> list[str]:
        # Handed to SQLAlchemy as ``values_callable``; yields each member's value.
        return [member.value for member in enum_type]

    return Enum(
        enum_cls,
        name=name,
        native_enum=False,
        values_callable=_member_values,
    )
|
|
|
|
|
|
class ConfidenceLevel(StrEnum):
|
|
"""Low/medium/high confidence buckets."""
|
|
|
|
HIGH = "high"
|
|
MEDIUM = "medium"
|
|
LOW = "low"
|
|
|
|
|
|
class VoteActionScope(StrEnum):
|
|
"""Whether a matched action came from bill or amendment context."""
|
|
|
|
BILL = "bill"
|
|
AMENDMENT = "amendment"
|
|
|
|
|
|
class SubjectType(StrEnum):
|
|
"""The direct legal/procedural subject of the vote."""
|
|
|
|
MEASURE = "measure"
|
|
AMENDMENT = "amendment"
|
|
NOMINATION = "nomination"
|
|
TREATY = "treaty"
|
|
QUORUM = "quorum"
|
|
CHAMBER_ADMIN = "chamber_admin"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class MeasureSubtype(StrEnum):
|
|
"""Formal congressional measure subtype."""
|
|
|
|
BILL = "bill"
|
|
JOINT_RESOLUTION = "joint_resolution"
|
|
CONCURRENT_RESOLUTION = "concurrent_resolution"
|
|
SIMPLE_RESOLUTION = "simple_resolution"
|
|
|
|
|
|
class MeasureFunction(StrEnum):
|
|
"""Semantic function of a measure beyond its formal subtype."""
|
|
|
|
SUBSTANTIVE_MEASURE = "substantive_measure"
|
|
SPECIAL_RULE = "special_rule"
|
|
BUDGET_RESOLUTION = "budget_resolution"
|
|
CHAMBER_INTERNAL = "chamber_internal"
|
|
COMMEMORATIVE_OR_SENSE_OF = "commemorative_or_sense_of"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class VoteRelationship(StrEnum):
|
|
"""The vote's relationship to the direct subject and its text."""
|
|
|
|
DIRECT_TEXT_VOTE = "direct_text_vote"
|
|
AMENDMENT_TEXT_VOTE = "amendment_text_vote"
|
|
PROCEDURAL_RELATED_TO_MEASURE = "procedural_related_to_measure"
|
|
PROCEDURAL_RELATED_TO_AMENDMENT = "procedural_related_to_amendment"
|
|
NON_LEGISLATIVE = "non_legislative"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class ClassificationMethod(StrEnum):
|
|
"""How the final classification was derived."""
|
|
|
|
RECORDED_VOTE_ACTION_EXACT = "recorded_vote_action_exact"
|
|
RECORDED_VOTE_ACTION_DUPLICATE_SOURCE_DEDUPED = (
|
|
"recorded_vote_action_duplicate_source_deduped"
|
|
)
|
|
VOTE_XML_ONLY = "vote_xml_only"
|
|
QUESTION_TEXT_ONLY = "question_text_only"
|
|
MANUAL_REVIEW = "manual_review"
|
|
|
|
|
|
class VoteMeasureRole(StrEnum):
|
|
"""How one measure relates to one classified vote."""
|
|
|
|
VOTED_ON = "voted_on"
|
|
RULE_FOR = "rule_for"
|
|
UNDERLYING_BILL = "underlying_bill"
|
|
PROCEDURAL_TARGET = "procedural_target"
|
|
AMENDS = "amends"
|
|
AMENDED_BY = "amended_by"
|
|
CONFERENCE_REPORT_FOR = "conference_report_for"
|
|
RELATED_ONLY = "related_only"
|
|
|
|
|
|
class TextTargetType(StrEnum):
|
|
"""Which kind of legislative text was the object of a vote."""
|
|
|
|
BILL_TEXT = "bill_text"
|
|
RESOLUTION_TEXT = "resolution_text"
|
|
AMENDMENT_TEXT = "amendment_text"
|
|
NONE = "none"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class TextTargetBasis(StrEnum):
|
|
"""How the text target should be interpreted."""
|
|
|
|
EXACT_ACTION_TEXT_VERSION = "exact_action_text_version"
|
|
RESULTING_ENGROSSED_VERSION = "resulting_engrossed_version"
|
|
RECEIVED_PRIOR_CHAMBER_VERSION = "received_prior_chamber_version"
|
|
AMENDMENT_TEXT = "amendment_text"
|
|
RULE_RESOLUTION_TEXT = "rule_resolution_text"
|
|
NO_TEXT_TARGET = "no_text_target"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class TextResolutionMethod(StrEnum):
|
|
"""How the official text target was resolved."""
|
|
|
|
TEXT_EXACT_ACTION_DATE_AND_CODE = "text_exact_action_date_and_code"
|
|
TEXT_EXACT_ACTION_DATE_WRONG_CODE = "text_exact_action_date_wrong_code"
|
|
TEXT_PRIOR_VERSION_CODE_MATCH = "text_prior_version_code_match"
|
|
TEXT_RECEIVED_PRIOR_CHAMBER_VERSION = "text_received_prior_chamber_version"
|
|
TEXT_RESULTING_ENROLLED_ONLY = "text_resulting_enrolled_only"
|
|
AMENDMENT_TEXT_UNMODELED_PHASE1 = "amendment_text_unmodeled_phase1"
|
|
NO_TEXT_TARGET = "no_text_target"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class VoteEffect(StrEnum):
|
|
"""Meaning of one member position relative to the target text/procedure."""
|
|
|
|
SUPPORTS_TEXT = "supports_text"
|
|
OPPOSES_TEXT = "opposes_text"
|
|
ADVANCES_PROCEDURE = "advances_procedure"
|
|
BLOCKS_PROCEDURE = "blocks_procedure"
|
|
UNKNOWN = "unknown"
|
|
|
|
|
|
class IngestRun(DataScienceDevTableBase):
    """Bookkeeping row for a single ingestion or context-rebuild run."""

    __tablename__ = "ingest_run"

    # Run window: timezone-aware timestamps, both nullable.
    started_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )
    completed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )

    # Optional version/label strings recorded for the run.
    git_sha: Mapped[str | None]
    classifier_version: Mapped[str | None]
    source_snapshot_label: Mapped[str | None]

    # Required free-form string; no set of allowed values is enforced here.
    status: Mapped[str]

    # Child collections. "all, delete-orphan" propagates ORM-level operations
    # (including delete) to the children and deletes children removed from
    # the collection.
    # NOTE(review): the child foreign keys in this file declare
    # ondelete="SET NULL", which differs from the ORM cascade — confirm which
    # delete behaviour is intended.
    source_artifacts: Mapped[list[SourceArtifact]] = relationship(
        "SourceArtifact",
        cascade="all, delete-orphan",
        back_populates="ingest_run",
    )
    score_runs: Mapped[list[ScoreRun]] = relationship(
        "ScoreRun",
        cascade="all, delete-orphan",
        back_populates="ingest_run",
    )
|
|
|
|
|
|
class SourceArtifact(DataScienceDevTableBase):
    """Manifest entry describing one local source artifact, kept for reproducibility."""

    __tablename__ = "source_artifact"
    __table_args__ = (
        # Single-column lookup indexes.
        Index("ix_source_artifact_source_kind", "source_kind"),
        Index("ix_source_artifact_congress", "congress"),
        # One row per (ingest run, local path, content hash).
        # NOTE(review): ingest_run_id is nullable; many databases treat NULLs
        # as distinct in unique indexes — confirm that is acceptable here.
        Index(
            "uq_source_artifact_ingest_identity",
            "ingest_run_id",
            "local_path",
            "sha256",
            unique=True,
        ),
    )

    # Descriptive columns for the artifact.
    source_kind: Mapped[str]
    congress: Mapped[int]
    chamber: Mapped[str | None]

    # Location and content identity.
    local_path: Mapped[str]
    source_url: Mapped[str | None]
    sha256: Mapped[str]
    byte_size: Mapped[int]

    # Timezone-aware, nullable timestamps.
    modified_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )
    ingested_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )

    # Optional owning run; the FK is declared with ON DELETE SET NULL.
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL"),
    )

    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun", back_populates="source_artifacts"
    )
|
|
|
|
|
|
class ScoreRun(DataScienceDevTableBase):
    """One complete score recomputation, optionally linked to an ingest run."""

    __tablename__ = "score_run"

    # Optional source ingest run; the FK is declared with ON DELETE SET NULL.
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL"),
    )

    # Optional version strings recorded for the run.
    classifier_version: Mapped[str | None]
    scoring_version: Mapped[str | None]

    # Required vote tallies for the run.
    included_vote_count: Mapped[int]
    excluded_vote_count: Mapped[int]

    # Run window: timezone-aware timestamps, both nullable.
    started_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )
    completed_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )

    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun", back_populates="score_runs"
    )
    # "all, delete-orphan" propagates ORM-level operations (including delete)
    # to the scores and deletes scores removed from this collection.
    scores: Mapped[list[LegislatorScore]] = relationship(
        "LegislatorScore",
        cascade="all, delete-orphan",
        back_populates="score_run",
    )
|
|
|
|
|
|
class VoteActionMatch(DataScienceDevTableBase):
    """Official-action match for one raw vote, either a candidate or the selected one."""

    __tablename__ = "vote_action_match"
    __table_args__ = (
        Index("ix_vote_action_match_vote_id", "vote_id"),
        # Unique over vote_id, restricted by a PostgreSQL predicate to rows
        # where is_selected is true: at most one selected match per vote.
        # NOTE(review): postgresql_where is dialect-specific — confirm how this
        # index should behave on any other backend in use.
        Index(
            "uq_vote_action_match_selected_vote_id",
            "vote_id",
            unique=True,
            postgresql_where=text("is_selected"),
        ),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
    )
    action_scope: Mapped[VoteActionScope] = mapped_column(
        _enum_column(VoteActionScope, name="vote_action_scope"),
    )

    # Both action references are nullable.
    # NOTE(review): presumably action_scope indicates which one is populated;
    # nothing in this model enforces that.
    bill_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_action.id", ondelete="CASCADE"),
    )
    amendment_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment_action.id", ondelete="CASCADE"),
    )

    is_selected: Mapped[bool]
    match_method: Mapped[str]
    match_reason: Mapped[str | None]
    match_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_action_match_confidence"),
    )

    # Defaulted by the database (server_default=func.now()).
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    vote: Mapped[Vote] = relationship(
        "Vote",
        back_populates="action_matches",
    )
    bill_action: Mapped[BillAction | None] = relationship(
        "BillAction",
    )
    amendment_action: Mapped[AmendmentAction | None] = relationship(
        "AmendmentAction",
    )
|
|
|
|
|
|
class VoteClassification(DataScienceDevTableBase):
    """Per-vote normalized record of what the vote was legally/procedurally on."""

    __tablename__ = "vote_classification"
    __table_args__ = (
        Index("ix_vote_classification_subject_type", "subject_type"),
        # Partial index over vote_id; the PostgreSQL predicate keeps only rows
        # that satisfy every condition in the WHERE text below.
        Index(
            "ix_vote_classification_eligible_vote_id",
            "vote_id",
            postgresql_where=text(
                "subject_type = 'measure' "
                "AND vote_relationship = 'direct_text_vote' "
                "AND is_direct_vote_on_legislative_text "
                "AND is_substantive_policy_vote "
                "AND NOT is_special_rule"
            ),
        ),
    )

    # unique=True: at most one classification row per vote.
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"), unique=True
    )

    # Subject and measure description.
    subject_type: Mapped[SubjectType] = mapped_column(
        _enum_column(SubjectType, name="vote_subject_type"),
    )
    measure_type: Mapped[str | None]
    measure_subtype: Mapped[MeasureSubtype | None] = mapped_column(
        _enum_column(MeasureSubtype, name="vote_measure_subtype"),
    )
    measure_function: Mapped[MeasureFunction | None] = mapped_column(
        _enum_column(MeasureFunction, name="vote_measure_function"),
    )
    vote_relationship: Mapped[VoteRelationship] = mapped_column(
        _enum_column(VoteRelationship, name="vote_relationship"),
    )

    # Required boolean flags; three of them appear in the partial-index predicate.
    is_legislation_related: Mapped[bool]
    is_direct_vote_on_legislative_text: Mapped[bool]
    is_substantive_policy_vote: Mapped[bool]
    is_lawmaking_vehicle: Mapped[bool]
    is_special_rule: Mapped[bool]

    # How the classification was produced, and with what confidence.
    classification_method: Mapped[ClassificationMethod] = mapped_column(
        _enum_column(ClassificationMethod, name="vote_classification_method"),
    )
    classification_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_classification_confidence"),
    )
    classified_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
    )
    classification_version: Mapped[str]

    vote: Mapped[Vote] = relationship(
        "Vote",
        back_populates="classification",
    )
|
|
|
|
|
|
class VoteMeasureLink(DataScienceDevTableBase):
    """Link between one classified vote and one bill/resolution measure."""

    __tablename__ = "vote_measure_link"
    __table_args__ = (
        # Lookups by vote, by (vote, role), and by (measure, role).
        Index("ix_vote_measure_link_vote_id", "vote_id"),
        Index("ix_vote_measure_link_vote_id_role", "vote_id", "role"),
        Index("ix_vote_measure_link_measure_id_role", "measure_id", "role"),
    )

    # Both foreign keys are required and declared with ON DELETE CASCADE.
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
    )
    # measure_id references the bill table.
    measure_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE"),
    )

    role: Mapped[VoteMeasureRole] = mapped_column(
        _enum_column(VoteMeasureRole, name="vote_measure_role"),
    )
    source: Mapped[str]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_measure_link_confidence"),
    )
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship(
        "Vote",
        back_populates="vote_measure_links",
    )
    measure: Mapped[Bill] = relationship(
        "Bill",
        back_populates="vote_measure_links",
    )
|
|
|
|
|
|
class VoteTextTarget(DataScienceDevTableBase):
    """Resolved official text target (when there is one) for a classified vote."""

    __tablename__ = "vote_text_target"
    __table_args__ = (
        # Partial index (PostgreSQL predicate): only rows with a voted text version.
        Index(
            "ix_vote_text_target_voted_text_version_id",
            "voted_text_version_id",
            postgresql_where=text("voted_text_version_id IS NOT NULL"),
        ),
    )

    # unique=True: at most one text-target row per vote.
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"), unique=True
    )
    text_target_type: Mapped[TextTargetType] = mapped_column(
        _enum_column(TextTargetType, name="vote_text_target_type"),
    )

    # Optional references, each declared with ON DELETE SET NULL. Two of them
    # point at bill_text, which is why the relationships below name their
    # foreign_keys explicitly.
    voted_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL"),
    )
    resulting_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL"),
    )
    related_amendment_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="SET NULL"),
    )

    # How the target was interpreted and resolved, and with what confidence.
    text_target_basis: Mapped[TextTargetBasis] = mapped_column(
        _enum_column(TextTargetBasis, name="vote_text_target_basis"),
    )
    text_resolution_method: Mapped[TextResolutionMethod] = mapped_column(
        _enum_column(TextResolutionMethod, name="vote_text_resolution_method"),
    )
    text_resolution_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_text_target_confidence"),
    )
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship(
        "Vote",
        back_populates="text_target",
    )
    voted_text_version: Mapped[BillText | None] = relationship(
        "BillText", foreign_keys=[voted_text_version_id]
    )
    resulting_text_version: Mapped[BillText | None] = relationship(
        "BillText", foreign_keys=[resulting_text_version_id]
    )
    related_amendment: Mapped[Amendment | None] = relationship(
        "Amendment",
    )
|
|
|
|
|
|
class VotePositionMeaning(DataScienceDevTableBase):
    """What the Yea, Nay, and Present positions mean for one classified vote."""

    __tablename__ = "vote_position_meaning"

    # unique=True: at most one position-meaning row per vote.
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"), unique=True
    )

    # One VoteEffect per position; each column gets its own enum type name.
    yea_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_yea_effect"),
    )
    nay_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_nay_effect"),
    )
    present_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_present_effect"),
    )

    polarity_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_polarity_confidence"),
    )
    polarity_method: Mapped[str]
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship(
        "Vote",
        back_populates="position_meaning",
    )
|
|
|
|
|
|
class VoteContextAudit(DataScienceDevTableBase):
    """Audit/event record for an ambiguous or noteworthy vote-context decision."""

    __tablename__ = "vote_context_audit"
    __table_args__ = (
        # Lookups by vote, and by severity then vote.
        Index("ix_vote_context_audit_vote_id", "vote_id"),
        Index("ix_vote_context_audit_severity_vote_id", "severity", "vote_id"),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
    )

    # Required free-form strings; no allowed-value set is enforced here.
    step: Mapped[str]
    message: Mapped[str]
    severity: Mapped[str]
    source_path: Mapped[str | None]

    # Defaulted by the database (server_default=func.now()).
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    vote: Mapped[Vote] = relationship(
        "Vote",
        back_populates="context_audit_rows",
    )
|