setting up ORM
This commit is contained in:
+14
-3
@@ -17,6 +17,10 @@ NAMING_CONVENTION = {
|
||||
}
|
||||
|
||||
|
||||
class DatabaseSetupError(RuntimeError):
    """Signal that the database configuration is absent or not usable."""
|
||||
|
||||
|
||||
def get_connection_info(name: str) -> tuple[str, str, str, str, str | None]:
|
||||
"""Get connection info from environment variables."""
|
||||
database = getenv(f"{name}_DB")
|
||||
@@ -27,11 +31,18 @@ def get_connection_info(name: str) -> tuple[str, str, str, str, str | None]:
|
||||
|
||||
if None in (database, host, port, username):
|
||||
error = f"Missing environment variables for Postgres connection.\n{database=}\n{host=}\n{port=}\n{username=}\n"
|
||||
raise ValueError(error)
|
||||
return cast("tuple[str, str, str, str, str | None]", (database, host, port, username, password))
|
||||
raise DatabaseSetupError(error)
|
||||
return cast(
|
||||
"tuple[str, str, str, str, str | None]",
|
||||
(database, host, port, username, password),
|
||||
)
|
||||
|
||||
|
||||
def get_postgres_engine(*, name: str = "POSTGRES", pool_pre_ping: bool = True) -> Engine:
|
||||
def get_postgres_engine(
|
||||
*,
|
||||
name: str = "POSTGRES",
|
||||
pool_pre_ping: bool = True,
|
||||
) -> Engine:
|
||||
"""Create a SQLAlchemy engine from environment variables."""
|
||||
database, host, port, username, password = get_connection_info(name)
|
||||
|
||||
|
||||
@@ -1,17 +1,86 @@
|
||||
"""init."""
|
||||
"""Congress ORM models."""
|
||||
|
||||
from pipelines.orm.data_science_dev.congress.bill import Bill, BillText
|
||||
from pipelines.orm.data_science_dev.congress.bill import (
|
||||
Bill,
|
||||
BillAction,
|
||||
BillActionRecordedVote,
|
||||
BillRelation,
|
||||
BillText,
|
||||
BillTopic,
|
||||
BillTopicPosition,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress.amendment import (
|
||||
Amendment,
|
||||
AmendmentAction,
|
||||
AmendmentActionRecordedVote,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress.context import (
|
||||
ClassificationMethod,
|
||||
ConfidenceLevel,
|
||||
IngestRun,
|
||||
MeasureFunction,
|
||||
MeasureSubtype,
|
||||
ScoreRun,
|
||||
SourceArtifact,
|
||||
SubjectType,
|
||||
TextResolutionMethod,
|
||||
TextTargetBasis,
|
||||
TextTargetType,
|
||||
VoteActionMatch,
|
||||
VoteActionScope,
|
||||
VoteClassification,
|
||||
VoteContextAudit,
|
||||
VoteEffect,
|
||||
VoteMeasureLink,
|
||||
VoteMeasureRole,
|
||||
VotePositionMeaning,
|
||||
VoteRelationship,
|
||||
VoteTextTarget,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress.legislator import (
|
||||
Legislator,
|
||||
LegislatorScore,
|
||||
LegislatorSocialMedia,
|
||||
LegislatorScoreFake,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress.vote import Vote, VoteRecord
|
||||
|
||||
# Names re-exported by this module, kept in alphabetical order so additions
# are easy to place and linters (e.g. ruff RUF022) stay quiet.
__all__ = [
    "Amendment",
    "AmendmentAction",
    "AmendmentActionRecordedVote",
    "Bill",
    "BillAction",
    "BillActionRecordedVote",
    "BillRelation",
    "BillText",
    "BillTopic",
    "BillTopicPosition",
    "ClassificationMethod",
    "ConfidenceLevel",
    "IngestRun",
    "Legislator",
    "LegislatorScore",
    "LegislatorScoreFake",
    "LegislatorSocialMedia",
    "MeasureFunction",
    "MeasureSubtype",
    "ScoreRun",
    "SourceArtifact",
    "SubjectType",
    "TextResolutionMethod",
    "TextTargetBasis",
    "TextTargetType",
    "Vote",
    "VoteActionMatch",
    "VoteActionScope",
    "VoteClassification",
    "VoteContextAudit",
    "VoteEffect",
    "VoteMeasureLink",
    "VoteMeasureRole",
    "VotePositionMeaning",
    "VoteRecord",
    "VoteRelationship",
    "VoteTextTarget",
]
|
||||
|
||||
@@ -0,0 +1,127 @@
|
||||
"""Amendment models and official action context."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date, datetime
|
||||
|
||||
from sqlalchemy import DateTime, ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
|
||||
|
||||
|
||||
class Amendment(DataScienceDevTableBase):
    """Congressional amendment that targets a bill or another amendment.

    Rows are unique per ``(congress, amendment_type, number)``.  Both target
    links are optional: ``amended_bill_id`` points at a bill row and
    ``amended_amendment_id`` points at another amendment row.
    """

    __tablename__ = "amendment"
    __table_args__ = (
        UniqueConstraint(
            "congress",
            "amendment_type",
            "number",
            name="uq_amendment_congress_type_number",
        ),
    )

    # Identity and descriptive columns.
    congress: Mapped[int]
    amendment_type: Mapped[str]
    number: Mapped[int]
    chamber: Mapped[str]
    description: Mapped[str | None]
    purpose: Mapped[str | None]

    # Optional targets; ON DELETE SET NULL keeps the amendment row when the
    # referenced bill or amendment is removed.
    amended_bill_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill.id", ondelete="SET NULL"),
    )
    amended_amendment_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="SET NULL"),
    )

    # Provenance of the row.
    source_path: Mapped[str | None]
    source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL"),
    )

    # Child actions are owned by the amendment (delete-orphan cascade).
    actions: Mapped[list[AmendmentAction]] = relationship(
        "AmendmentAction",
        cascade="all, delete-orphan",
        back_populates="amendment",
    )
    # Self-referential link; ``remote_side`` marks ``Amendment.id`` as the
    # remote (parent) column of the relationship.
    amended_amendment: Mapped[Amendment | None] = relationship(
        "Amendment",
        remote_side="Amendment.id",
    )
|
||||
|
||||
|
||||
class AmendmentAction(DataScienceDevTableBase):
    """One official action recorded against an amendment.

    An action is unique per ``(amendment_id, sequence)``.
    """

    __tablename__ = "amendment_action"
    __table_args__ = (
        UniqueConstraint(
            "amendment_id",
            "sequence",
            name="uq_amendment_action_amendment_id_sequence",
        ),
    )

    # Removing the parent amendment removes its actions (ON DELETE CASCADE).
    amendment_id: Mapped[int] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="CASCADE"),
    )
    sequence: Mapped[int]

    # Action details; the time is stored as text, not as a time type.
    action_date: Mapped[date]
    action_time: Mapped[str | None]
    action_text: Mapped[str]
    action_type: Mapped[str | None]
    action_code: Mapped[str | None]

    # Provenance of the row.
    source_system_code: Mapped[str | None]
    source_system_name: Mapped[str | None]
    source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL"),
    )

    amendment: Mapped[Amendment] = relationship("Amendment", back_populates="actions")
    recorded_votes: Mapped[list[AmendmentActionRecordedVote]] = relationship(
        "AmendmentActionRecordedVote",
        cascade="all, delete-orphan",
        back_populates="amendment_action",
    )
|
||||
|
||||
|
||||
class AmendmentActionRecordedVote(DataScienceDevTableBase):
    """Recorded-vote reference attached to a single amendment action.

    The roll-call tuple ``(congress, chamber, session_number, roll_number)`` is
    unique within one action and is also indexed on its own.
    """

    __tablename__ = "amendment_action_recorded_vote"
    __table_args__ = (
        UniqueConstraint(
            "amendment_action_id",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
            name="uq_amendment_action_recorded_vote_match_key",
        ),
        # Same tuple without the action id, as a plain (non-unique) index.
        Index(
            "ix_amendment_action_recorded_vote_match_tuple",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
        ),
    )

    amendment_action_id: Mapped[int] = mapped_column(
        ForeignKey("main.amendment_action.id", ondelete="CASCADE"),
    )
    congress: Mapped[int]
    chamber: Mapped[str]
    session_number: Mapped[int]
    roll_number: Mapped[int]
    # Stored timezone-aware.
    vote_datetime: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    vote_url: Mapped[str | None]

    amendment_action: Mapped[AmendmentAction] = relationship(
        "AmendmentAction", back_populates="recorded_votes"
    )
|
||||
@@ -1,23 +1,48 @@
|
||||
"""Bill model - legislation introduced in Congress."""
|
||||
"""Bill models for legislation, official actions, text versions, and topic tags."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from datetime import date, datetime
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy import DateTime, Enum, ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipelines.orm.data_science_dev.congress.vote import Vote
|
||||
from pipelines.orm.data_science_dev.congress.context import VoteMeasureLink
|
||||
|
||||
|
||||
class BillTopicPosition(StrEnum):
|
||||
"""Whether a yes vote on a bill is for or against a topic."""
|
||||
|
||||
FOR = "for"
|
||||
AGAINST = "against"
|
||||
|
||||
|
||||
def _enum_column(enum_cls: type[StrEnum], *, name: str) -> Enum:
    """Return a non-native SQLAlchemy ``Enum`` type for a ``StrEnum`` class.

    Args:
        enum_cls: The ``StrEnum`` subclass whose members define the allowed values.
        name: Name passed through to the SQLAlchemy ``Enum`` type.

    Returns:
        An ``Enum`` configured with ``native_enum=False`` whose value list is the
        members' ``value`` strings, in declaration order.
    """

    def _member_values(enum_type: type[StrEnum]) -> list[str]:
        # Handed to SQLAlchemy as ``values_callable``.
        return [member.value for member in enum_type]

    return Enum(
        enum_cls,
        name=name,
        native_enum=False,
        values_callable=_member_values,
    )
|
||||
|
||||
|
||||
class Bill(DataScienceDevTableBase):
|
||||
"""Legislation with congress number, type, titles, status, and sponsor."""
|
||||
|
||||
__tablename__ = "bill"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress", "bill_type", "number", name="uq_bill_congress_type_number"
|
||||
),
|
||||
Index("ix_bill_congress", "congress"),
|
||||
)
|
||||
|
||||
congress: Mapped[int]
|
||||
bill_type: Mapped[str]
|
||||
@@ -33,22 +58,39 @@ class Bill(DataScienceDevTableBase):
|
||||
sponsor_bioguide_id: Mapped[str | None]
|
||||
|
||||
subjects_top_term: Mapped[str | None]
|
||||
score_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
|
||||
votes: Mapped[list[Vote]] = relationship(
|
||||
"Vote",
|
||||
back_populates="bill",
|
||||
)
|
||||
bill_texts: Mapped[list[BillText]] = relationship(
|
||||
"BillText",
|
||||
back_populates="bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress", "bill_type", "number", name="uq_bill_congress_type_number"
|
||||
),
|
||||
Index("ix_bill_congress", "congress"),
|
||||
topics: Mapped[list[BillTopic]] = relationship(
|
||||
"BillTopic",
|
||||
back_populates="bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
bill_actions: Mapped[list[BillAction]] = relationship(
|
||||
"BillAction",
|
||||
back_populates="bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
outgoing_bill_relations: Mapped[list[BillRelation]] = relationship(
|
||||
"BillRelation",
|
||||
foreign_keys="BillRelation.bill_id",
|
||||
back_populates="bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
incoming_bill_relations: Mapped[list[BillRelation]] = relationship(
|
||||
"BillRelation",
|
||||
foreign_keys="BillRelation.related_bill_id",
|
||||
back_populates="related_bill",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
vote_measure_links: Mapped[list[VoteMeasureLink]] = relationship(
|
||||
"VoteMeasureLink",
|
||||
back_populates="measure",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
|
||||
@@ -56,17 +98,147 @@ class BillText(DataScienceDevTableBase):
|
||||
"""Stores different text versions of a bill (introduced, enrolled, etc.)."""
|
||||
|
||||
__tablename__ = "bill_text"
|
||||
|
||||
bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
|
||||
version_code: Mapped[str]
|
||||
version_name: Mapped[str | None]
|
||||
text_content: Mapped[str | None]
|
||||
date: Mapped[date | None]
|
||||
|
||||
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"bill_id", "version_code", name="uq_bill_text_bill_id_version_code"
|
||||
),
|
||||
)
|
||||
|
||||
bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
|
||||
version_code: Mapped[str]
|
||||
version_name: Mapped[str | None]
|
||||
text_content: Mapped[str | None]
|
||||
summary: Mapped[str | None]
|
||||
summarization_model: Mapped[str | None]
|
||||
summarization_user_prompt_version: Mapped[str | None]
|
||||
summarization_system_prompt_version: Mapped[str | None]
|
||||
date: Mapped[date | None]
|
||||
source_datetime_raw: Mapped[str | None]
|
||||
text_url_xml: Mapped[str | None]
|
||||
text_url_pdf: Mapped[str | None]
|
||||
text_url_html: Mapped[str | None]
|
||||
source_artifact_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("main.source_artifact.id", ondelete="SET NULL")
|
||||
)
|
||||
|
||||
bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
|
||||
|
||||
|
||||
class BillAction(DataScienceDevTableBase):
    """One official bill action taken from Bill Status XML.

    An action is unique per ``(bill_id, sequence)``.
    """

    __tablename__ = "bill_action"
    __table_args__ = (
        UniqueConstraint(
            "bill_id",
            "sequence",
            name="uq_bill_action_bill_id_sequence",
        ),
    )

    # Removing the parent bill removes its actions (ON DELETE CASCADE).
    bill_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE"),
    )
    sequence: Mapped[int]

    # Action details; the time is stored as text, not as a time type.
    action_date: Mapped[date]
    action_time: Mapped[str | None]
    action_text: Mapped[str]
    action_type: Mapped[str | None]
    action_code: Mapped[str | None]

    # Provenance of the row.
    source_system_code: Mapped[str | None]
    source_system_name: Mapped[str | None]
    source_artifact_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.source_artifact.id", ondelete="SET NULL"),
    )

    bill: Mapped[Bill] = relationship("Bill", back_populates="bill_actions")
    recorded_votes: Mapped[list[BillActionRecordedVote]] = relationship(
        "BillActionRecordedVote",
        cascade="all, delete-orphan",
        back_populates="bill_action",
    )
|
||||
|
||||
|
||||
class BillActionRecordedVote(DataScienceDevTableBase):
    """Recorded-vote reference attached to a single bill action.

    The roll-call tuple ``(congress, chamber, session_number, roll_number)`` is
    unique within one action and is also indexed on its own.
    """

    __tablename__ = "bill_action_recorded_vote"
    __table_args__ = (
        UniqueConstraint(
            "bill_action_id",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
            name="uq_bill_action_recorded_vote_match_key",
        ),
        # Same tuple without the action id, as a plain (non-unique) index.
        Index(
            "ix_bill_action_recorded_vote_match_tuple",
            "congress",
            "chamber",
            "session_number",
            "roll_number",
        ),
    )

    bill_action_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill_action.id", ondelete="CASCADE"),
    )
    congress: Mapped[int]
    chamber: Mapped[str]
    session_number: Mapped[int]
    roll_number: Mapped[int]
    # Stored timezone-aware.
    vote_datetime: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    vote_url: Mapped[str | None]

    bill_action: Mapped[BillAction] = relationship(
        "BillAction", back_populates="recorded_votes"
    )
|
||||
|
||||
|
||||
class BillRelation(DataScienceDevTableBase):
    """Directed link from one bill/resolution to a related one.

    Both ends reference ``main.bill``; ``bill_id`` is the owning side and
    ``related_bill_id`` is the bill it is related to.
    """

    __tablename__ = "bill_relation"
    __table_args__ = (
        Index("ix_bill_relation_bill_id", "bill_id"),
        Index("ix_bill_relation_related_bill_id", "related_bill_id"),
    )

    # Deleting either bill deletes the relation row (ON DELETE CASCADE).
    bill_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE"),
    )
    related_bill_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE"),
    )
    relationship_type: Mapped[str]
    identified_by: Mapped[str | None]
    latest_action_date: Mapped[date | None]
    latest_action_text: Mapped[str | None]

    # Two foreign keys target the same table, so each relationship names the
    # column it follows explicitly.
    bill: Mapped[Bill] = relationship(
        "Bill",
        back_populates="outgoing_bill_relations",
        foreign_keys=[bill_id],
    )
    related_bill: Mapped[Bill] = relationship(
        "Bill",
        back_populates="incoming_bill_relations",
        foreign_keys=[related_bill_id],
    )
|
||||
|
||||
|
||||
class BillTopic(DataScienceDevTableBase):
    """A bill's stance on a single topic, used when scoring roll-call votes.

    Rows are unique per ``(bill_id, topic, support_position)``.
    """

    __tablename__ = "bill_topic"
    __table_args__ = (
        UniqueConstraint(
            "bill_id",
            "topic",
            "support_position",
            name="uq_bill_topic_bill_id_topic_support_position",
        ),
        Index("ix_bill_topic_topic", "topic"),
    )

    bill_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE"),
    )
    topic: Mapped[str]
    # Persisted through the module's portable (non-native) enum helper.
    support_position: Mapped[BillTopicPosition] = mapped_column(
        _enum_column(BillTopicPosition, name="bill_topic_position"),
    )

    bill: Mapped[Bill] = relationship("Bill", back_populates="topics")
|
||||
|
||||
@@ -0,0 +1,462 @@
|
||||
"""Canonical vote context, artifact tracking, and run metadata models."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import DateTime, Enum, ForeignKey, Index, func, text
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipelines.orm.data_science_dev.congress.amendment import Amendment, AmendmentAction
|
||||
from pipelines.orm.data_science_dev.congress.bill import Bill, BillAction, BillText
|
||||
from pipelines.orm.data_science_dev.congress.legislator import LegislatorScore
|
||||
from pipelines.orm.data_science_dev.congress.vote import Vote
|
||||
|
||||
|
||||
def _enum_column(enum_cls: type[StrEnum], *, name: str) -> Enum:
    """Return a non-native SQLAlchemy ``Enum`` type for a ``StrEnum`` class.

    Args:
        enum_cls: The ``StrEnum`` subclass whose members define the allowed values.
        name: Name passed through to the SQLAlchemy ``Enum`` type.

    Returns:
        An ``Enum`` configured with ``native_enum=False`` whose value list is the
        members' ``value`` strings, in declaration order.
    """

    def _member_values(enum_type: type[StrEnum]) -> list[str]:
        # Handed to SQLAlchemy as ``values_callable``.
        return [member.value for member in enum_type]

    return Enum(
        enum_cls,
        name=name,
        native_enum=False,
        values_callable=_member_values,
    )
|
||||
|
||||
|
||||
class ConfidenceLevel(StrEnum):
|
||||
"""Low/medium/high confidence buckets."""
|
||||
|
||||
HIGH = "high"
|
||||
MEDIUM = "medium"
|
||||
LOW = "low"
|
||||
|
||||
|
||||
class VoteActionScope(StrEnum):
|
||||
"""Whether a matched action came from bill or amendment context."""
|
||||
|
||||
BILL = "bill"
|
||||
AMENDMENT = "amendment"
|
||||
|
||||
|
||||
class SubjectType(StrEnum):
|
||||
"""The direct legal/procedural subject of the vote."""
|
||||
|
||||
MEASURE = "measure"
|
||||
AMENDMENT = "amendment"
|
||||
NOMINATION = "nomination"
|
||||
TREATY = "treaty"
|
||||
QUORUM = "quorum"
|
||||
CHAMBER_ADMIN = "chamber_admin"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class MeasureSubtype(StrEnum):
|
||||
"""Formal congressional measure subtype."""
|
||||
|
||||
BILL = "bill"
|
||||
JOINT_RESOLUTION = "joint_resolution"
|
||||
CONCURRENT_RESOLUTION = "concurrent_resolution"
|
||||
SIMPLE_RESOLUTION = "simple_resolution"
|
||||
|
||||
|
||||
class MeasureFunction(StrEnum):
|
||||
"""Semantic function of a measure beyond its formal subtype."""
|
||||
|
||||
SUBSTANTIVE_MEASURE = "substantive_measure"
|
||||
SPECIAL_RULE = "special_rule"
|
||||
BUDGET_RESOLUTION = "budget_resolution"
|
||||
CHAMBER_INTERNAL = "chamber_internal"
|
||||
COMMEMORATIVE_OR_SENSE_OF = "commemorative_or_sense_of"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class VoteRelationship(StrEnum):
|
||||
"""The vote's relationship to the direct subject and its text."""
|
||||
|
||||
DIRECT_TEXT_VOTE = "direct_text_vote"
|
||||
AMENDMENT_TEXT_VOTE = "amendment_text_vote"
|
||||
PROCEDURAL_RELATED_TO_MEASURE = "procedural_related_to_measure"
|
||||
PROCEDURAL_RELATED_TO_AMENDMENT = "procedural_related_to_amendment"
|
||||
NON_LEGISLATIVE = "non_legislative"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class ClassificationMethod(StrEnum):
|
||||
"""How the final classification was derived."""
|
||||
|
||||
RECORDED_VOTE_ACTION_EXACT = "recorded_vote_action_exact"
|
||||
RECORDED_VOTE_ACTION_DUPLICATE_SOURCE_DEDUPED = (
|
||||
"recorded_vote_action_duplicate_source_deduped"
|
||||
)
|
||||
VOTE_XML_ONLY = "vote_xml_only"
|
||||
QUESTION_TEXT_ONLY = "question_text_only"
|
||||
MANUAL_REVIEW = "manual_review"
|
||||
|
||||
|
||||
class VoteMeasureRole(StrEnum):
|
||||
"""How one measure relates to one classified vote."""
|
||||
|
||||
VOTED_ON = "voted_on"
|
||||
RULE_FOR = "rule_for"
|
||||
UNDERLYING_BILL = "underlying_bill"
|
||||
PROCEDURAL_TARGET = "procedural_target"
|
||||
AMENDS = "amends"
|
||||
AMENDED_BY = "amended_by"
|
||||
CONFERENCE_REPORT_FOR = "conference_report_for"
|
||||
RELATED_ONLY = "related_only"
|
||||
|
||||
|
||||
class TextTargetType(StrEnum):
|
||||
"""Which kind of legislative text was the object of a vote."""
|
||||
|
||||
BILL_TEXT = "bill_text"
|
||||
RESOLUTION_TEXT = "resolution_text"
|
||||
AMENDMENT_TEXT = "amendment_text"
|
||||
NONE = "none"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class TextTargetBasis(StrEnum):
|
||||
"""How the text target should be interpreted."""
|
||||
|
||||
EXACT_ACTION_TEXT_VERSION = "exact_action_text_version"
|
||||
RESULTING_ENGROSSED_VERSION = "resulting_engrossed_version"
|
||||
RECEIVED_PRIOR_CHAMBER_VERSION = "received_prior_chamber_version"
|
||||
AMENDMENT_TEXT = "amendment_text"
|
||||
RULE_RESOLUTION_TEXT = "rule_resolution_text"
|
||||
NO_TEXT_TARGET = "no_text_target"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class TextResolutionMethod(StrEnum):
|
||||
"""How the official text target was resolved."""
|
||||
|
||||
TEXT_EXACT_ACTION_DATE_AND_CODE = "text_exact_action_date_and_code"
|
||||
TEXT_EXACT_ACTION_DATE_WRONG_CODE = "text_exact_action_date_wrong_code"
|
||||
TEXT_PRIOR_VERSION_CODE_MATCH = "text_prior_version_code_match"
|
||||
TEXT_RECEIVED_PRIOR_CHAMBER_VERSION = "text_received_prior_chamber_version"
|
||||
TEXT_RESULTING_ENROLLED_ONLY = "text_resulting_enrolled_only"
|
||||
AMENDMENT_TEXT_UNMODELED_PHASE1 = "amendment_text_unmodeled_phase1"
|
||||
NO_TEXT_TARGET = "no_text_target"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class VoteEffect(StrEnum):
|
||||
"""Meaning of one member position relative to the target text/procedure."""
|
||||
|
||||
SUPPORTS_TEXT = "supports_text"
|
||||
OPPOSES_TEXT = "opposes_text"
|
||||
ADVANCES_PROCEDURE = "advances_procedure"
|
||||
BLOCKS_PROCEDURE = "blocks_procedure"
|
||||
UNKNOWN = "unknown"
|
||||
|
||||
|
||||
class IngestRun(DataScienceDevTableBase):
    """Record of a single full ingestion or context rebuild run."""

    __tablename__ = "ingest_run"

    # Run timing; both timestamps are stored timezone-aware.
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    # Version and snapshot metadata for the run.
    git_sha: Mapped[str | None]
    classifier_version: Mapped[str | None]
    source_snapshot_label: Mapped[str | None]
    status: Mapped[str]

    # Children are owned by the run at the ORM level (delete-orphan cascade).
    source_artifacts: Mapped[list[SourceArtifact]] = relationship(
        "SourceArtifact",
        cascade="all, delete-orphan",
        back_populates="ingest_run",
    )
    score_runs: Mapped[list[ScoreRun]] = relationship(
        "ScoreRun",
        cascade="all, delete-orphan",
        back_populates="ingest_run",
    )
|
||||
|
||||
|
||||
class SourceArtifact(DataScienceDevTableBase):
    """Manifest entry describing one local artifact, kept for reproducibility.

    A unique index covers ``(ingest_run_id, local_path, sha256)``.
    """

    __tablename__ = "source_artifact"
    __table_args__ = (
        Index("ix_source_artifact_source_kind", "source_kind"),
        Index("ix_source_artifact_congress", "congress"),
        # Uniqueness is expressed as a unique index rather than a constraint.
        Index(
            "uq_source_artifact_ingest_identity",
            "ingest_run_id",
            "local_path",
            "sha256",
            unique=True,
        ),
    )

    source_kind: Mapped[str]
    congress: Mapped[int]
    chamber: Mapped[str | None]
    local_path: Mapped[str]
    source_url: Mapped[str | None]
    sha256: Mapped[str]
    byte_size: Mapped[int]
    modified_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    ingested_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    # The foreign key nulls this link if the ingest run row is deleted.
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL"),
    )

    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun", back_populates="source_artifacts"
    )
|
||||
|
||||
|
||||
class ScoreRun(DataScienceDevTableBase):
    """Record of one full score recomputation, linked to an ingest snapshot."""

    __tablename__ = "score_run"

    # The foreign key nulls this link if the ingest run row is deleted.
    ingest_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.ingest_run.id", ondelete="SET NULL"),
    )
    classifier_version: Mapped[str | None]
    scoring_version: Mapped[str | None]
    included_vote_count: Mapped[int]
    excluded_vote_count: Mapped[int]
    started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    ingest_run: Mapped[IngestRun | None] = relationship(
        "IngestRun", back_populates="score_runs"
    )
    # Scores are owned by the run at the ORM level (delete-orphan cascade).
    scores: Mapped[list[LegislatorScore]] = relationship(
        "LegislatorScore",
        cascade="all, delete-orphan",
        back_populates="score_run",
    )
|
||||
|
||||
|
||||
class VoteActionMatch(DataScienceDevTableBase):
    """Candidate or selected official-action match for a single raw vote.

    A partial unique index on ``vote_id`` (PostgreSQL ``WHERE is_selected``)
    allows at most one selected match per vote.
    """

    __tablename__ = "vote_action_match"
    __table_args__ = (
        Index("ix_vote_action_match_vote_id", "vote_id"),
        Index(
            "uq_vote_action_match_selected_vote_id",
            "vote_id",
            unique=True,
            postgresql_where=text("is_selected"),
        ),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
    )
    action_scope: Mapped[VoteActionScope] = mapped_column(
        _enum_column(VoteActionScope, name="vote_action_scope"),
    )
    # Both action links are nullable.
    bill_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_action.id", ondelete="CASCADE"),
    )
    amendment_action_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment_action.id", ondelete="CASCADE"),
    )
    is_selected: Mapped[bool]
    match_method: Mapped[str]
    match_reason: Mapped[str | None]
    match_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_action_match_confidence"),
    )
    # Filled in by the database server at insert time.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    vote: Mapped[Vote] = relationship("Vote", back_populates="action_matches")
    bill_action: Mapped[BillAction | None] = relationship("BillAction")
    amendment_action: Mapped[AmendmentAction | None] = relationship("AmendmentAction")
|
||||
|
||||
|
||||
class VoteClassification(DataScienceDevTableBase):
    """Normalized answer to "what was this vote legally/procedurally on?".

    ``vote_id`` is unique, so each vote has at most one classification row.
    """

    __tablename__ = "vote_classification"
    __table_args__ = (
        Index("ix_vote_classification_subject_type", "subject_type"),
        # Partial index (PostgreSQL) limited to rows matching the predicate below.
        Index(
            "ix_vote_classification_eligible_vote_id",
            "vote_id",
            postgresql_where=text(
                "subject_type = 'measure' "
                "AND vote_relationship = 'direct_text_vote' "
                "AND is_direct_vote_on_legislative_text "
                "AND is_substantive_policy_vote "
                "AND NOT is_special_rule"
            ),
        ),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )

    # What the vote was on.
    subject_type: Mapped[SubjectType] = mapped_column(
        _enum_column(SubjectType, name="vote_subject_type"),
    )
    measure_type: Mapped[str | None]
    measure_subtype: Mapped[MeasureSubtype | None] = mapped_column(
        _enum_column(MeasureSubtype, name="vote_measure_subtype"),
    )
    measure_function: Mapped[MeasureFunction | None] = mapped_column(
        _enum_column(MeasureFunction, name="vote_measure_function"),
    )
    vote_relationship: Mapped[VoteRelationship] = mapped_column(
        _enum_column(VoteRelationship, name="vote_relationship"),
    )

    # Boolean flags; several are referenced by the partial index predicate.
    is_legislation_related: Mapped[bool]
    is_direct_vote_on_legislative_text: Mapped[bool]
    is_substantive_policy_vote: Mapped[bool]
    is_lawmaking_vehicle: Mapped[bool]
    is_special_rule: Mapped[bool]

    # How and when the classification was produced.
    classification_method: Mapped[ClassificationMethod] = mapped_column(
        _enum_column(ClassificationMethod, name="vote_classification_method"),
    )
    classification_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_classification_confidence"),
    )
    classified_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    classification_version: Mapped[str]

    vote: Mapped[Vote] = relationship("Vote", back_populates="classification")
|
||||
|
||||
|
||||
class VoteMeasureLink(DataScienceDevTableBase):
    """Link between a classified vote and one bill/resolution measure.

    ``measure_id`` references ``main.bill``; ``role`` records how the measure
    relates to the vote.
    """

    __tablename__ = "vote_measure_link"
    __table_args__ = (
        Index("ix_vote_measure_link_vote_id", "vote_id"),
        Index("ix_vote_measure_link_vote_id_role", "vote_id", "role"),
        Index("ix_vote_measure_link_measure_id_role", "measure_id", "role"),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
    )
    measure_id: Mapped[int] = mapped_column(
        ForeignKey("main.bill.id", ondelete="CASCADE"),
    )
    role: Mapped[VoteMeasureRole] = mapped_column(
        _enum_column(VoteMeasureRole, name="vote_measure_role"),
    )
    source: Mapped[str]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_measure_link_confidence"),
    )
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship("Vote", back_populates="vote_measure_links")
    measure: Mapped[Bill] = relationship("Bill", back_populates="vote_measure_links")
|
||||
|
||||
|
||||
class VoteTextTarget(DataScienceDevTableBase):
    """Official text target (if any) resolved for a single classified vote.

    ``vote_id`` is unique, so each vote has at most one text-target row.
    """

    __tablename__ = "vote_text_target"
    __table_args__ = (
        # Partial index (PostgreSQL): only rows that have a voted text version.
        Index(
            "ix_vote_text_target_voted_text_version_id",
            "voted_text_version_id",
            postgresql_where=text("voted_text_version_id IS NOT NULL"),
        ),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
        unique=True,
    )
    text_target_type: Mapped[TextTargetType] = mapped_column(
        _enum_column(TextTargetType, name="vote_text_target_type"),
    )

    # Optional links; ON DELETE SET NULL keeps this row when a target is removed.
    voted_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL"),
    )
    resulting_text_version_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.bill_text.id", ondelete="SET NULL"),
    )
    related_amendment_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.amendment.id", ondelete="SET NULL"),
    )

    text_target_basis: Mapped[TextTargetBasis] = mapped_column(
        _enum_column(TextTargetBasis, name="vote_text_target_basis"),
    )
    text_resolution_method: Mapped[TextResolutionMethod] = mapped_column(
        _enum_column(TextResolutionMethod, name="vote_text_resolution_method"),
    )
    text_resolution_confidence_reason: Mapped[str | None]
    confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_text_target_confidence"),
    )
    notes: Mapped[str | None]

    vote: Mapped[Vote] = relationship("Vote", back_populates="text_target")
    # Two foreign keys target ``bill_text``, so each relationship names the
    # column it follows explicitly.
    voted_text_version: Mapped[BillText | None] = relationship(
        "BillText", foreign_keys=[voted_text_version_id]
    )
    resulting_text_version: Mapped[BillText | None] = relationship(
        "BillText", foreign_keys=[resulting_text_version_id]
    )
    related_amendment: Mapped[Amendment | None] = relationship("Amendment")
|
||||
|
||||
|
||||
class VotePositionMeaning(DataScienceDevTableBase):
    """What a Yea, Nay, or Present position means for one classified vote.

    ``vote_id`` is a unique foreign key, so a vote has at most one row here,
    and the row is deleted together with its vote (``ON DELETE CASCADE``).
    """

    __tablename__ = "vote_position_meaning"

    # One-to-one link back to the vote this interpretation belongs to.
    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"), unique=True
    )

    # Effect of each possible position. All three share the VoteEffect enum
    # but are built by _enum_column with distinct ``name`` values.
    yea_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_yea_effect")
    )
    nay_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_nay_effect")
    )
    present_effect: Mapped[VoteEffect] = mapped_column(
        _enum_column(VoteEffect, name="vote_present_effect")
    )

    # Confidence in the polarity assignment and a required free-text method.
    polarity_confidence: Mapped[ConfidenceLevel] = mapped_column(
        _enum_column(ConfidenceLevel, name="vote_polarity_confidence")
    )
    polarity_method: Mapped[str]
    notes: Mapped[str | None]

    # Paired with ``Vote.position_meaning``.
    vote: Mapped[Vote] = relationship("Vote", back_populates="position_meaning")
|
||||
|
||||
|
||||
class VoteContextAudit(DataScienceDevTableBase):
    """Audit/event row recording an ambiguous or noteworthy vote-context decision.

    A vote may have many audit rows; they are deleted with the vote
    (``ON DELETE CASCADE``).
    """

    __tablename__ = "vote_context_audit"
    __table_args__ = (
        # Lookup of all audit rows for a vote.
        Index("ix_vote_context_audit_vote_id", "vote_id"),
        # Lookup by severity, then vote.
        Index("ix_vote_context_audit_severity_vote_id", "severity", "vote_id"),
    )

    vote_id: Mapped[int] = mapped_column(
        ForeignKey("main.vote.id", ondelete="CASCADE"),
    )

    # Required free-text fields describing the event.
    step: Mapped[str]
    message: Mapped[str]
    severity: Mapped[str]
    source_path: Mapped[str | None]

    # Timezone-aware timestamp; the database fills it in via ``now()``.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now()
    )

    # Paired with ``Vote.context_audit_rows``.
    vote: Mapped[Vote] = relationship("Vote", back_populates="context_audit_rows")
|
||||
@@ -5,12 +5,13 @@ from __future__ import annotations
|
||||
from datetime import date
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import ForeignKey, Text
|
||||
from sqlalchemy import ForeignKey, Index, Text, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from pipelines.orm.data_science_dev.base import DataScienceDevTableBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipelines.orm.data_science_dev.congress.context import ScoreRun
|
||||
from pipelines.orm.data_science_dev.congress.vote import VoteRecord
|
||||
|
||||
|
||||
@@ -18,6 +19,7 @@ class Legislator(DataScienceDevTableBase):
|
||||
"""Members of Congress with identification and current term info."""
|
||||
|
||||
__tablename__ = "legislator"
|
||||
__table_args__ = (Index("ix_legislator_current_chamber", "current_chamber"),)
|
||||
|
||||
bioguide_id: Mapped[str] = mapped_column(Text, unique=True, index=True)
|
||||
|
||||
@@ -50,6 +52,11 @@ class Legislator(DataScienceDevTableBase):
|
||||
back_populates="legislator",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
scores: Mapped[list[LegislatorScore]] = relationship(
|
||||
"LegislatorScore",
|
||||
back_populates="legislator",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
|
||||
class LegislatorSocialMedia(DataScienceDevTableBase):
|
||||
@@ -66,3 +73,59 @@ class LegislatorSocialMedia(DataScienceDevTableBase):
|
||||
legislator: Mapped[Legislator] = relationship(
|
||||
back_populates="social_media_accounts"
|
||||
)
|
||||
|
||||
|
||||
class LegislatorScore(DataScienceDevTableBase):
    """Computed topic score for a legislator in one calendar year.

    At most one row exists per ``(legislator_id, year, topic)``. Rows are
    deleted when the owning legislator or the linked score run is deleted.
    """

    __tablename__ = "legislator_score"
    __table_args__ = (
        UniqueConstraint(
            "legislator_id", "year", "topic",
            name="uq_legislator_score_legislator_id_year_topic",
        ),
        # Lookups of every legislator's score for a given year and topic.
        Index("ix_legislator_score_year_topic", "year", "topic"),
    )

    legislator_id: Mapped[int] = mapped_column(
        ForeignKey("main.legislator.id", ondelete="CASCADE"), index=True
    )
    # Optional: a score row may exist without a recorded score run.
    score_run_id: Mapped[int | None] = mapped_column(
        ForeignKey("main.score_run.id", ondelete="CASCADE"), index=True
    )

    year: Mapped[int]
    topic: Mapped[str]
    score: Mapped[float]

    # Paired with ``Legislator.scores`` and ``ScoreRun.scores``.
    legislator: Mapped[Legislator] = relationship(back_populates="scores")
    score_run: Mapped[ScoreRun | None] = relationship(
        "ScoreRun", back_populates="scores"
    )
|
||||
|
||||
|
||||
class LegislatorScoreFake(DataScienceDevTableBase):
    """Topic score rows stored in ``legislator_score_fake``.

    Mirrors the ``legislator_id`` / ``year`` / ``topic`` / ``score`` columns
    of ``LegislatorScore`` but has no score-run link and no ORM relationships.
    At most one row exists per ``(legislator_id, year, topic)``.
    """

    # NOTE(review): the purpose of this "fake" table is not visible here
    # (presumably synthetic or placeholder scores) -- confirm and document.

    __tablename__ = "legislator_score_fake"
    __table_args__ = (
        UniqueConstraint(
            "legislator_id", "year", "topic",
            name="uq_legislator_score_fake_legislator_id_year_topic",
        ),
        Index("ix_legislator_score_fake_year_topic", "year", "topic"),
    )

    # Rows are deleted with the legislator they point at.
    legislator_id: Mapped[int] = mapped_column(
        ForeignKey("main.legislator.id", ondelete="CASCADE"), index=True
    )

    year: Mapped[int]
    topic: Mapped[str]
    score: Mapped[float]
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
"""Vote model - roll call votes in Congress."""
|
||||
"""Vote models for raw roll-call data and member positions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from datetime import date, datetime
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy import DateTime, ForeignKey, Index, UniqueConstraint
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from pipelines.orm.data_science_dev.base import (
|
||||
@@ -14,9 +15,15 @@ from pipelines.orm.data_science_dev.base import (
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pipelines.orm.data_science_dev.congress.bill import Bill
|
||||
from pipelines.orm.data_science_dev.congress.context import (
|
||||
VoteActionMatch,
|
||||
VoteClassification,
|
||||
VoteContextAudit,
|
||||
VoteMeasureLink,
|
||||
VotePositionMeaning,
|
||||
VoteTextTarget,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.congress.legislator import Legislator
|
||||
from pipelines.orm.data_science_dev.congress.vote import Vote
|
||||
|
||||
|
||||
class VoteRecord(DataScienceDevBase):
|
||||
@@ -41,14 +48,26 @@ class VoteRecord(DataScienceDevBase):
|
||||
|
||||
|
||||
class Vote(DataScienceDevTableBase):
|
||||
"""Roll call votes with counts and optional bill linkage."""
|
||||
"""Raw roll call vote facts from House or Senate vote sources."""
|
||||
|
||||
__tablename__ = "vote"
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress",
|
||||
"chamber",
|
||||
"session_number",
|
||||
"roll_number",
|
||||
name="uq_vote_congress_chamber_session_number_roll_number",
|
||||
),
|
||||
Index("ix_vote_date", "vote_date"),
|
||||
Index("ix_vote_congress_chamber", "congress", "chamber"),
|
||||
)
|
||||
|
||||
congress: Mapped[int]
|
||||
chamber: Mapped[str]
|
||||
session: Mapped[int]
|
||||
number: Mapped[int]
|
||||
session_year: Mapped[int]
|
||||
session_number: Mapped[int]
|
||||
roll_number: Mapped[int]
|
||||
|
||||
vote_type: Mapped[str | None]
|
||||
question: Mapped[str | None]
|
||||
@@ -56,29 +75,57 @@ class Vote(DataScienceDevTableBase):
|
||||
result_text: Mapped[str | None]
|
||||
|
||||
vote_date: Mapped[date]
|
||||
vote_datetime: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
|
||||
raw_vote_source_url: Mapped[str | None]
|
||||
|
||||
yea_count: Mapped[int | None]
|
||||
nay_count: Mapped[int | None]
|
||||
not_voting_count: Mapped[int | None]
|
||||
present_count: Mapped[int | None]
|
||||
|
||||
bill_id: Mapped[int | None] = mapped_column(ForeignKey("main.bill.id"))
|
||||
raw_bill_ref: Mapped[dict | None] = mapped_column(JSONB)
|
||||
raw_amendment_ref: Mapped[dict | None] = mapped_column(JSONB)
|
||||
raw_nomination_ref: Mapped[dict | None] = mapped_column(JSONB)
|
||||
raw_treaty_ref: Mapped[dict | None] = mapped_column(JSONB)
|
||||
raw_vote_source_artifact_id: Mapped[int | None] = mapped_column(
|
||||
ForeignKey("main.source_artifact.id", ondelete="SET NULL")
|
||||
)
|
||||
|
||||
bill: Mapped[Bill | None] = relationship("Bill", back_populates="votes")
|
||||
vote_records: Mapped[list[VoteRecord]] = relationship(
|
||||
"VoteRecord",
|
||||
back_populates="vote",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint(
|
||||
"congress",
|
||||
"chamber",
|
||||
"session",
|
||||
"number",
|
||||
name="uq_vote_congress_chamber_session_number",
|
||||
),
|
||||
Index("ix_vote_date", "vote_date"),
|
||||
Index("ix_vote_congress_chamber", "congress", "chamber"),
|
||||
action_matches: Mapped[list[VoteActionMatch]] = relationship(
|
||||
"VoteActionMatch",
|
||||
back_populates="vote",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
classification: Mapped[VoteClassification | None] = relationship(
|
||||
"VoteClassification",
|
||||
back_populates="vote",
|
||||
cascade="all, delete-orphan",
|
||||
uselist=False,
|
||||
)
|
||||
vote_measure_links: Mapped[list[VoteMeasureLink]] = relationship(
|
||||
"VoteMeasureLink",
|
||||
back_populates="vote",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
text_target: Mapped[VoteTextTarget | None] = relationship(
|
||||
"VoteTextTarget",
|
||||
back_populates="vote",
|
||||
cascade="all, delete-orphan",
|
||||
uselist=False,
|
||||
)
|
||||
position_meaning: Mapped[VotePositionMeaning | None] = relationship(
|
||||
"VotePositionMeaning",
|
||||
back_populates="vote",
|
||||
cascade="all, delete-orphan",
|
||||
uselist=False,
|
||||
)
|
||||
context_audit_rows: Mapped[list[VoteContextAudit]] = relationship(
|
||||
"VoteContextAudit",
|
||||
back_populates="vote",
|
||||
cascade="all, delete-orphan",
|
||||
)
|
||||
|
||||
@@ -2,15 +2,81 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pipelines.orm.data_science_dev.congress import Bill, BillText, Legislator, Vote, VoteRecord
|
||||
from pipelines.orm.data_science_dev.congress import (
|
||||
Amendment,
|
||||
AmendmentAction,
|
||||
AmendmentActionRecordedVote,
|
||||
Bill,
|
||||
BillAction,
|
||||
BillActionRecordedVote,
|
||||
BillRelation,
|
||||
BillText,
|
||||
BillTopic,
|
||||
BillTopicPosition,
|
||||
ClassificationMethod,
|
||||
ConfidenceLevel,
|
||||
IngestRun,
|
||||
Legislator,
|
||||
LegislatorScore,
|
||||
MeasureFunction,
|
||||
MeasureSubtype,
|
||||
ScoreRun,
|
||||
SourceArtifact,
|
||||
SubjectType,
|
||||
TextResolutionMethod,
|
||||
TextTargetBasis,
|
||||
TextTargetType,
|
||||
Vote,
|
||||
VoteActionMatch,
|
||||
VoteActionScope,
|
||||
VoteClassification,
|
||||
VoteContextAudit,
|
||||
VoteEffect,
|
||||
VoteMeasureLink,
|
||||
VoteMeasureRole,
|
||||
VotePositionMeaning,
|
||||
VoteRelationship,
|
||||
VoteRecord,
|
||||
VoteTextTarget,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.posts import partitions # noqa: F401 — registers partition classes in metadata
|
||||
from pipelines.orm.data_science_dev.posts.tables import Posts
|
||||
|
||||
# Public names exported by this module. Kept in alphabetical order so new
# entries have one obvious place and duplicates are easy to spot.
__all__ = [
    "Amendment",
    "AmendmentAction",
    "AmendmentActionRecordedVote",
    "Bill",
    "BillAction",
    "BillActionRecordedVote",
    "BillRelation",
    "BillText",
    "BillTopic",
    "BillTopicPosition",
    "ClassificationMethod",
    "ConfidenceLevel",
    "IngestRun",
    "Legislator",
    "LegislatorScore",
    "MeasureFunction",
    "MeasureSubtype",
    "Posts",
    "ScoreRun",
    "SourceArtifact",
    "SubjectType",
    "TextResolutionMethod",
    "TextTargetBasis",
    "TextTargetType",
    "Vote",
    "VoteActionMatch",
    "VoteActionScope",
    "VoteClassification",
    "VoteContextAudit",
    "VoteEffect",
    "VoteMeasureLink",
    "VoteMeasureRole",
    "VotePositionMeaning",
    "VoteRecord",
    "VoteRelationship",
    "VoteTextTarget",
]
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pipelines.orm.data_science_dev.posts.failed_ingestion import FailedIngestion
|
||||
from pipelines.orm.data_science_dev.posts.tables import Posts
|
||||
from pipelines.orm.data_science_dev.posts.tables import Posts, PostTopic
|
||||
|
||||
# Names exported by the posts package.
__all__ = ["FailedIngestion", "Posts", "PostTopic"]
|
||||
|
||||
@@ -0,0 +1,195 @@
|
||||
"""Shared language filter constants for post sampling queries."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
# Language-array values treated as English by post sampling queries.
# Each entry is a string holding a JSON-style array of language codes; every
# entry either contains "eng" or consists only of empty codes.
# NOTE(review): presumably compared verbatim against a serialized langs
# column, so spacing and element order matter -- confirm with the callers.
# NOTE(review): the enumeration ends at '["eng", "deu"]'; confirm whether
# combinations sorting after "deu" are intentionally absent.
ENGLISH_LANGS = (
    # Missing or empty language tags.
    '["", "", ""]',
    '[""]',
    "[]",
    # English alone or combined with empty tags.
    '["", "eng"]',
    '["eng", "", ""]',
    '["eng", ""]',
    '["eng"]',
    # English combined with other language codes.
    '["eng", "aar"]',
    '["eng", "abk", "afr"]',
    '["eng", "afr"]',
    '["eng", "afr", "abk"]',
    '["eng", "afr", "anp"]',
    '["eng", "afr", "ber"]',
    '["eng", "afr", "dan"]',
    '["eng", "afr", "deu"]',
    '["eng", "afr", "est"]',
    '["eng", "afr", "fra"]',
    '["eng", "afr", "ind"]',
    '["eng", "afr", "lat"]',
    '["eng", "afr", "nld"]',
    '["eng", "afr", "nor"]',
    '["eng", "afr", "pol"]',
    '["eng", "afr", "por"]',
    '["eng", "afr", "ron"]',
    '["eng", "afr", "slk"]',
    '["eng", "afr", "spa"]',
    '["eng", "afr", "tgl"]',
    '["eng", "afr", "tuk"]',
    '["eng", "afr", "tur"]',
    '["eng", "afr", "ukr"]',
    '["eng", "afr", "vol"]',
    '["eng", "agq"]',
    '["eng", "ain"]',
    '["eng", "ain", "amh"]',
    '["eng", "ain", "jpn"]',
    '["eng", "aka"]',
    '["eng", "amh"]',
    '["eng", "amh", "afr"]',
    '["eng", "amh", "ara"]',
    '["eng", "amh", "fra"]',
    '["eng", "anp"]',
    '["eng", "anp", "hye"]',
    '["eng", "anp", "sqi"]',
    '["eng", "", "ara"]',
    '["eng", "ara", ""]',
    '["eng", "ara"]',
    '["eng", "ara", "afr"]',
    '["eng", "ara", "anp"]',
    '["eng", "ara", "ars"]',
    '["eng", "ara", "bul"]',
    '["eng", "ara", "cat"]',
    '["eng", "ara", "deu"]',
    '["eng", "ara", "ell"]',
    '["eng", "ara", "fas"]',
    '["eng", "ara", "fra"]',
    '["eng", "ara", "heb"]',
    '["eng", "ara", "hin"]',
    '["eng", "ara", "ind"]',
    '["eng", "ara", "ita"]',
    '["eng", "ara", "jpn"]',
    '["eng", "ara", "kas"]',
    '["eng", "ara", "kor"]',
    '["eng", "ara", "nob"]',
    '["eng", "ara", "nor"]',
    '["eng", "ara", "rus"]',
    '["eng", "ara", "spa"]',
    '["eng", "ara", "swe"]',
    '["eng", "ara", "tam"]',
    '["eng", "ara", "tur"]',
    '["eng", "ara", "urd"]',
    '["eng", "ara", "zho"]',
    '["eng", "arg"]',
    '["eng", "arg", "amh"]',
    '["eng", "arg", "aze"]',
    '["eng", "ars"]',
    '["eng", "ars", "ara"]',
    '["eng", "asm"]',
    '["eng", "ava", "sqi"]',
    '["eng", "ave"]',
    '["eng", "aze"]',
    '["eng", "aze", "deu"]',
    '["eng", "aze", "hye"]',
    '["eng", "aze", "ita"]',
    '["eng", "aze", "rus"]',
    '["eng", "bam", ""]',
    '["eng", "bel"]',
    '["eng", "bel", "rus"]',
    '["eng", "ben"]',
    '["eng", "ben", "deu"]',
    '["eng", "ben", "fra"]',
    '["eng", "ben", "hin"]',
    '["eng", "ben", "mya"]',
    '["eng", "ber"]',
    '["eng", "ber", "afr"]',
    '["eng", "ber", "deu"]',
    '["eng", "ber", "est"]',
    '["eng", "ber", "hun"]',
    '["eng", "ber", "isl"]',
    '["eng", "ber", "jpn"]',
    '["eng", "ber", "lat"]',
    '["eng", "ber", "nor"]',
    '["eng", "ber", "pol"]',
    '["eng", "ber", "por"]',
    '["eng", "ber", "ron"]',
    '["eng", "ber", "run"]',
    '["eng", "ber", "slk"]',
    '["eng", "ber", "spa"]',
    '["eng", "ber", "tgl"]',
    '["eng", "ber", "tlh"]',
    '["eng", "ber", "tuk"]',
    '["eng", "bod"]',
    '["eng", "bod", "nep"]',
    '["eng", "bos", "hrv"]',
    '["eng", "bos", "srp"]',
    '["eng", "bul"]',
    '["eng", "bul", "deu"]',
    '["eng", "bul", "fra"]',
    '["eng", "bul", "jpn"]',
    '["eng", "bul", "mkd"]',
    '["eng", "bul", "mri"]',
    '["eng", "bul", "nld"]',
    '["eng", "bul", "rus"]',
    '["eng", "bul", "srp"]',
    '["eng", "cat"]',
    '["eng", "cat", "fra"]',
    '["eng", "cat", "ind"]',
    '["eng", "cat", "isl"]',
    '["eng", "cat", "jpn"]',
    '["eng", "cat", "nld"]',
    '["eng", "cat", "spa"]',
    '["eng", "ces"]',
    '["eng", "ces", "deu"]',
    '["eng", "ces", "ell"]',
    '["eng", "ces", "haw"]',
    '["eng", "ces", "ind"]',
    '["eng", "ces", "ita"]',
    '["eng", "ces", "jpn"]',
    '["eng", "ces", "por"]',
    '["eng", "ces", "rus"]',
    '["eng", "ces", "slk"]',
    '["eng", "ces", "spa"]',
    '["eng", "ces", "tuk"]',
    '["eng", "cha"]',
    '["eng", "chr"]',
    '["eng", "chr", "ara"]',
    '["eng", "chr", "deu"]',
    '["eng", "chr", "ell"]',
    '["eng", "chr", "fil"]',
    '["eng", "chr", "isl"]',
    '["eng", "chr", "kor"]',
    '["eng", "chr", "rus"]',
    '["eng", "chr", "spa"]',
    '["eng", "chr", "zho"]',
    '["eng", "chu", "oci"]',
    '["eng", "cor"]',
    '["eng", "", "cos"]',
    '["eng", "cos"]',
    '["eng", "cym"]',
    '["eng", "cym", "deu"]',
    '["eng", "cym", "fra"]',
    '["eng", "cym", "jpn"]',
    '["eng", "cym", "spa"]',
    '["eng", "cym", "zho"]',
    '["eng", "dan"]',
    '["eng", "dan", "ber"]',
    '["eng", "dan", "deu"]',
    '["eng", "dan", "ell"]',
    '["eng", "dan", "est"]',
    '["eng", "dan", "fas"]',
    '["eng", "dan", "fin"]',
    '["eng", "dan", "fra"]',
    '["eng", "dan", "gle"]',
    '["eng", "dan", "hun"]',
    '["eng", "dan", "isl"]',
    '["eng", "dan", "ita"]',
    '["eng", "dan", "jpn"]',
    '["eng", "dan", "lat"]',
    '["eng", "dan", "nld"]',
    '["eng", "dan", "nob"]',
    '["eng", "dan", "nor"]',
    '["eng", "dan", "por"]',
    '["eng", "dan", "rus"]',
    '["eng", "dan", "slk"]',
    '["eng", "dan", "spa"]',
    '["eng", "dan", "swe"]',
    '["eng", "dan", "tuk"]',
    '["eng", "dan", "zho"]',
    '["eng", "deu", ""]',
    '["eng", "deu"]',
)
|
||||
@@ -1,13 +1,36 @@
|
||||
"""Posts parent table with PostgreSQL weekly range partitioning on date column."""
|
||||
"""Posts parent table and PostTopic table for the data_science_dev database."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pipelines.orm.data_science_dev.base import DataScienceDevBase
|
||||
from pipelines.orm.data_science_dev.base import (
|
||||
DataScienceDevBase,
|
||||
DataScienceDevTableBase,
|
||||
)
|
||||
from pipelines.orm.data_science_dev.posts.columns import PostsColumns
|
||||
|
||||
|
||||
from sqlalchemy import BigInteger, Index, SmallInteger
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
|
||||
class Posts(PostsColumns, DataScienceDevBase):
    """Parent ``posts`` table, range-partitioned on the ``date`` column.

    Columns come from ``PostsColumns``; this class only adds the table name
    and the PostgreSQL partitioning clause. Partitions are weekly.
    """

    __tablename__ = "posts"
    # Emitted as ``PARTITION BY RANGE (date)`` in the CREATE TABLE statement.
    __table_args__ = ({"postgresql_partition_by": "RANGE (date)"},)
|
||||
|
||||
|
||||
class PostTopic(DataScienceDevTableBase):
    """BERTopic topic assignment for a post.

    ``post_id`` refers to a row in ``main.posts`` but deliberately carries no
    foreign-key constraint, because ``posts`` is a partitioned table.
    """

    __tablename__ = "post_topic"
    # Plain index that supports lookups by post in place of a foreign key.
    __table_args__ = (Index("ix_post_topic_post_id", "post_id"),)

    post_id: Mapped[int] = mapped_column(BigInteger)
    topic_id: Mapped[int] = mapped_column(SmallInteger)
    topic_label: Mapped[str | None]
    model_version: Mapped[str | None]
|
||||
|
||||
Reference in New Issue
Block a user