101 lines
3.0 KiB
Python
101 lines
3.0 KiB
Python
"""Issue matching and voting score helpers."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from sqlalchemy import ColumnElement, false, func, or_
|
|
from sqlalchemy.sql.elements import BinaryExpression
|
|
|
|
from pipelines.orm.data_science_dev.congress import Bill, BillTopicPosition, Vote
|
|
|
|
# Raw roll-call position strings that count as a vote in favor.
# Entries are lowercase; normalize_position() trims and lowercases input
# before testing membership.
SUPPORT_POSITIONS = frozenset(("yea", "aye", "yes"))
# Raw roll-call position strings that count as a vote against (lowercase).
OPPOSE_POSITIONS = frozenset(("nay", "no"))
|
|
|
|
|
|
@dataclass(frozen=True)
class ScoreCounts:
    """Immutable tally of supportive and opposed votes.

    One instance holds the counts for a single legislator or a single
    time bucket.
    """

    # Number of votes scored as support.
    supportive: int
    # Number of votes scored as opposition.
    opposed: int

    @property
    def total(self) -> int:
        """Return the number of scored votes (supportive plus opposed)."""
        scored_votes = self.supportive + self.opposed
        return scored_votes
|
|
|
|
|
|
def normalize_position(position: str | None) -> str | None:
    """Map a raw roll-call position onto a "support" / "oppose" bucket.

    The input is trimmed and lowercased before lookup. Returns ``None``
    when ``position`` is ``None`` or is not listed in either
    ``SUPPORT_POSITIONS`` or ``OPPOSE_POSITIONS``.
    """
    if position is None:
        return None

    cleaned = position.strip().lower()
    # Support is checked first, matching the lookup order callers rely on.
    buckets = (
        ("support", SUPPORT_POSITIONS),
        ("oppose", OPPOSE_POSITIONS),
    )
    for label, raw_values in buckets:
        if cleaned in raw_values:
            return label
    return None
|
|
|
|
|
|
def score_vote_position(
    position: str | None,
    support_position: BillTopicPosition | str,
) -> str | None:
    """Translate a raw vote into support/opposition for a bill topic.

    Args:
        position: Raw roll-call position; bucketed via ``normalize_position``.
        support_position: The bill's stance on the topic, coerced with
            ``BillTopicPosition(...)``.

    Returns:
        ``None`` when the vote cannot be bucketed. Otherwise the vote bucket
        itself when the bill's stance is ``BillTopicPosition.FOR``, and the
        opposite bucket for any other stance.
    """
    vote_bucket = normalize_position(position)
    if vote_bucket is None:
        return None

    # Coercion happens only after the vote is known to be scorable, so an
    # unscorable vote never triggers enum validation of the stance.
    if BillTopicPosition(support_position) is BillTopicPosition.FOR:
        return vote_bucket

    # Any stance other than FOR flips the meaning of the vote.
    return "oppose" if vote_bucket == "support" else "support"
|
|
|
|
|
|
def calculate_score(counts: ScoreCounts) -> int | None:
    """Return the support share as a whole-number percentage.

    The result is ``round(100 * supportive / total)``; ``None`` is returned
    when ``counts.total`` is zero. Note that built-in ``round`` rounds exact
    halves to the nearest even integer (12.5 -> 12, 37.5 -> 38).
    """
    scored_votes = counts.total
    if scored_votes != 0:
        return round(100 * counts.supportive / scored_votes)
    return None
|
|
|
|
|
|
def normalize_issues(issues: list[str] | tuple[str, ...]) -> list[str]:
    """Return the issues trimmed and de-duplicated, in first-seen order.

    Blank entries are dropped. Duplicates are detected case-insensitively
    (via ``str.casefold``); the first spelling encountered is the one kept.
    """
    # Dicts preserve insertion order, so one mapping of
    # casefolded key -> first-seen spelling handles ordering and de-duplication.
    first_spelling: dict[str, str] = {}
    for raw in issues:
        trimmed = raw.strip()
        if not trimmed:
            continue
        first_spelling.setdefault(trimmed.casefold(), trimmed)
    return list(first_spelling.values())
|
|
|
|
|
|
# Escape character declared to the database for issue LIKE patterns. "/" is
# the same character SQLAlchemy uses by default for ``autoescape=True``.
_LIKE_ESCAPE = "/"


def _escape_like_literal(text: str) -> str:
    """Escape LIKE wildcards in ``text`` so it matches literally."""
    # The escape character itself must be doubled first, otherwise the
    # escapes added for "%" and "_" would be escaped again.
    return (
        text.replace(_LIKE_ESCAPE, _LIKE_ESCAPE * 2)
        .replace("%", f"{_LIKE_ESCAPE}%")
        .replace("_", f"{_LIKE_ESCAPE}_")
    )


def issue_match_condition(issues: list[str] | tuple[str, ...]) -> ColumnElement[bool]:
    """Build the SQLAlchemy condition for issue text matching.

    Each normalized issue is matched as a case-insensitive substring
    (``ILIKE '%issue%'``) against the bill subject/title columns and the vote
    question/result text; all comparisons are OR-ed together.

    ``%`` and ``_`` inside an issue are escaped so they match literally
    instead of acting as LIKE wildcards (previously an issue such as
    ``"tax_reform"`` also matched ``"taxXreform"``, and ``"%"`` matched every
    row).

    Args:
        issues: Raw issue strings; trimmed and de-duplicated via
            ``normalize_issues`` before use.

    Returns:
        An OR of ILIKE comparisons, or SQL ``false()`` when no non-blank
        issue is supplied, so an empty filter matches nothing.
    """
    normalized = normalize_issues(issues)
    if not normalized:
        return false()

    fields: tuple[ColumnElement[str | None], ...] = (
        Bill.subjects_top_term,
        Bill.title,
        Bill.title_short,
        Bill.official_title,
        Vote.question,
        Vote.result_text,
    )
    terms: list[BinaryExpression[bool]] = []
    for issue in normalized:
        # Only the surrounding "%" are wildcards; the issue text is literal.
        pattern = f"%{_escape_like_literal(issue)}%"
        terms.extend(field.ilike(pattern, escape=_LIKE_ESCAPE) for field in fields)
    return or_(*terms)
|
|
|
|
|
|
def normalized_position_expression(column: ColumnElement[str]) -> ColumnElement[str | None]:
    """Return a SQL expression that trims, then lowercases, ``column``.

    This is the SQL-side counterpart of the ``strip().lower()`` cleanup
    applied to raw vote positions in Python.
    """
    trimmed = func.trim(column)
    return func.lower(trimmed)
|