Files
weave/pipelines/web/scoring.py
T
2026-04-28 22:50:53 -04:00

101 lines
3.0 KiB
Python

"""Issue matching and voting score helpers."""
from __future__ import annotations
from dataclasses import dataclass
from sqlalchemy import ColumnElement, false, func, or_
from sqlalchemy.sql.elements import BinaryExpression
from pipelines.orm.data_science_dev.congress import Bill, BillTopicPosition, Vote
# Lowercase roll-call position strings counted as a vote in favour.
SUPPORT_POSITIONS: frozenset[str] = frozenset(("yea", "aye", "yes"))
# Lowercase roll-call position strings counted as a vote against.
OPPOSE_POSITIONS: frozenset[str] = frozenset(("nay", "no"))
@dataclass(frozen=True)
class ScoreCounts:
    """Immutable pair of supportive/opposed vote tallies.

    One instance holds the counts for a single legislator or time bucket.
    """

    # Number of votes scored as supportive.
    supportive: int
    # Number of votes scored as opposed.
    opposed: int

    @property
    def total(self) -> int:
        """Return the number of scored votes (supportive plus opposed)."""
        scored_votes = self.supportive + self.opposed
        return scored_votes
def normalize_position(position: str | None) -> str | None:
    """Map a raw roll-call position onto ``"support"``, ``"oppose"`` or ``None``.

    The input is trimmed and lowercased before being looked up in
    ``SUPPORT_POSITIONS`` and ``OPPOSE_POSITIONS``. ``None`` is returned for a
    missing position and for any value found in neither set.
    """
    if position is None:
        return None

    cleaned = position.strip().lower()
    if cleaned in SUPPORT_POSITIONS:
        return "support"
    return "oppose" if cleaned in OPPOSE_POSITIONS else None
def score_vote_position(
    position: str | None,
    support_position: BillTopicPosition | str,
) -> str | None:
    """Translate a raw vote into support/opposition for a bill topic.

    Args:
        position: Raw roll-call position text, or ``None``.
        support_position: The bill's stance on the topic, either a
            ``BillTopicPosition`` member or a value accepted by its constructor.

    Returns:
        ``"support"`` or ``"oppose"``, or ``None`` when the raw vote does not
        normalize to either bucket.
    """
    bucket = normalize_position(position)
    if bucket is None:
        return None

    # Coerce only after the vote is known to be scorable, so an unscorable
    # vote never triggers enum conversion of ``support_position``.
    if BillTopicPosition(support_position) is BillTopicPosition.FOR:
        return bucket

    # Every non-FOR stance flips the vote.
    # NOTE(review): assumes the enum has no neutral member -- confirm.
    return "oppose" if bucket == "support" else "support"
def calculate_score(counts: ScoreCounts) -> int | None:
    """Return the support share as a whole-number percentage.

    Returns ``None`` when ``counts.total`` is zero. Otherwise the result is
    ``100 * supportive / total`` passed through the built-in ``round``, which
    rounds exact halves to the nearest even integer (12.5 becomes 12).
    """
    scored_votes = counts.total
    if scored_votes == 0:
        return None

    percentage = 100 * counts.supportive / scored_votes
    return round(percentage)
def normalize_issues(issues: list[str] | tuple[str, ...]) -> list[str]:
    """Return trimmed, non-empty issues with case-insensitive duplicates removed.

    The first spelling of each issue is kept and the original order is
    preserved. Duplicates are detected by comparing ``str.casefold`` keys.
    """
    # Dicts keep insertion order, and ``setdefault`` keeps the first spelling
    # seen for each casefolded key.
    first_seen: dict[str, str] = {}
    for raw in issues:
        trimmed = raw.strip()
        if trimmed:
            first_seen.setdefault(trimmed.casefold(), trimmed)
    return list(first_seen.values())
def issue_match_condition(issues: list[str] | tuple[str, ...]) -> ColumnElement[bool]:
    """Build the SQLAlchemy condition for issue text matching.

    Each normalized issue is matched as a case-insensitive substring against
    a fixed set of ``Bill`` and ``Vote`` text columns, and all comparisons are
    OR-ed together.

    Args:
        issues: Raw issue strings. They are trimmed and de-duplicated with
            ``normalize_issues`` first.

    Returns:
        A boolean SQL expression, or the SQL ``false()`` constant when no
        usable issue remains after normalization.
    """
    normalized = normalize_issues(issues)
    if not normalized:
        return false()

    fields: tuple[ColumnElement[str | None], ...] = (
        Bill.subjects_top_term,
        Bill.title,
        Bill.title_short,
        Bill.official_title,
        Vote.question,
        Vote.result_text,
    )

    terms: list[BinaryExpression[bool]] = []
    for issue in normalized:
        # Escape the LIKE metacharacters so the issue matches literally. Left
        # unescaped, "_" or "%" in the issue text would act as wildcards.
        # The escape character itself is doubled first.
        escaped = issue.replace("/", "//").replace("%", "/%").replace("_", "/_")
        pattern = f"%{escaped}%"
        terms.extend(field.ilike(pattern, escape="/") for field in fields)
    return or_(*terms)
def normalized_position_expression(column: ColumnElement[str]) -> ColumnElement[str | None]:
    """Wrap a raw vote-position column in SQL ``trim`` then ``lower``."""
    trimmed = func.trim(column)
    return func.lower(trimmed)