# weave/pipelines/web/scoring.py

"""Issue matching and voting score helpers."""

from __future__ import annotations

from dataclasses import dataclass

from sqlalchemy import ColumnElement, false, func, or_
from sqlalchemy.sql.elements import BinaryExpression

from pipelines.orm.data_science_dev.congress import Bill, BillTopicPosition, Vote

# Raw roll-call position strings that count as a vote in favor. Lookups are
# done on values that have already been trimmed and lowercased.
SUPPORT_POSITIONS = frozenset(("yea", "aye", "yes"))
# Raw roll-call position strings that count as a vote against. Any position
# in neither set is treated as unscored.
OPPOSE_POSITIONS = frozenset(("nay", "no"))


@dataclass(frozen=True)
class ScoreCounts:
    """Immutable tally of supportive and opposed votes.

    One instance holds the counts for a single legislator or a single
    time bucket.
    """

    # Number of votes counted as supportive.
    supportive: int
    # Number of votes counted as opposed.
    opposed: int

    @property
    def total(self) -> int:
        """Return the combined number of supportive and opposed votes."""
        return sum((self.supportive, self.opposed))


def normalize_position(position: str | None) -> str | None:
    """Map a raw roll-call position onto ``"support"``, ``"oppose"``, or ``None``.

    The raw text is stripped of surrounding whitespace and lowercased before
    it is looked up in ``SUPPORT_POSITIONS`` and ``OPPOSE_POSITIONS``.

    Args:
        position: Raw position text, or ``None`` when no position is recorded.

    Returns:
        ``"support"`` or ``"oppose"`` for a recognized position; ``None`` for
        a missing position or one found in neither set.
    """
    if position is None:
        return None

    cleaned = position.strip().lower()
    # Support is checked first, matching the order of the lookup sets.
    buckets = (
        ("support", SUPPORT_POSITIONS),
        ("oppose", OPPOSE_POSITIONS),
    )
    for label, raw_values in buckets:
        if cleaned in raw_values:
            return label
    return None


def score_vote_position(
    position: str | None,
    support_position: BillTopicPosition | str,
) -> str | None:
    """Translate a raw vote into support/opposition relative to a bill topic.

    The raw vote is first bucketed with ``normalize_position``. When the
    topic position is ``BillTopicPosition.FOR`` the bucket is returned as is;
    for any other topic position the bucket is flipped.

    Args:
        position: Raw roll-call position text, or ``None``.
        support_position: A ``BillTopicPosition`` member, or a value accepted
            by the ``BillTopicPosition(...)`` constructor.

    Returns:
        ``"support"`` or ``"oppose"``, or ``None`` when the vote itself is not
        scorable. In that case ``support_position`` is never inspected, so an
        invalid value there goes unnoticed.
    """
    vote_bucket = normalize_position(position)
    if vote_bucket is None:
        return None

    # Coerce after the early return so an unscorable vote never triggers
    # validation of ``support_position``.
    if BillTopicPosition(support_position) is BillTopicPosition.FOR:
        return vote_bucket

    # Any non-FOR topic position inverts the meaning of the vote.
    return "oppose" if vote_bucket == "support" else "support"


def calculate_score(counts: ScoreCounts) -> int | None:
    """Return the share of supportive votes as a whole-number percentage.

    Args:
        counts: Tally exposing ``supportive`` and ``total``.

    Returns:
        ``round(100 * supportive / total)``, or ``None`` when ``total`` is
        zero. The built-in ``round`` resolves exact halves to the nearest
        even integer, so 62.5 becomes 62 while 37.5 becomes 38.
    """
    scored_votes = counts.total
    if not scored_votes:
        # Nothing was scored, so a percentage would be meaningless.
        return None
    percentage = counts.supportive * 100 / scored_votes
    return round(percentage)


def normalize_issues(issues: list[str] | tuple[str, ...]) -> list[str]:
    """Trim, de-duplicate, and preserve issue order for display and queries."""
    normalized: list[str] = []
    seen: set[str] = set()
    for issue in issues:
        value = issue.strip()
        key = value.casefold()
        if value and key not in seen:
            normalized.append(value)
            seen.add(key)
    return normalized


def issue_match_condition(issues: list[str] | tuple[str, ...]) -> ColumnElement[bool]:
    """Build the SQLAlchemy condition for issue text matching.

    Each normalized issue is matched as a case-insensitive substring
    (``ILIKE '%issue%'``) against several ``Bill`` and ``Vote`` text columns.
    All comparisons are OR-ed together, so a row matches when any issue
    appears in any of the fields.

    The issue text is matched literally: ``%``, ``_`` and the escape
    character itself are escaped before being embedded in the pattern, so an
    issue such as ``"a_b"`` no longer matches ``"axb"``.

    Args:
        issues: Raw issue strings; they are trimmed and de-duplicated with
            ``normalize_issues`` first.

    Returns:
        The OR-ed ``ILIKE`` condition, or ``false()`` when no usable issue
        remains after normalization, so the filter then matches no rows.
    """
    normalized = normalize_issues(issues)
    if not normalized:
        return false()

    fields: tuple[ColumnElement[str | None], ...] = (
        Bill.subjects_top_term,
        Bill.title,
        Bill.title_short,
        Bill.official_title,
        Vote.question,
        Vote.result_text,
    )

    # "/" is the explicit LIKE escape character. A single-pass translate
    # avoids double-escaping the escape character itself.
    escape_char = "/"
    like_escapes = str.maketrans(
        {
            escape_char: escape_char * 2,
            "%": f"{escape_char}%",
            "_": f"{escape_char}_",
        }
    )

    terms: list[BinaryExpression[bool]] = []
    for issue in normalized:
        pattern = f"%{issue.translate(like_escapes)}%"
        terms.extend(field.ilike(pattern, escape=escape_char) for field in fields)
    return or_(*terms)


def normalized_position_expression(column: ColumnElement[str]) -> ColumnElement[str | None]:
    """Wrap a raw vote-position column in SQL ``TRIM`` and then ``LOWER``.

    Args:
        column: SQL expression holding raw vote position text.

    Returns:
        The expression ``lower(trim(column))``.
    """
    trimmed = func.trim(column)
    return func.lower(trimmed)