"""Issue matching and voting score helpers.""" from __future__ import annotations from dataclasses import dataclass from sqlalchemy import ColumnElement, false, func, or_ from sqlalchemy.sql.elements import BinaryExpression from pipelines.orm.data_science_dev.congress import Bill, BillTopicPosition, Vote SUPPORT_POSITIONS = frozenset({"yea", "aye", "yes"}) OPPOSE_POSITIONS = frozenset({"nay", "no"}) @dataclass(frozen=True) class ScoreCounts: """Support/opposition counts for one legislator or time bucket.""" supportive: int opposed: int @property def total(self) -> int: return self.supportive + self.opposed def normalize_position(position: str | None) -> str | None: """Normalize a raw roll-call position into support/oppose/ignore buckets.""" if position is None: return None value = position.strip().lower() if value in SUPPORT_POSITIONS: return "support" if value in OPPOSE_POSITIONS: return "oppose" return None def score_vote_position( position: str | None, support_position: BillTopicPosition | str, ) -> str | None: """Score a raw vote as support/opposition for an extracted bill topic.""" normalized_vote = normalize_position(position) if normalized_vote is None: return None topic_position = BillTopicPosition(support_position) if topic_position is BillTopicPosition.FOR: return normalized_vote if normalized_vote == "support": return "oppose" return "support" def calculate_score(counts: ScoreCounts) -> int | None: """Calculate the 0-100 support score, or None when there are no scored votes.""" if counts.total == 0: return None return round(100 * counts.supportive / counts.total) def normalize_issues(issues: list[str] | tuple[str, ...]) -> list[str]: """Trim, de-duplicate, and preserve issue order for display and queries.""" normalized: list[str] = [] seen: set[str] = set() for issue in issues: value = issue.strip() key = value.casefold() if value and key not in seen: normalized.append(value) seen.add(key) return normalized def issue_match_condition(issues: list[str] | tuple[str, ...]) -> ColumnElement[bool]: """Build the SQLAlchemy condition for issue text matching.""" normalized = normalize_issues(list(issues)) if not normalized: return false() fields: tuple[ColumnElement[str | None], ...] = ( Bill.subjects_top_term, Bill.title, Bill.title_short, Bill.official_title, Vote.question, Vote.result_text, ) terms: list[BinaryExpression[bool]] = [] for issue in normalized: pattern = f"%{issue}%" terms.extend(field.ilike(pattern) for field in fields) return or_(*terms) def normalized_position_expression(column: ColumnElement[str]) -> ColumnElement[str | None]: """Lowercase and trim a SQL column containing raw vote positions.""" return func.lower(func.trim(column))