671 lines
19 KiB
Python
671 lines
19 KiB
Python
"""Congress database queries for the web dashboard."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import date
|
|
from typing import Literal
|
|
|
|
from sqlalchemy import ColumnElement, Select, case, desc, false, func, or_, select, true
|
|
from sqlalchemy.orm import Session
|
|
|
|
from pipelines.orm.data_science_dev.congress import (
|
|
BillTopic,
|
|
Legislator,
|
|
LegislatorScore,
|
|
Vote,
|
|
)
|
|
from pipelines.web.scoring import normalize_issues
|
|
|
|
Chamber = Literal["house", "senate", "all"]
|
|
|
|
STATE_ALIASES = {
|
|
"alabama": "AL",
|
|
"alaska": "AK",
|
|
"arizona": "AZ",
|
|
"arkansas": "AR",
|
|
"california": "CA",
|
|
"colorado": "CO",
|
|
"connecticut": "CT",
|
|
"delaware": "DE",
|
|
"florida": "FL",
|
|
"georgia": "GA",
|
|
"hawaii": "HI",
|
|
"idaho": "ID",
|
|
"illinois": "IL",
|
|
"indiana": "IN",
|
|
"iowa": "IA",
|
|
"kansas": "KS",
|
|
"kentucky": "KY",
|
|
"louisiana": "LA",
|
|
"maine": "ME",
|
|
"maryland": "MD",
|
|
"massachusetts": "MA",
|
|
"michigan": "MI",
|
|
"minnesota": "MN",
|
|
"mississippi": "MS",
|
|
"missouri": "MO",
|
|
"montana": "MT",
|
|
"nebraska": "NE",
|
|
"nevada": "NV",
|
|
"new hampshire": "NH",
|
|
"new jersey": "NJ",
|
|
"new mexico": "NM",
|
|
"new york": "NY",
|
|
"north carolina": "NC",
|
|
"north dakota": "ND",
|
|
"ohio": "OH",
|
|
"oklahoma": "OK",
|
|
"oregon": "OR",
|
|
"pennsylvania": "PA",
|
|
"rhode island": "RI",
|
|
"south carolina": "SC",
|
|
"south dakota": "SD",
|
|
"tennessee": "TN",
|
|
"texas": "TX",
|
|
"utah": "UT",
|
|
"vermont": "VT",
|
|
"virginia": "VA",
|
|
"washington": "WA",
|
|
"west virginia": "WV",
|
|
"wisconsin": "WI",
|
|
"wyoming": "WY",
|
|
"district of columbia": "DC",
|
|
}
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class RankingRow:
|
|
"""A legislator support score row."""
|
|
|
|
legislator_id: int
|
|
display_name: str
|
|
party: str | None
|
|
state: str | None
|
|
chamber: str | None
|
|
score: float | None
|
|
supportive: int
|
|
opposed: int
|
|
|
|
@property
|
|
def total(self) -> int:
|
|
return self.supportive + self.opposed
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class RankingResult:
|
|
"""Supportive and opposed ranking lists."""
|
|
|
|
supportive: list[RankingRow]
|
|
opposed: list[RankingRow]
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class TimePoint:
|
|
"""One yearly chart point."""
|
|
|
|
year: int
|
|
score: float
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class ChartSeries:
|
|
"""One legislator score-history series."""
|
|
|
|
legislator_id: int
|
|
label: str
|
|
party: str | None
|
|
state: str | None
|
|
points: list[TimePoint]
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class TopicScore:
|
|
"""Average score for one topic."""
|
|
|
|
topic: str
|
|
score: float
|
|
count: int
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class LegislatorOption:
|
|
"""Compact legislator metadata for search and comparison controls."""
|
|
|
|
legislator_id: int
|
|
display_name: str
|
|
party: str | None
|
|
state: str | None
|
|
chamber: str | None
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class LegislatorProfile:
|
|
"""Legislator metadata plus issue score summary."""
|
|
|
|
legislator: LegislatorOption
|
|
overall_score: float | None
|
|
serving_since: int | None
|
|
top_topics: list[TopicScore]
|
|
bottom_topics: list[TopicScore]
|
|
|
|
|
|
@dataclass(frozen=True)
|
|
class RadarSeries:
|
|
"""One legislator polygon for the compare radar chart."""
|
|
|
|
legislator: LegislatorOption
|
|
average_score: float | None
|
|
scores_by_topic: dict[str, float]
|
|
|
|
|
|
def latest_congress(session: Session) -> int | None:
|
|
"""Return the latest congress number in the vote table."""
|
|
return session.scalar(select(func.max(Vote.congress)))
|
|
|
|
|
|
def latest_vote_date(session: Session, congress: int | None = None) -> date | None:
|
|
"""Return the most recent vote date, optionally scoped to a congress."""
|
|
stmt = select(func.max(Vote.vote_date))
|
|
if congress is not None:
|
|
stmt = stmt.where(Vote.congress == congress)
|
|
return session.scalar(stmt)
|
|
|
|
|
|
def latest_score_year(session: Session) -> int | None:
|
|
"""Return the latest year in the precomputed legislator score table."""
|
|
return session.scalar(select(func.max(LegislatorScore.year)))
|
|
|
|
|
|
def has_scores(session: Session) -> bool:
|
|
"""Return True when the database has at least one precomputed score."""
|
|
return session.scalar(select(LegislatorScore.id).limit(1)) is not None
|
|
|
|
|
|
def issue_suggestions(
|
|
session: Session,
|
|
*,
|
|
congress: int | None,
|
|
limit: int = 12,
|
|
) -> list[str]:
|
|
"""Return common precomputed score topics for issue filter suggestions."""
|
|
stmt = (
|
|
select(LegislatorScore.topic, func.count(LegislatorScore.id).label("score_count"))
|
|
.where(LegislatorScore.topic != "")
|
|
.group_by(LegislatorScore.topic)
|
|
.order_by(desc("score_count"), LegislatorScore.topic)
|
|
.limit(limit)
|
|
)
|
|
suggestions = [row[0] for row in session.execute(stmt).all()]
|
|
if suggestions:
|
|
return suggestions
|
|
|
|
fallback = (
|
|
select(BillTopic.topic, func.count(BillTopic.id).label("topic_count"))
|
|
.where(BillTopic.topic != "")
|
|
.group_by(BillTopic.topic)
|
|
.order_by(desc("topic_count"), BillTopic.topic)
|
|
.limit(limit)
|
|
)
|
|
return [row[0] for row in session.execute(fallback).all()]
|
|
|
|
|
|
def ranking_query(
|
|
*,
|
|
issues: list[str],
|
|
chamber: Chamber,
|
|
congress: int,
|
|
) -> Select:
|
|
"""Build the aggregate ranking query from precomputed scores."""
|
|
average_score = func.avg(LegislatorScore.score).label("score")
|
|
supportive = func.sum(case((LegislatorScore.score >= 50, 1), else_=0)).label(
|
|
"supportive"
|
|
)
|
|
opposed = func.sum(case((LegislatorScore.score < 50, 1), else_=0)).label("opposed")
|
|
|
|
stmt = (
|
|
select(
|
|
Legislator.id,
|
|
Legislator.official_full_name,
|
|
Legislator.last_name,
|
|
Legislator.current_party,
|
|
Legislator.current_state,
|
|
Legislator.current_chamber,
|
|
average_score,
|
|
supportive,
|
|
opposed,
|
|
)
|
|
.join(LegislatorScore, LegislatorScore.legislator_id == Legislator.id)
|
|
.where(_score_topic_match_condition(issues))
|
|
.group_by(
|
|
Legislator.id,
|
|
Legislator.official_full_name,
|
|
Legislator.last_name,
|
|
Legislator.current_party,
|
|
Legislator.current_state,
|
|
Legislator.current_chamber,
|
|
)
|
|
)
|
|
if chamber != "all":
|
|
stmt = stmt.where(Legislator.current_chamber == _db_chamber(chamber))
|
|
return stmt
|
|
|
|
|
|
def get_rankings(
|
|
session: Session,
|
|
*,
|
|
issues: list[str],
|
|
chamber: Chamber,
|
|
congress: int,
|
|
limit: int = 10,
|
|
) -> RankingResult:
|
|
"""Return top supportive and opposed legislators from precomputed scores."""
|
|
rows = [
|
|
_ranking_row(row)
|
|
for row in session.execute(
|
|
ranking_query(issues=issues, chamber=chamber, congress=congress)
|
|
)
|
|
]
|
|
scored = [row for row in rows if row.score is not None]
|
|
supportive = sorted(
|
|
scored, key=lambda row: (-float(row.score), -row.total, row.display_name)
|
|
)[:limit]
|
|
opposed = sorted(
|
|
scored, key=lambda row: (float(row.score), -row.total, row.display_name)
|
|
)[:limit]
|
|
return RankingResult(supportive=supportive, opposed=opposed)
|
|
|
|
|
|
def get_score_history(
|
|
session: Session,
|
|
*,
|
|
issues: list[str],
|
|
chamber: Chamber,
|
|
congress: int,
|
|
legislator_ids: list[int],
|
|
) -> list[ChartSeries]:
|
|
"""Return yearly score history from precomputed scores."""
|
|
if not legislator_ids:
|
|
return []
|
|
|
|
average_score = func.avg(LegislatorScore.score).label("score")
|
|
stmt = (
|
|
select(
|
|
Legislator.id,
|
|
Legislator.official_full_name,
|
|
Legislator.last_name,
|
|
Legislator.current_party,
|
|
Legislator.current_state,
|
|
LegislatorScore.year,
|
|
average_score,
|
|
)
|
|
.join(LegislatorScore, LegislatorScore.legislator_id == Legislator.id)
|
|
.where(
|
|
Legislator.id.in_(legislator_ids),
|
|
_score_topic_match_condition(issues),
|
|
)
|
|
.group_by(
|
|
Legislator.id,
|
|
Legislator.official_full_name,
|
|
Legislator.last_name,
|
|
Legislator.current_party,
|
|
Legislator.current_state,
|
|
LegislatorScore.year,
|
|
)
|
|
.order_by(Legislator.id, LegislatorScore.year)
|
|
)
|
|
if chamber != "all":
|
|
stmt = stmt.where(Legislator.current_chamber == _db_chamber(chamber))
|
|
|
|
by_legislator: dict[int, ChartSeries] = {}
|
|
for row in session.execute(stmt):
|
|
if row.score is None:
|
|
continue
|
|
series = by_legislator.setdefault(
|
|
row.id,
|
|
ChartSeries(
|
|
legislator_id=row.id,
|
|
label=_display_name(row.official_full_name, row.last_name),
|
|
party=row.current_party,
|
|
state=row.current_state,
|
|
points=[],
|
|
),
|
|
)
|
|
series.points.append(TimePoint(year=int(row.year), score=float(row.score)))
|
|
return list(by_legislator.values())
|
|
|
|
|
|
def _ranking_row(row: object) -> RankingRow:
|
|
return RankingRow(
|
|
legislator_id=row.id,
|
|
display_name=_display_name(row.official_full_name, row.last_name),
|
|
party=row.current_party,
|
|
state=row.current_state,
|
|
chamber=row.current_chamber,
|
|
score=float(row.score) if row.score is not None else None,
|
|
supportive=row.supportive or 0,
|
|
opposed=row.opposed or 0,
|
|
)
|
|
|
|
|
|
def _score_topic_match_condition(
|
|
issues: list[str] | tuple[str, ...],
|
|
) -> ColumnElement[bool]:
|
|
normalized = normalize_issues(list(issues))
|
|
if not normalized:
|
|
return false()
|
|
return or_(*(LegislatorScore.topic.ilike(f"%{issue}%") for issue in normalized))
|
|
|
|
|
|
def search_legislators(
|
|
session: Session,
|
|
*,
|
|
query: str | None = None,
|
|
limit: int = 12,
|
|
offset: int = 0,
|
|
) -> list[LegislatorOption]:
|
|
"""Search ingested legislators, preferring those with computed scores."""
|
|
return [
|
|
_legislator_option(row)
|
|
for row in session.execute(
|
|
legislator_search_query(query=query, limit=limit, offset=offset)
|
|
)
|
|
]
|
|
|
|
|
|
def count_legislators(session: Session, *, query: str | None = None) -> int:
|
|
"""Return the total number of legislators matching a search query."""
|
|
return int(
|
|
session.scalar(
|
|
select(func.count(Legislator.id)).where(_legislator_search_condition(query))
|
|
)
|
|
or 0
|
|
)
|
|
|
|
|
|
def get_legislator_options(
|
|
session: Session, legislator_ids: list[int]
|
|
) -> list[LegislatorOption]:
|
|
"""Return legislator options in the same order as the selected IDs."""
|
|
options = {
|
|
option.legislator_id: option
|
|
for option in (
|
|
_get_legislator_option(session, legislator_id)
|
|
for legislator_id in legislator_ids
|
|
)
|
|
if option is not None
|
|
}
|
|
return [
|
|
options[legislator_id]
|
|
for legislator_id in legislator_ids
|
|
if legislator_id in options
|
|
]
|
|
|
|
|
|
def legislator_search_query(
|
|
*,
|
|
query: str | None = None,
|
|
limit: int = 12,
|
|
offset: int = 0,
|
|
) -> Select:
|
|
"""Build the legislator search query used by profile and compare controls."""
|
|
score_count = func.count(LegislatorScore.id).label("score_count")
|
|
stmt = (
|
|
select(
|
|
Legislator.id,
|
|
Legislator.official_full_name,
|
|
Legislator.last_name,
|
|
Legislator.current_party,
|
|
Legislator.current_state,
|
|
Legislator.current_chamber,
|
|
score_count,
|
|
)
|
|
.outerjoin(LegislatorScore, LegislatorScore.legislator_id == Legislator.id)
|
|
.group_by(
|
|
Legislator.id,
|
|
Legislator.official_full_name,
|
|
Legislator.first_name,
|
|
Legislator.last_name,
|
|
Legislator.current_party,
|
|
Legislator.current_state,
|
|
Legislator.current_chamber,
|
|
Legislator.bioguide_id,
|
|
)
|
|
.order_by(desc("score_count"), Legislator.last_name, Legislator.first_name)
|
|
.limit(limit)
|
|
.offset(offset)
|
|
)
|
|
return stmt.where(_legislator_search_condition(query))
|
|
|
|
|
|
def _legislator_search_condition(query: str | None) -> ColumnElement[bool]:
|
|
cleaned_query = query.strip() if query else ""
|
|
if not cleaned_query:
|
|
return true()
|
|
|
|
pattern = f"%{cleaned_query}%"
|
|
state_alias = _state_alias(cleaned_query)
|
|
conditions: list[ColumnElement[bool]] = [
|
|
Legislator.official_full_name.ilike(pattern),
|
|
Legislator.first_name.ilike(pattern),
|
|
Legislator.last_name.ilike(pattern),
|
|
Legislator.current_state.ilike(pattern),
|
|
Legislator.bioguide_id.ilike(pattern),
|
|
]
|
|
if state_alias is not None:
|
|
conditions.append(Legislator.current_state == state_alias)
|
|
return or_(*conditions)
|
|
|
|
|
|
def _state_alias(query: str) -> str | None:
|
|
normalized = " ".join(query.lower().replace(".", "").split())
|
|
if len(normalized) == 2 and normalized.isalpha():
|
|
return normalized.upper()
|
|
return STATE_ALIASES.get(normalized)
|
|
|
|
|
|
def get_legislator_profile(
|
|
session: Session,
|
|
*,
|
|
legislator_id: int | None = None,
|
|
query: str | None = None,
|
|
) -> LegislatorProfile | None:
|
|
"""Return the selected legislator profile and top/bottom topic scores."""
|
|
selected = _get_legislator_option(session, legislator_id)
|
|
cleaned_query = query.strip() if query else ""
|
|
if selected is None and cleaned_query:
|
|
matches = search_legislators(session, query=query, limit=1)
|
|
selected = matches[0] if matches else None
|
|
if selected is None:
|
|
return None
|
|
|
|
topic_scores = get_legislator_topic_scores(
|
|
session, legislator_id=selected.legislator_id
|
|
)
|
|
top_topics = sorted(topic_scores, key=lambda item: (-item.score, item.topic))[:3]
|
|
bottom_topics = sorted(topic_scores, key=lambda item: (item.score, item.topic))[:3]
|
|
overall_score = session.scalar(
|
|
select(func.avg(LegislatorScore.score)).where(
|
|
LegislatorScore.legislator_id == selected.legislator_id
|
|
)
|
|
)
|
|
serving_since = session.scalar(
|
|
select(func.min(LegislatorScore.year)).where(
|
|
LegislatorScore.legislator_id == selected.legislator_id
|
|
)
|
|
)
|
|
return LegislatorProfile(
|
|
legislator=selected,
|
|
overall_score=float(overall_score) if overall_score is not None else None,
|
|
serving_since=int(serving_since) if serving_since is not None else None,
|
|
top_topics=top_topics,
|
|
bottom_topics=bottom_topics,
|
|
)
|
|
|
|
|
|
def get_legislator_topic_scores(
|
|
session: Session,
|
|
*,
|
|
legislator_id: int,
|
|
) -> list[TopicScore]:
|
|
"""Return all average topic scores for one legislator."""
|
|
rows = session.execute(
|
|
select(
|
|
LegislatorScore.topic,
|
|
func.avg(LegislatorScore.score).label("score"),
|
|
func.count(LegislatorScore.id).label("count"),
|
|
)
|
|
.where(LegislatorScore.legislator_id == legislator_id)
|
|
.group_by(LegislatorScore.topic)
|
|
.order_by(LegislatorScore.topic)
|
|
)
|
|
return [
|
|
TopicScore(topic=row.topic, score=float(row.score), count=row.count)
|
|
for row in rows
|
|
if row.score is not None
|
|
]
|
|
|
|
|
|
def get_single_legislator_history(
|
|
session: Session,
|
|
*,
|
|
legislator_id: int,
|
|
topic: str,
|
|
) -> list[ChartSeries]:
|
|
"""Return score history for one legislator/topic pair."""
|
|
option = _get_legislator_option(session, legislator_id)
|
|
if option is None:
|
|
return []
|
|
|
|
rows = session.execute(
|
|
select(
|
|
LegislatorScore.year,
|
|
func.avg(LegislatorScore.score).label("score"),
|
|
)
|
|
.where(
|
|
LegislatorScore.legislator_id == legislator_id,
|
|
LegislatorScore.topic == topic,
|
|
)
|
|
.group_by(LegislatorScore.year)
|
|
.order_by(LegislatorScore.year)
|
|
)
|
|
points = [
|
|
TimePoint(year=int(row.year), score=float(row.score))
|
|
for row in rows
|
|
if row.score is not None
|
|
]
|
|
return [
|
|
ChartSeries(
|
|
legislator_id=option.legislator_id,
|
|
label=option.display_name,
|
|
party=option.party,
|
|
state=option.state,
|
|
points=points,
|
|
)
|
|
]
|
|
|
|
|
|
def get_compare_defaults(session: Session) -> tuple[list[int], list[str]]:
|
|
"""Return default compare legislators and topics."""
|
|
legislators = search_legislators(session, limit=3)
|
|
topics = issue_suggestions(session, congress=None, limit=6)
|
|
return [item.legislator_id for item in legislators], topics
|
|
|
|
|
|
def get_compare_radar_series(
|
|
session: Session,
|
|
*,
|
|
legislator_ids: list[int],
|
|
topics: list[str],
|
|
) -> list[RadarSeries]:
|
|
"""Return radar chart scores for selected legislators and topics."""
|
|
if not legislator_ids:
|
|
return []
|
|
|
|
options = {
|
|
option.legislator_id: option
|
|
for option in (
|
|
_get_legislator_option(session, legislator_id)
|
|
for legislator_id in legislator_ids
|
|
)
|
|
if option is not None
|
|
}
|
|
if not options:
|
|
return []
|
|
|
|
scores: dict[int, dict[str, float]] = {
|
|
legislator_id: {} for legislator_id in options
|
|
}
|
|
if topics:
|
|
rows = session.execute(
|
|
select(
|
|
LegislatorScore.legislator_id,
|
|
LegislatorScore.topic,
|
|
func.avg(LegislatorScore.score).label("score"),
|
|
)
|
|
.where(
|
|
LegislatorScore.legislator_id.in_(list(options)),
|
|
LegislatorScore.topic.in_(topics),
|
|
)
|
|
.group_by(LegislatorScore.legislator_id, LegislatorScore.topic)
|
|
)
|
|
for row in rows:
|
|
scores[row.legislator_id][row.topic] = float(row.score)
|
|
|
|
series: list[RadarSeries] = []
|
|
for legislator_id in legislator_ids:
|
|
option = options.get(legislator_id)
|
|
if option is None:
|
|
continue
|
|
topic_scores = scores.get(legislator_id, {})
|
|
values = list(topic_scores.values())
|
|
series.append(
|
|
RadarSeries(
|
|
legislator=option,
|
|
average_score=sum(values) / len(values) if values else None,
|
|
scores_by_topic=topic_scores,
|
|
)
|
|
)
|
|
return series
|
|
|
|
|
|
def _display_name(official_full_name: str | None, last_name: str | None) -> str:
|
|
if official_full_name:
|
|
parts = official_full_name.split()
|
|
if len(parts) > 1:
|
|
return f"{parts[-1]}, {' '.join(parts[:-1])}"
|
|
return official_full_name
|
|
return last_name or "Unknown"
|
|
|
|
|
|
def _legislator_option(row: object) -> LegislatorOption:
|
|
return LegislatorOption(
|
|
legislator_id=row.id,
|
|
display_name=_display_name(row.official_full_name, row.last_name),
|
|
party=row.current_party,
|
|
state=row.current_state,
|
|
chamber=row.current_chamber,
|
|
)
|
|
|
|
|
|
def _get_legislator_option(
|
|
session: Session, legislator_id: int | None
|
|
) -> LegislatorOption | None:
|
|
if legislator_id is None:
|
|
return None
|
|
row = session.execute(
|
|
select(
|
|
Legislator.id,
|
|
Legislator.official_full_name,
|
|
Legislator.last_name,
|
|
Legislator.current_party,
|
|
Legislator.current_state,
|
|
Legislator.current_chamber,
|
|
).where(Legislator.id == legislator_id)
|
|
).first()
|
|
return _legislator_option(row) if row is not None else None
|
|
|
|
|
|
def _db_chamber(chamber: Chamber) -> str:
|
|
return {"house": "House", "senate": "Senate", "all": "all"}[chamber]
|