From d0e865ffbd5a0cd6d707c664aee503ac383bca49 Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Tue, 28 Apr 2026 23:01:38 -0400
Subject: [PATCH 1/4] added congress_vote_context.py ingest_congress.py
 ingest_posts.py to jobs dir

---
 pipelines/jobs/calculate_legislator_scores.py |    4 +-
 pipelines/jobs/congress_vote_context.py       | 1984 +++++++++++++++++
 pipelines/jobs/ingest_congress.py             | 1084 +++++++++
 pipelines/jobs/ingest_posts.py                |  281 +++
 4 files changed, 3351 insertions(+), 2 deletions(-)
 create mode 100644 pipelines/jobs/congress_vote_context.py
 create mode 100644 pipelines/jobs/ingest_congress.py
 create mode 100644 pipelines/jobs/ingest_posts.py

diff --git a/pipelines/jobs/calculate_legislator_scores.py b/pipelines/jobs/calculate_legislator_scores.py
index 2a31361..37c5348 100644
--- a/pipelines/jobs/calculate_legislator_scores.py
+++ b/pipelines/jobs/calculate_legislator_scores.py
@@ -23,7 +23,7 @@ from sqlalchemy import (
 )
 from sqlalchemy.orm import Session
 
-from pipelines.congress_vote_context import create_score_run, finalize_score_run
+from pipelines.jobs.congress_vote_context import create_score_run, finalize_score_run
 from pipelines.orm.common import get_postgres_engine
 from pipelines.orm.data_science_dev.congress import (
     BillTopic,
@@ -39,7 +39,7 @@ from pipelines.orm.data_science_dev.congress import (
     VoteRelationship,
     VoteRecord,
 )
-from pipelines.pipelines.jobs.extract_bill_topics import normalize_topic_label
+from pipelines.jobs.extract_bill_topics import normalize_topic_label
 from pipelines.web.scoring import (
     OPPOSE_POSITIONS,
     SUPPORT_POSITIONS,
diff --git a/pipelines/jobs/congress_vote_context.py b/pipelines/jobs/congress_vote_context.py
new file mode 100644
index 0000000..c0a12d8
--- /dev/null
+++ b/pipelines/jobs/congress_vote_context.py
@@ -0,0 +1,1984 @@
+"""Offline canonical vote-context parsing, matching, classification, and scoring helpers."""
+
+from __future__ import annotations
+
+import hashlib
+import logging
+import re
+import subprocess
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass
+from datetime import UTC, date, datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, Any
+from zoneinfo import ZoneInfo
+
+from sqlalchemy import delete, select
+from sqlalchemy.orm import Session, joinedload, selectinload
+
+from pipelines.parallelize import parallelize_thread
+from pipelines.orm.data_science_dev.congress import (
+    Amendment,
+    AmendmentAction,
+    AmendmentActionRecordedVote,
+    Bill,
+    BillAction,
+    BillActionRecordedVote,
+    BillRelation,
+    BillText,
+    ClassificationMethod,
+    ConfidenceLevel,
+    IngestRun,
+    MeasureFunction,
+    MeasureSubtype,
+    ScoreRun,
+    SourceArtifact,
+    SubjectType,
+    TextResolutionMethod,
+    TextTargetBasis,
+    TextTargetType,
+    Vote,
+    VoteActionMatch,
+    VoteActionScope,
+    VoteClassification,
+    VoteContextAudit,
+    VoteEffect,
+    VoteMeasureLink,
+    VoteMeasureRole,
+    VotePositionMeaning,
+    VoteRelationship,
+    VoteTextTarget,
+)
+
+if TYPE_CHECKING:
+    from collections.abc import Iterable, Sequence
+
+logger = logging.getLogger(__name__)
+
+CLASSIFICATION_VERSION = "canonical_vote_context_v3"  # stored as IngestRun.classifier_version by start_ingest_run
+SCORING_VERSION = "canonical_vote_scores_v3"  # version label for scoring — presumably recorded on ScoreRun; confirm
+EASTERN_TIMEZONE = ZoneInfo("America/New_York")  # zone used to derive the calendar day of a vote for comparisons
+OFFLINE_BILLSTATUS_MIN_CONGRESS = 108  # congress directories numbered below this are skipped for vote context
+PARALLEL_FILE_CHUNK_SIZE = 500  # number of billstatus paths handed to parallelize_thread per batch
+PARALLEL_PROGRESS_TRACKER = 250  # forwarded as progress_tracker= to parallelize_thread
+
+DIRECT_TEXT_PATTERNS = (  # substrings tested by is_direct_measure_text_question against normalized text
+    "on passage",
+    "on passage of the bill",
+    "on the bill",
+    "on the joint resolution",
+    "on agreeing to the resolution",
+    "on motion to suspend the rules and pass",
+    "on motion to suspend the rules and agree",
+    "on motion to suspend the rules and concur",
+    "on agreeing to the conference report",
+    "on motion to concur",
+    "passed house",
+    "passed senate",
+)
+AMENDMENT_DIRECT_PATTERNS = (  # substrings tested by is_direct_amendment_text_question
+    "on the amendment",
+    "on agreeing to the amendment",
+    "agreeing to the amendment",
+)
+PROCEDURAL_PATTERNS = (  # substrings tested by is_procedural_question
+    "cloture",
+    "motion to proceed",
+    "motion to recommit",
+    "motion to reconsider",
+    "motion to table",
+    "previous question",
+    "ordering the previous question",
+    "rule",  # NOTE(review): plain substring test, so this also hits "suspend the rules" — confirm intended
+)
+NON_LEGISLATIVE_PATTERNS = (  # substrings tested by is_non_legislative_question
+    "nomination",
+    "treaty",
+    "speaker",
+    "quorum",
+    "journal",
+    "adjourn",
+)
+SPECIAL_RULE_PATTERNS = (  # substrings tested by is_special_rule_measure
+    "providing for consideration of",
+    "providing for the consideration of",
+)
+MEASURE_REF_RE = re.compile(  # dotted measure citations such as "H.R. 123" or "S. Con. Res. 4"
+    r"\b(?P<type>H\.?\s*R\.|S\.|H\.?\s*J\.?\s*Res\.|S\.?\s*J\.?\s*Res\."
+    r"|H\.?\s*Con\.?\s*Res\.|S\.?\s*Con\.?\s*Res\.|H\.?\s*Res\.|S\.?\s*Res\.)"
+    r"\s*(?P<number>\d+)\b",
+    flags=re.IGNORECASE,
+)
+
+
+@dataclass(frozen=True)
+class ParsedRecordedVote:  # one recorded-vote entry under a billstatus action, as built by parse_billstatus_file
+    congress: int  # integer value of the entry's <congress>
+    chamber: str  # normalize_chamber() applied to the entry's <chamber>
+    session_number: int  # integer value of <sessionNumber> / <session-number>
+    roll_number: int  # integer value of <rollNumber> / <roll-number>
+    vote_datetime: datetime | None  # parse_datetime_like() of <date>; None when absent or unparseable
+    vote_url: str | None  # raw <url> text when present
+
+
+@dataclass(frozen=True)
+class ParsedAction:  # one entry of a bill's <actions> list, as built by parse_billstatus_file
+    sequence: int  # 1-based position among the item/action children; skipped entries still consume a number
+    action_date: date  # always set: entries without a parseable date are dropped by the parser
+    action_time: str | None  # raw <actionTime> / <action-time> text, left unparsed
+    action_text: str  # always non-empty: entries without <text> are dropped by the parser
+    action_type: str | None  # text of <type>
+    action_code: str | None  # text of <actionCode> / <action-code>
+    source_system_code: str | None  # <code> found under the entry's <sourceSystem>
+    source_system_name: str | None  # <name> found under the entry's <sourceSystem>
+    recorded_votes: tuple[ParsedRecordedVote, ...]  # entries listed under <recordedVotes>; may be empty
+
+
+@dataclass(frozen=True)
+class ParsedBillRelation:  # one related-bill entry, as built by parse_billstatus_file
+    related_key: tuple[int, str, int]  # (congress, lower-cased bill type, number) of the related bill
+    relationship_type: str  # relationship label from the XML; "related" when none is given
+    identified_by: str | None  # <identifiedBy> / <identified-by> text when present
+    latest_action_date: date | None  # parse_date_like() of <latestActionDate> / <latest-action-date>
+    latest_action_text: str | None  # text of <latestActionText>, <latest-action-text>, or <latestAction>
+
+
+@dataclass(frozen=True)
+class ParsedTextVersion:  # one text-version entry, as built by parse_billstatus_file
+    version_code: str  # lower-cased version code, or the lower-cased version name when no code is found
+    version_name: str | None  # text of the entry's <type> / <versionName>
+    version_date: date | None  # parse_date_like() of the entry's <date>
+    source_datetime_raw: str | None  # the unparsed <date> text
+    text_url_xml: str | None  # URL of the format whose type/name mentions "xml"
+    text_url_pdf: str | None  # URL of the format whose type/name mentions "pdf"
+    text_url_html: str | None  # URL of the format mentioning "html" or "formatted text"
+
+
+@dataclass(frozen=True)
+class ParsedBillStatus:  # everything parse_billstatus_file extracts from one billstatus XML file
+    bill_key: tuple[int, str, int]  # (congress, lower-cased bill type, bill number)
+    actions: tuple[ParsedAction, ...]  # kept actions, in document order
+    relations: tuple[ParsedBillRelation, ...]  # related-bill entries, in document order
+    text_versions: tuple[ParsedTextVersion, ...]  # text-version entries, in document order
+
+
+@dataclass(frozen=True)
+class ActionCandidate:  # a bill or amendment action considered as the match for a vote
+    scope: VoteActionScope  # enum value set by the matcher (matcher not visible in this excerpt)
+    bill_action: BillAction | None  # bill-level action, if the candidate is one
+    amendment_action: AmendmentAction | None  # amendment-level action, if the candidate is one
+    score: int  # presumably higher means a better match — confirm against the matcher
+    match_method: str  # label supplied by the matcher
+    match_reason: str  # explanation supplied by the matcher
+    match_confidence: ConfidenceLevel  # confidence supplied by the matcher
+
+    @property
+    def selected_action_text(self) -> str:  # text of the bill action if set, else the amendment action, else ""
+        if self.bill_action is not None:
+            return self.bill_action.action_text
+        if self.amendment_action is not None:
+            return self.amendment_action.action_text
+        return ""
+
+
+def _chunked[T](items: Sequence[T], chunk_size: int) -> Iterable[Sequence[T]]:
+    """Yield consecutive slices of at most ``chunk_size`` items; only the last slice may be shorter."""
+    for start in range(0, len(items), chunk_size):
+        yield items[start : start + chunk_size]
+
+
+def get_git_sha(repo_root: Path | None = None) -> str | None:
+    """Return ``git rev-parse HEAD`` run in ``repo_root``; None if git fails or prints nothing."""
+    try:
+        completed = subprocess.run(
+            ["git", "rev-parse", "HEAD"],
+            cwd=repo_root,
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+    except (OSError, subprocess.SubprocessError):
+        return None
+    return completed.stdout.strip() or None
+
+
+def start_ingest_run(
+    session: Session,
+    *,
+    source_snapshot_label: str,
+    repo_root: Path | None = None,
+) -> IngestRun:
+    """Insert an IngestRun with status "running" and the current git SHA, commit it, and return it."""
+    ingest_run = IngestRun(
+        started_at=datetime.now(UTC),
+        git_sha=get_git_sha(repo_root),
+        classifier_version=CLASSIFICATION_VERSION,
+        source_snapshot_label=source_snapshot_label,
+        status="running",
+    )
+    session.add(ingest_run)
+    session.commit()
+    return ingest_run
+
+
+def finish_ingest_run(
+    session: Session,
+    ingest_run_id: int,
+    *,
+    status: str,
+) -> None:
+    """Stamp ``completed_at`` and ``status`` on the run and commit; do nothing if the id is unknown."""
+    ingest_run = session.get(IngestRun, ingest_run_id)
+    if ingest_run is None:
+        return
+    ingest_run.completed_at = datetime.now(UTC)
+    ingest_run.status = status
+    session.commit()
+
+
+def register_source_artifact(
+    session: Session,
+    *,
+    path: Path,
+    source_kind: str,
+    congress: int,
+    chamber: str | None,
+    ingest_run_id: int | None,
+    source_url: str | None = None,
+) -> SourceArtifact:
+    """Return the SourceArtifact for this path, content hash, and run, adding and flushing it if absent."""
+    payload = path.read_bytes()  # the whole file is read into memory to hash and size it
+    sha256 = hashlib.sha256(payload).hexdigest()
+    modified_at = datetime.fromtimestamp(path.stat().st_mtime, tz=UTC)
+    artifact = session.scalar(
+        select(SourceArtifact).where(
+            SourceArtifact.local_path == str(path),
+            SourceArtifact.sha256 == sha256,
+            SourceArtifact.ingest_run_id == ingest_run_id,
+        )
+    )
+    if artifact is not None:
+        return artifact
+
+    artifact = SourceArtifact(
+        source_kind=source_kind,
+        congress=congress,
+        chamber=chamber,
+        local_path=str(path),
+        source_url=source_url,
+        sha256=sha256,
+        byte_size=len(payload),
+        modified_at=modified_at,
+        ingested_at=datetime.now(UTC),
+        ingest_run_id=ingest_run_id,
+    )
+    session.add(artifact)
+    session.flush()  # flushed, not committed: the caller owns the transaction
+    return artifact
+
+
+def derive_session_number(congress: int, session_year: int) -> int:
+    """Return ``session_year`` minus the congress's first year, plus one; the result is not clamped to 1-2."""
+    congress_start_year = ((congress - 1) * 2) + 1789
+    return session_year - congress_start_year + 1
+
+
+def normalize_chamber(raw: str | None) -> str | None:
+    """Map h/house/s/senate (any case, surrounding spaces ignored) to "House"/"Senate"; else return ``raw``."""
+    if raw is None:
+        return None
+    value = raw.strip().lower()
+    mapping = {
+        "h": "House",
+        "house": "House",
+        "s": "Senate",
+        "senate": "Senate",
+    }
+    return mapping.get(value, raw)
+
+
+def parse_date_like(value: Any) -> date | None:
+    """Return a date from a date object or from the first 10 characters of ``str(value)``; None on failure."""
+    if value is None:
+        return None
+    if isinstance(value, date) and not isinstance(value, datetime):
+        return value
+    text = str(value).strip()
+    if not text:
+        return None
+    try:
+        return date.fromisoformat(text[:10])
+    except ValueError:
+        return None
+
+
+def parse_datetime_like(
+    value: Any,
+    *,
+    fallback_time: str | None = None,  # only consulted when ``value`` itself fails to parse as ISO
+) -> datetime | None:
+    """Parse an ISO-like value into an aware UTC datetime; naive inputs are assumed to already be UTC."""
+    if value is None:
+        return None
+    if isinstance(value, datetime):
+        return value.astimezone(UTC) if value.tzinfo else value.replace(tzinfo=UTC)
+    text = str(value).strip()
+    if not text:
+        return None
+
+    normalized = text.replace("Z", "+00:00")
+    try:
+        parsed = datetime.fromisoformat(normalized)
+    except ValueError:
+        if fallback_time:
+            fallback = f"{text[:10]}T{fallback_time}"
+            try:
+                parsed = datetime.fromisoformat(fallback)
+            except ValueError:
+                return None
+        else:
+            try:
+                parsed = datetime.fromisoformat(f"{text[:10]}T00:00:00")
+            except ValueError:
+                return None
+
+    if parsed.tzinfo is None:
+        return parsed.replace(tzinfo=UTC)
+    return parsed.astimezone(UTC)
+
+
+def legislative_date_for_comparison(
+    *,
+    vote_datetime: datetime | None,
+    fallback_date: date | None,
+) -> date | None:
+    """Return the vote's calendar date in America/New_York, or ``fallback_date`` when no datetime is given."""
+    if vote_datetime is not None:
+        return vote_datetime.astimezone(EASTERN_TIMEZONE).date()
+    return fallback_date
+
+
+def measure_subtype_for_bill_type(bill_type: str | None) -> MeasureSubtype | None:
+    """Map a bill type code (stripped, case-insensitive) to its MeasureSubtype; None if missing or unknown."""
+    if bill_type is None:
+        return None
+    normalized = bill_type.strip().lower()
+    if normalized in {"hr", "s"}:
+        return MeasureSubtype.BILL
+    if normalized in {"hjres", "sjres"}:
+        return MeasureSubtype.JOINT_RESOLUTION
+    if normalized in {"hconres", "sconres"}:
+        return MeasureSubtype.CONCURRENT_RESOLUTION
+    if normalized in {"hres", "sres"}:
+        return MeasureSubtype.SIMPLE_RESOLUTION
+    return None
+
+
+def measure_type_value(bill_type: str | None) -> str | None:
+    """Return ``bill_type`` upper-cased (whitespace is not stripped), or None when it is None."""
+    if bill_type is None:
+        return None
+    return bill_type.upper()
+
+
+def is_house_origin_measure(bill: Bill) -> bool:
+    """Return True when ``bill.bill_type`` starts with "h", compared case-insensitively."""
+    return bill.bill_type.lower().startswith("h")
+
+
+def is_senate_origin_measure(bill: Bill) -> bool:
+    """Return True when ``bill.bill_type`` starts with "s", compared case-insensitively."""
+    return bill.bill_type.lower().startswith("s")
+
+
+def normalized_text(*parts: str | None) -> str:
+    """Join the truthy parts with spaces, casefold, and collapse every whitespace run to one space."""
+    joined = " ".join(part for part in parts if part)
+    return " ".join(joined.casefold().split())
+
+
+def has_amendment_signal(*parts: str | None, raw_amendment_ref: dict | None = None) -> bool:
+    """Return True for any truthy ``raw_amendment_ref`` or when the normalized text contains "amendment"."""
+    if raw_amendment_ref:
+        return True
+    text = normalized_text(*parts)
+    return "amendment" in text
+
+
+def is_non_legislative_question(*parts: str | None) -> bool:
+    """Return True when the normalized text contains any NON_LEGISLATIVE_PATTERNS substring."""
+    text = normalized_text(*parts)
+    return any(pattern in text for pattern in NON_LEGISLATIVE_PATTERNS)
+
+
+def is_procedural_question(*parts: str | None) -> bool:
+    """Return True when the normalized text contains any PROCEDURAL_PATTERNS substring."""
+    text = normalized_text(*parts)
+    return any(pattern in text for pattern in PROCEDURAL_PATTERNS)
+
+
+def is_direct_measure_text_question(*parts: str | None) -> bool:
+    """Return True when the text holds a DIRECT_TEXT_PATTERNS phrase or one of the looser fallback phrases."""
+    haystack = normalized_text(*parts)
+    # Broader phrases accepted in addition to the exact DIRECT_TEXT_PATTERNS entries.
+    fallback_phrases = (
+        "agreeing to the resolution",
+        "suspend the rules",
+        "conference report",
+        "passed house",
+        "passed senate",
+    )
+    return any(phrase in haystack for phrase in (*DIRECT_TEXT_PATTERNS, *fallback_phrases))
+
+
+def is_direct_amendment_text_question(*parts: str | None) -> bool:
+    """Return True for an AMENDMENT_DIRECT_PATTERNS phrase, unless the text is a motion to table the amendment."""
+    text = normalized_text(*parts)
+    if "motion to table the amendment" in text:
+        return False
+    return any(pattern in text for pattern in AMENDMENT_DIRECT_PATTERNS)
+
+
+def is_special_rule_measure(
+    *,
+    bill: Bill | None,
+    question: str | None,
+    action_text: str | None,
+) -> bool:
+    """Return True for a simple resolution whose question, action text, or titles hold a SPECIAL_RULE_PATTERNS phrase."""
+    if bill is None or measure_subtype_for_bill_type(bill.bill_type) is not MeasureSubtype.SIMPLE_RESOLUTION:
+        return False
+    text = normalized_text(
+        question,
+        action_text,
+        bill.title,
+        bill.title_short,
+        bill.official_title,
+    )
+    return any(pattern in text for pattern in SPECIAL_RULE_PATTERNS)
+
+
+def measure_function_for_vote(
+    *,
+    bill: Bill | None,
+    question: str | None,
+    action_text: str | None,
+) -> MeasureFunction | None:
+    """Classify what kind of measure was voted on; None when the bill or its subtype is unknown."""
+    # Without a bill there is no type to classify.
+    if not bill:
+        return None
+    subtype = measure_subtype_for_bill_type(bill.bill_type)
+    if subtype is None:
+        return None
+    if is_special_rule_measure(bill=bill, question=question, action_text=action_text):
+        return MeasureFunction.SPECIAL_RULE
+
+    # Every later rule is a substring test over the question, action text, and bill titles.
+    haystack = normalized_text(question, action_text, bill.title, bill.title_short, bill.official_title)
+
+    def mentions(*phrases: str) -> bool:
+        return any(phrase in haystack for phrase in phrases)
+
+    if mentions("budget resolution"):
+        return MeasureFunction.BUDGET_RESOLUTION
+    if subtype is MeasureSubtype.SIMPLE_RESOLUTION and mentions(
+        "rules of the house", "electing the speaker", "authorizing the speaker"
+    ):
+        return MeasureFunction.CHAMBER_INTERNAL
+    if mentions("sense of", "commemorat", "congratulating"):
+        return MeasureFunction.COMMEMORATIVE_OR_SENSE_OF
+    return MeasureFunction.SUBSTANTIVE_MEASURE
+
+
+def parse_measure_references(
+    text: str | None,
+    *,
+    congress: int,
+) -> list[tuple[int, str, int]]:
+    """Extract referenced measures from question/title/action text.
+
+    Every citation matched by ``MEASURE_REF_RE`` (for example "H.R. 123" or
+    "S. Con. Res. 4") becomes a ``(congress, bill_type, number)`` key. The
+    text itself carries no congress, so the caller's ``congress`` is used.
+
+    Args:
+        text: Free text to scan; ``None`` or empty yields no references.
+        congress: Congress number recorded on every returned key.
+
+    Returns:
+        Keys in order of appearance; repeated citations are not de-duplicated.
+    """
+    if not text:
+        return []
+
+    refs: list[tuple[int, str, int]] = []
+    for match in MEASURE_REF_RE.finditer(text):
+        # The type group only ever holds letters, dots, and whitespace. Keeping
+        # the letters collapses "H. J. Res." to "hjres" even when the separator
+        # is a tab, newline, or non-breaking space rather than a plain space.
+        bill_type = "".join(char for char in match.group("type").casefold() if char.isalpha())
+        refs.append((congress, bill_type, int(match.group("number"))))
+    return refs
+
+
+def require_billstatus_artifacts(congress_dirs: Sequence[Path]) -> None:
+    """Raise RuntimeError naming every directory with no fdsys_billstatus.xml anywhere under its bills/ tree."""
+    missing = [
+        congress_dir
+        for congress_dir in congress_dirs
+        if not any((congress_dir / "bills").rglob("fdsys_billstatus.xml"))
+    ]
+    if missing:
+        congress_list = ", ".join(path.name for path in missing)
+        msg = (
+            "Canonical offline vote-context resolution requires local BILLSTATUS "
+            f"artifacts. Missing fdsys_billstatus.xml under congress directories: {congress_list}"
+        )
+        raise RuntimeError(msg)
+
+
+def filter_context_supported_congress_dirs(
+    congress_dirs: Sequence[Path],
+) -> list[Path]:
+    """Keep directories whose numeric name is at least OFFLINE_BILLSTATUS_MIN_CONGRESS; log the skipped ones."""
+    supported: list[Path] = []
+    skipped: list[Path] = []
+    for congress_dir in congress_dirs:
+        congress_number = int(congress_dir.name)  # raises ValueError for a non-numeric directory name
+        if congress_number < OFFLINE_BILLSTATUS_MIN_CONGRESS:
+            skipped.append(congress_dir)
+        else:
+            supported.append(congress_dir)
+
+    if skipped:
+        logger.info(
+            "Skipping canonical vote-context steps for pre-%sth Congress directories: %s",
+            OFFLINE_BILLSTATUS_MIN_CONGRESS,
+            ", ".join(path.name for path in skipped),
+        )
+    return supported
+
+
+def _xml_local_name(tag: str) -> str:  # drop everything up to the last "}", i.e. a "{namespace}" prefix
+    return tag.rsplit("}", 1)[-1]
+
+
+def _xml_text(element: ET.Element | None, *names: str) -> str | None:  # first non-empty text among matching tags
+    if element is None:
+        return None
+    for descendant in element.iter():  # document order at any depth, starting with ``element`` itself
+        if _xml_local_name(descendant.tag) in names:
+            text = descendant.text.strip() if descendant.text else None
+            if text:  # a matching tag with blank text is passed over and the search continues
+                return text
+    return None
+
+
+def _xml_direct_children(element: ET.Element, *names: str) -> list[ET.Element]:  # immediate children only
+    return [child for child in list(element) if _xml_local_name(child.tag) in names]
+
+
+def _xml_direct_child(element: ET.Element, *names: str) -> ET.Element | None:  # first matching immediate child
+    for child in list(element):
+        if _xml_local_name(child.tag) in names:
+            return child
+    return None
+
+
+def parse_billstatus_file(path: Path) -> ParsedBillStatus | None:
+    """Parse one Bill Status XML file into actions, related bills, and text versions; None if unusable."""
+    try:
+        root = ET.fromstring(path.read_bytes())
+    except ET.ParseError:  # only malformed XML is handled here; read errors propagate
+        logger.exception("Failed to parse bill status XML: %s", path)
+        return None
+
+    bill_node = _xml_direct_child(root, "bill")
+    if bill_node is None:
+        bill_node = root  # no <bill> wrapper: treat the root element as the bill
+
+    congress_text = _xml_text(bill_node, "congress")
+    bill_type_text = _xml_text(bill_node, "billType", "bill-type", "type")
+    bill_number_text = _xml_text(bill_node, "billNumber", "bill-number", "number")
+    if not congress_text or not bill_type_text or not bill_number_text:
+        return None
+
+    bill_key = (int(congress_text), bill_type_text.strip().lower(), int(bill_number_text))
+
+    actions_parent = _xml_direct_child(bill_node, "actions")
+    actions: list[ParsedAction] = []
+    if actions_parent is not None:
+        for index, item in enumerate(_xml_direct_children(actions_parent, "item", "action"), start=1):
+            action_date = parse_date_like(_xml_text(item, "actionDate", "actedAt", "action-date", "acted_at"))
+            action_text = _xml_text(item, "text") or ""
+            if action_date is None or not action_text:
+                continue  # the skipped entry still consumes its sequence number
+
+            source_system = _xml_direct_child(item, "sourceSystem")
+            recorded_votes_parent = _xml_direct_child(item, "recordedVotes")
+            recorded_votes: list[ParsedRecordedVote] = []
+            if recorded_votes_parent is not None:
+                for vote_item in _xml_direct_children(recorded_votes_parent, "recordedVote", "item"):
+                    roll_number = _xml_text(vote_item, "rollNumber", "roll-number")
+                    chamber = normalize_chamber(_xml_text(vote_item, "chamber"))
+                    congress = _xml_text(vote_item, "congress")
+                    session_number = _xml_text(vote_item, "sessionNumber", "session-number")
+                    if not roll_number or chamber is None or not congress or not session_number:
+                        continue  # incomplete roll-call references are dropped
+                    recorded_votes.append(
+                        ParsedRecordedVote(
+                            congress=int(congress),
+                            chamber=chamber,
+                            session_number=int(session_number),
+                            roll_number=int(roll_number),
+                            vote_datetime=parse_datetime_like(_xml_text(vote_item, "date")),
+                            vote_url=_xml_text(vote_item, "url"),
+                        )
+                    )
+
+            actions.append(
+                ParsedAction(
+                    sequence=index,
+                    action_date=action_date,
+                    action_time=_xml_text(item, "actionTime", "action-time"),
+                    action_text=action_text,
+                    action_type=_xml_text(item, "type"),
+                    action_code=_xml_text(item, "actionCode", "action-code"),
+                    source_system_code=_xml_text(source_system, "code"),
+                    source_system_name=_xml_text(source_system, "name"),
+                    recorded_votes=tuple(recorded_votes),
+                )
+            )
+
+    relations: list[ParsedBillRelation] = []
+    related_parent = _xml_direct_child(bill_node, "relatedBills", "relatedBillDetails")
+    if related_parent is not None:
+        for item in _xml_direct_children(related_parent, "item", "relatedBill", "relatedBillDetail"):
+            relation_congress = _xml_text(item, "congress")
+            relation_type = _xml_text(item, "type", "billType")
+            relation_number = _xml_text(item, "number", "billNumber")
+            if not relation_congress or not relation_type or not relation_number:
+                continue
+            relationship_details = _xml_direct_child(item, "relationshipDetails")
+            relationship_item = (
+                _xml_direct_child(relationship_details, "item")
+                if relationship_details is not None
+                else None
+            )
+            relations.append(
+                ParsedBillRelation(
+                    related_key=(
+                        int(relation_congress),
+                        relation_type.strip().lower(),
+                        int(relation_number),
+                    ),
+                    relationship_type=(
+                        _xml_text(
+                            relationship_item,
+                            "relationshipType",
+                            "relationship-type",
+                            "typeOfRelationship",
+                            "type",
+                        )
+                        or _xml_text(
+                            item,
+                            "relationshipType",
+                            "relationship-type",
+                            "typeOfRelationship",
+                        )
+                        or "related"
+                    ),
+                    identified_by=_xml_text(
+                        relationship_item,
+                        "identifiedBy",
+                        "identified-by",
+                    )
+                    or _xml_text(item, "identifiedBy", "identified-by"),
+                    latest_action_date=parse_date_like(_xml_text(item, "latestActionDate", "latest-action-date")),
+                    latest_action_text=_xml_text(item, "latestActionText", "latest-action-text", "latestAction"),
+                )
+            )
+
+    text_versions: list[ParsedTextVersion] = []
+    titles_parent = _xml_direct_child(bill_node, "titles")
+    title_version_name_to_code: dict[str, str] = {}  # normalized version name -> lower-cased version code
+    if titles_parent is not None:
+        for item in _xml_direct_children(titles_parent, "item", "title"):
+            version_name = _xml_text(item, "billTextVersionName")
+            version_code = _xml_text(item, "billTextVersionCode")
+            if version_name and version_code:
+                title_version_name_to_code.setdefault(
+                    normalized_text(version_name),
+                    version_code.lower(),
+                )
+
+    text_versions_parent = _xml_direct_child(bill_node, "textVersions")
+    if text_versions_parent is not None:
+        for item in _xml_direct_children(text_versions_parent, "item", "textVersion"):
+            version_name = _xml_text(item, "type", "versionName")
+            version_code = _xml_text(item, "billTextVersionCode", "versionCode", "typeCode")
+            if version_code is None and version_name is not None:  # recover the code from the <titles> mapping
+                version_code = title_version_name_to_code.get(normalized_text(version_name))
+            raw_date = _xml_text(item, "date")
+            if not version_code and not version_name:
+                continue
+            formats_parent = _xml_direct_child(item, "formats")
+            xml_url = None
+            pdf_url = None
+            html_url = None
+            if formats_parent is not None:
+                for format_item in _xml_direct_children(formats_parent, "item", "format"):
+                    format_type = normalized_text(_xml_text(format_item, "type"), _xml_text(format_item, "name"))
+                    url = _xml_text(format_item, "url")
+                    if not url:
+                        continue
+                    if "xml" in format_type:
+                        xml_url = url
+                    elif "pdf" in format_type:
+                        pdf_url = url
+                    elif "html" in format_type or "formatted text" in format_type:
+                        html_url = url
+            text_versions.append(
+                ParsedTextVersion(
+                    version_code=(version_code or version_name or "").lower(),
+                    version_name=version_name,
+                    version_date=parse_date_like(raw_date),
+                    source_datetime_raw=raw_date,
+                    text_url_xml=xml_url,
+                    text_url_pdf=pdf_url,
+                    text_url_html=html_url,
+                )
+            )
+
+    return ParsedBillStatus(
+        bill_key=bill_key,
+        actions=tuple(actions),
+        relations=tuple(relations),
+        text_versions=tuple(text_versions),
+    )
+
+
+def _parse_billstatus_path(*, path: Path) -> ParsedBillStatus | None:
+    """Keyword-only adapter around parse_billstatus_file, used as the parallelize_thread task."""
+    return parse_billstatus_file(path)
+
+
+def _read_json_path(*, path: Path) -> dict[str, Any] | None:
+    """Keyword-only adapter around ``_read_json`` for loading amendment JSON in worker threads."""
+    return _read_json(path)
+
+
+def merge_billstatus_text_versions_for_bill(
+    *,
+    bill_id: int,
+    parsed_text_versions: Sequence[ParsedTextVersion],
+    source_artifact_id: int | None,
+    existing_bill_texts: dict[tuple[int, str], BillText],
+) -> list[BillText]:
+    """Create or enrich BillText rows from official billstatus metadata.
+
+    Versions without a row become metadata-only rows (``text_content=None``) so vote->text resolution
+    can link to an official version; existing rows only have ``None`` fields filled. New rows are returned.
+    """
+    created: list[BillText] = []
+    for version in parsed_text_versions:
+        version_code = version.version_code.lower()
+        key = (bill_id, version_code)
+        existing = existing_bill_texts.get(key)
+        if existing is None:
+            bill_text = BillText(
+                bill_id=bill_id,
+                version_code=version_code,
+                version_name=version.version_name,
+                text_content=None,
+                date=version.version_date,
+                source_datetime_raw=version.source_datetime_raw,
+                text_url_xml=version.text_url_xml,
+                text_url_pdf=version.text_url_pdf,
+                text_url_html=version.text_url_html,
+                source_artifact_id=source_artifact_id,
+            )
+            existing_bill_texts[key] = bill_text  # update the caller's index so a repeated code enriches instead
+            created.append(bill_text)  # rows are not added to a session here
+            continue
+
+        if existing.version_name is None and version.version_name is not None:
+            existing.version_name = version.version_name
+        if existing.date is None and version.version_date is not None:
+            existing.date = version.version_date
+        if existing.source_datetime_raw is None and version.source_datetime_raw is not None:
+            existing.source_datetime_raw = version.source_datetime_raw
+        if existing.text_url_xml is None and version.text_url_xml is not None:
+            existing.text_url_xml = version.text_url_xml
+        if existing.text_url_pdf is None and version.text_url_pdf is not None:
+            existing.text_url_pdf = version.text_url_pdf
+        if existing.text_url_html is None and version.text_url_html is not None:
+            existing.text_url_html = version.text_url_html
+        if existing.source_artifact_id is None and source_artifact_id is not None:
+            existing.source_artifact_id = source_artifact_id
+
+    return created
+
+
+def build_billstatus_text_version_index(
+    congress_dirs: Sequence[Path],
+) -> dict[tuple[int, str, int], dict[str, ParsedTextVersion]]:
+    """Parse every fdsys_billstatus.xml under each bills/ tree and index text versions by bill key and code."""
+    index: dict[tuple[int, str, int], dict[str, ParsedTextVersion]] = {}
+    for congress_dir in congress_dirs:
+        billstatus_paths = sorted((congress_dir / "bills").rglob("fdsys_billstatus.xml"))
+        for chunk in _chunked(billstatus_paths, PARALLEL_FILE_CHUNK_SIZE):
+            results = parallelize_thread(
+                _parse_billstatus_path,
+                [{"path": path} for path in chunk],
+                progress_tracker=PARALLEL_PROGRESS_TRACKER,
+            )
+            for parsed in results.results:
+                if parsed is None:  # files that failed to parse are skipped
+                    continue
+                version_map = index.setdefault(parsed.bill_key, {})
+                for version in parsed.text_versions:
+                    version_map.setdefault(version.version_code.lower(), version)  # first version seen per code wins
+    return index
+
+
+def raw_bill_key_from_ref(
+    raw_bill_ref: dict[str, Any] | None,
+    *,
+    default_congress: int,
+) -> tuple[int, str, int] | None:
+    """Build ``(congress, type, number)`` from a raw ref, defaulting the congress; None if incomplete or non-numeric."""
+    if not raw_bill_ref:
+        return None
+    raw_type = raw_bill_ref.get("type")
+    raw_number = raw_bill_ref.get("number")
+    if raw_type is None or raw_number is None:
+        return None
+    raw_congress = raw_bill_ref.get("congress", default_congress)
+    try:
+        return (int(raw_congress), str(raw_type).lower(), int(raw_number))
+    except (TypeError, ValueError):
+        return None
+
+
+def parse_vote_source_url(raw_vote: dict[str, Any]) -> str | None:
+    """Return the first non-empty string found under the vote's known URL keys, else None."""
+    candidates = (raw_vote.get(key) for key in ("url", "source_url", "sourceUrl"))
+    return next(
+        (candidate for candidate in candidates if isinstance(candidate, str) and candidate),
+        None,
+    )
+
+
+def coerce_raw_ref(raw_value: Any) -> dict[str, Any] | None:
+    """Return dicts (and None) untouched; wrap any other value as {"value": ...}."""
+    # None and dict instances pass through as-is; every other value gets wrapped.
+    passthrough = raw_value is None or isinstance(raw_value, dict)
+    if passthrough:
+        return raw_value
+    return {"value": raw_value}
+
+
+def parsed_vote_datetime(raw_vote: dict[str, Any]) -> datetime | None:
+    """Parse the vote's "date" field, forwarding "time" as the fallback time when both are usable."""
+    date_value, time_value = raw_vote.get("date"), raw_vote.get("time")
+    # The separate time field is only forwarded alongside a string date.
+    if time_value is None or not isinstance(date_value, str):
+        return parse_datetime_like(date_value)
+    return parse_datetime_like(date_value, fallback_time=str(time_value))
+
+
+def ingest_bill_status_context(
+    session: Session,
+    *,
+    congress_dirs: Sequence[Path],
+    bill_map: dict[tuple[int, str, int], int],
+    ingest_run_id: int | None,
+) -> None:
+    """Clear bill relations, bill actions, and amendments for the given congress dirs, then re-ingest them."""
+    require_billstatus_artifacts(congress_dirs)
+    congress_numbers = [int(path.name) for path in congress_dirs]  # each directory name is parsed as its congress number
+    bill_ids_subquery = select(Bill.id).where(Bill.congress.in_(congress_numbers))
+    existing_bill_texts = {  # keyed by (bill_id, lower-cased version code)
+        (bill_text.bill_id, bill_text.version_code.lower()): bill_text
+        for bill_text in session.scalars(
+            select(BillText)
+            .join(Bill, Bill.id == BillText.bill_id)
+            .where(Bill.congress.in_(congress_numbers))
+        ).all()
+    }
+    session.execute(
+        delete(BillRelation).where(BillRelation.bill_id.in_(bill_ids_subquery))
+    )
+    session.execute(delete(BillAction).where(BillAction.bill_id.in_(bill_ids_subquery)))
+    session.execute(delete(Amendment).where(Amendment.congress.in_(congress_numbers)))
+    session.commit()  # the deletions are committed before any re-ingestion starts
+
+    for congress_dir in congress_dirs:
+        bills_dir = congress_dir / "bills"
+        if not bills_dir.is_dir():
+            logger.warning("Missing bills directory for congress %s: %s", congress_dir.name, bills_dir)
+            continue  # NOTE(review): this also skips the congress's amendments, which were deleted above - confirm intended
+        billstatus_paths = sorted(bills_dir.rglob("fdsys_billstatus.xml"))
+        logger.info(
+            "Scanning %d bill status files from %s",
+            len(billstatus_paths),
+            congress_dir.name,
+        )
+        for chunk in _chunked(billstatus_paths, PARALLEL_FILE_CHUNK_SIZE):
+            results = parallelize_thread(
+                _parse_billstatus_path,
+                [{"path": path} for path in chunk],
+                progress_tracker=PARALLEL_PROGRESS_TRACKER,
+            )
+            for path, parsed in zip(chunk, results.results, strict=True):  # results are paired with their input paths by position
+                if parsed is None:
+                    continue
+                bill_id = bill_map.get(parsed.bill_key)
+                if bill_id is None:  # bill key not present in bill_map: nothing to attach to
+                    continue
+                artifact = register_source_artifact(
+                    session,
+                    path=path,
+                    source_kind="billstatus_xml",
+                    congress=parsed.bill_key[0],
+                    chamber=None,
+                    ingest_run_id=ingest_run_id,
+                )
+                session.add_all(
+                    merge_billstatus_text_versions_for_bill(
+                        bill_id=bill_id,
+                        parsed_text_versions=parsed.text_versions,
+                        source_artifact_id=artifact.id,
+                        existing_bill_texts=existing_bill_texts,
+                    )
+                )
+                for relation in parsed.relations:
+                    related_bill_id = bill_map.get(relation.related_key)
+                    if related_bill_id is None:  # relations to bills outside bill_map are dropped
+                        continue
+                    session.add(
+                        BillRelation(
+                            bill_id=bill_id,
+                            related_bill_id=related_bill_id,
+                            relationship_type=relation.relationship_type,
+                            identified_by=relation.identified_by,
+                            latest_action_date=relation.latest_action_date,
+                            latest_action_text=relation.latest_action_text,
+                        )
+                    )
+
+                for action in parsed.actions:
+                    bill_action = BillAction(
+                        bill_id=bill_id,
+                        sequence=action.sequence,
+                        action_date=action.action_date,
+                        action_time=action.action_time,
+                        action_text=action.action_text,
+                        action_type=action.action_type,
+                        action_code=action.action_code,
+                        source_system_code=action.source_system_code,
+                        source_system_name=action.source_system_name,
+                        source_artifact_id=artifact.id,
+                    )
+                    session.add(bill_action)
+                    session.flush()  # flushed before bill_action.id is read for the recorded-vote rows below
+                    for recorded_vote in action.recorded_votes:
+                        session.add(
+                            BillActionRecordedVote(
+                                bill_action_id=bill_action.id,
+                                congress=recorded_vote.congress,
+                                chamber=recorded_vote.chamber,
+                                session_number=recorded_vote.session_number,
+                                roll_number=recorded_vote.roll_number,
+                                vote_datetime=recorded_vote.vote_datetime,
+                                vote_url=recorded_vote.vote_url,
+                            )
+                        )
+
+        amendments_dir = congress_dir / "amendments"
+        if amendments_dir.is_dir():
+            amendment_paths = sorted(amendments_dir.rglob("data.json"))
+            logger.info(
+                "Scanning %d amendment files from %s",
+                len(amendment_paths),
+                congress_dir.name,
+            )
+            for chunk in _chunked(amendment_paths, PARALLEL_FILE_CHUNK_SIZE):
+                results = parallelize_thread(
+                    _read_json_path,
+                    [{"path": path} for path in chunk],
+                    progress_tracker=PARALLEL_PROGRESS_TRACKER,
+                )
+                for amendment_path, raw in zip(chunk, results.results, strict=True):
+                    if raw is None:
+                        continue
+                    amendment = _parse_amendment_json(
+                        session,
+                        raw=raw,
+                        bill_map=bill_map,
+                        ingest_run_id=ingest_run_id,
+                        path=amendment_path,
+                    )
+                    if amendment is not None:
+                        session.add(amendment)  # _parse_amendment_json has already added it; kept as a harmless repeat
+
+    session.commit()
+
+
+def _parse_amendment_json(
+    session: Session,
+    *,
+    raw: dict[str, Any],
+    bill_map: dict[tuple[int, str, int], int],
+    ingest_run_id: int | None,
+    path: Path,
+) -> Amendment | None:
+    """Add and flush an Amendment, its actions, and their recorded votes from one amendment JSON payload."""
+    congress = raw.get("congress")
+    amendment_type = raw.get("amendment_type") or raw.get("type")
+    number = raw.get("number")
+    if congress is None or amendment_type is None or number is None:
+        return None
+    congress_number = int(congress)
+    amendment_chamber = normalize_chamber(raw.get("chamber"))
+    artifact = register_source_artifact(
+        session,
+        path=path,
+        source_kind="amendment_json",
+        congress=congress_number,
+        chamber=amendment_chamber,
+        ingest_run_id=ingest_run_id,
+    )
+    # Resolve the amended bill when the payload references one that is present in bill_map.
+    amended_bill_id = None
+    amended_bill_ref = raw.get("amends_bill") or raw.get("bill") or raw.get("amended_bill")
+    if isinstance(amended_bill_ref, dict):
+        amended_bill_key = raw_bill_key_from_ref(amended_bill_ref, default_congress=congress_number)
+        if amended_bill_key is not None:
+            amended_bill_id = bill_map.get(amended_bill_key)
+
+    amendment = Amendment(
+        congress=congress_number,
+        amendment_type=str(amendment_type).lower(),
+        number=int(number),
+        chamber=amendment_chamber,
+        description=raw.get("description"),
+        purpose=raw.get("purpose"),
+        amended_bill_id=amended_bill_id,
+        source_path=str(path),
+        source_artifact_id=artifact.id,
+    )
+    session.add(amendment)
+    session.flush()
+    raw_actions = raw.get("actions")
+    if not isinstance(raw_actions, list):
+        return amendment
+    for sequence, entry in enumerate(raw_actions, start=1):
+        if not isinstance(entry, dict):
+            continue
+        acted_at = entry.get("acted_at")
+        action_date = parse_date_like(acted_at or entry.get("action_date"))
+        action_text = entry.get("text")
+        if action_date is None or not isinstance(action_text, str) or not action_text:
+            continue
+        action = AmendmentAction(
+            amendment_id=amendment.id,
+            sequence=sequence,
+            action_date=action_date,
+            action_time=_extract_time_component(acted_at),
+            action_text=action_text,
+            action_type=entry.get("type"),
+            action_code=entry.get("state") or entry.get("vote_type"),
+            source_system_code=None,
+            source_system_name="unitedstates/congress amendment JSON",
+            source_artifact_id=artifact.id,
+        )
+        session.add(action)
+        session.flush()
+        roll, session_number = entry.get("roll"), entry.get("session")
+        vote_chamber = normalize_chamber(entry.get("where"))
+        if not (roll and vote_chamber and session_number):
+            continue
+        recorded_vote = AmendmentActionRecordedVote(
+            amendment_action_id=action.id,
+            congress=congress_number,
+            chamber=vote_chamber,
+            session_number=int(session_number),
+            roll_number=int(roll),
+            vote_datetime=parse_datetime_like(acted_at),
+            vote_url=entry.get("url"),
+        )
+        session.add(recorded_vote)
+    return amendment
+
+
+def _extract_time_component(raw_value: Any) -> str | None:
+    """Return the text after the first "T" of the stringified value with every "Z" removed, else None."""
+    if raw_value is None:
+        return None
+    _date_part, separator, time_part = str(raw_value).partition("T")
+    # partition() yields an empty separator when the text contains no "T".
+    return time_part.replace("Z", "") if separator else None
+
+
+def _read_json(path: Path) -> dict[str, Any] | None:
+    """Decode the JSON file at *path* with orjson; on any error, log it and return None."""
+    import orjson
+    try:
+        return orjson.loads(path.read_bytes())
+    except Exception:
+        logger.exception("Failed to parse %s", path)
+    return None
+
+
+def build_vote_action_matches(
+    session: Session,
+    *,
+    congress_numbers: Sequence[int],
+) -> None:
+    """Match raw votes to official bill/amendment actions and persist all candidates."""
+    vote_ids_subquery = select(Vote.id).where(Vote.congress.in_(list(congress_numbers)))
+    has_votes = session.scalar(select(Vote.id).where(Vote.congress.in_(list(congress_numbers))).limit(1))
+    if has_votes is None:  # no votes for these congresses: leave existing rows untouched
+        return
+
+    session.execute(
+        delete(VoteActionMatch).where(VoteActionMatch.vote_id.in_(vote_ids_subquery))
+    )
+    session.execute(  # removes every audit row for these votes, whatever step wrote it
+        delete(VoteContextAudit).where(VoteContextAudit.vote_id.in_(vote_ids_subquery))
+    )
+    session.commit()
+
+    vote_stmt = select(Vote).where(Vote.congress.in_(list(congress_numbers))).order_by(Vote.id)
+    votes = session.scalars(vote_stmt).all()
+    bill_vote_index = _build_bill_action_vote_index(session, congress_numbers)
+    amendment_vote_index = _build_amendment_action_vote_index(session, congress_numbers)
+
+    for vote in votes:
+        candidates = rank_action_candidates(
+            vote=vote,
+            bill_vote_index=bill_vote_index,
+            amendment_vote_index=amendment_vote_index,
+        )
+        if not candidates:
+            session.add(
+                VoteContextAudit(
+                    vote_id=vote.id,
+                    step="vote_action_match",
+                    message="no official action matched vote tuple; classification will fall back to vote XML",
+                    severity="warning",
+                )
+            )
+            continue
+
+        # rank_action_candidates returns candidates best-first; index 0 is flagged is_selected below.
+        if len(candidates) > 1:
+            session.add(
+                VoteContextAudit(
+                    vote_id=vote.id,
+                    step="vote_action_match",
+                    message="multiple official actions matched vote tuple; selected highest-ranked candidate",
+                    severity="info",
+                )
+            )
+
+        for index, candidate in enumerate(candidates):  # every candidate is persisted, not only the winner
+            session.add(
+                VoteActionMatch(
+                    vote_id=vote.id,
+                    action_scope=candidate.scope,
+                    bill_action_id=candidate.bill_action.id if candidate.bill_action else None,
+                    amendment_action_id=(
+                        candidate.amendment_action.id if candidate.amendment_action else None
+                    ),
+                    is_selected=index == 0,
+                    match_method=candidate.match_method,
+                    match_reason=candidate.match_reason,
+                    match_confidence=candidate.match_confidence,
+                )
+            )
+
+    session.commit()
+
+
+def _build_bill_action_vote_index(
+    session: Session,
+    congress_numbers: Sequence[int],
+) -> dict[tuple[int, str, int, int], list[BillActionRecordedVote]]:
+    """Group bill-action recorded votes by (congress, chamber, session_number, roll_number)."""
+    recorded_votes = session.scalars(
+        select(BillActionRecordedVote)
+        .join(BillAction, BillAction.id == BillActionRecordedVote.bill_action_id)
+        .join(Bill, Bill.id == BillAction.bill_id)
+        .where(Bill.congress.in_(list(congress_numbers)))
+        .options(joinedload(BillActionRecordedVote.bill_action).joinedload(BillAction.bill))
+    ).all()
+    grouped: dict[tuple[int, str, int, int], list[BillActionRecordedVote]] = {}
+    for rv in recorded_votes:
+        grouped.setdefault((rv.congress, rv.chamber, rv.session_number, rv.roll_number), []).append(rv)
+    return grouped
+
+
+def _build_amendment_action_vote_index(
+    session: Session,
+    congress_numbers: Sequence[int],
+) -> dict[tuple[int, str, int, int], list[AmendmentActionRecordedVote]]:
+    """Group amendment-action recorded votes by (congress, chamber, session_number, roll_number)."""
+    recorded_votes = session.scalars(
+        select(AmendmentActionRecordedVote)
+        .join(AmendmentAction, AmendmentAction.id == AmendmentActionRecordedVote.amendment_action_id)
+        .join(Amendment, Amendment.id == AmendmentAction.amendment_id)
+        .where(Amendment.congress.in_(list(congress_numbers)))
+        .options(
+            joinedload(AmendmentActionRecordedVote.amendment_action).joinedload(
+                AmendmentAction.amendment
+            )
+        )
+    ).all()
+    grouped: dict[tuple[int, str, int, int], list[AmendmentActionRecordedVote]] = {}
+    for rv in recorded_votes:
+        grouped.setdefault((rv.congress, rv.chamber, rv.session_number, rv.roll_number), []).append(rv)
+    return grouped
+
+
+def rank_action_candidates(
+    *,
+    vote: Vote,
+    bill_vote_index: dict[tuple[int, str, int, int], list[BillActionRecordedVote]],
+    amendment_vote_index: dict[
+        tuple[int, str, int, int], list[AmendmentActionRecordedVote]
+    ],
+) -> list[ActionCandidate]:
+    """Score every bill/amendment action recorded for the vote's tuple and return them best-first."""
+    vote_key = (vote.congress, vote.chamber, vote.session_number, vote.roll_number)
+    bill_rows = bill_vote_index.get(vote_key, [])
+    amendment_rows = amendment_vote_index.get(vote_key, [])
+    prefer_amendment = has_amendment_signal(
+        vote.question,
+        vote.result_text,
+        raw_amendment_ref=vote.raw_amendment_ref,
+    )
+    question_text = normalized_text(vote.question, vote.result_text, vote.vote_type)
+    ranked: list[ActionCandidate] = []
+
+    # Amendment actions start at 100 and gain:
+    #   +50 when the vote itself carries an amendment signal,
+    #   +15 for a direct amendment-text question, +1 when the action has a time.
+    for recorded in amendment_rows:
+        amendment_action = recorded.amendment_action
+        combined_text = normalized_text(amendment_action.action_text, vote.question)
+        score = 100 + (50 if prefer_amendment else 0)
+        score += 15 if is_direct_amendment_text_question(combined_text) else 0
+        score += 1 if amendment_action.action_time else 0
+        ranked.append(
+            ActionCandidate(
+                scope=VoteActionScope.AMENDMENT,
+                bill_action=None,
+                amendment_action=amendment_action,
+                score=score,
+                match_method="canonical_vote_tuple",
+                match_reason="matched amendment action recorded vote tuple",
+                match_confidence=ConfidenceLevel.HIGH,
+            )
+        )
+
+    # Bill actions start at 50 and gain:
+    #   +20 when the vote carries no amendment signal, +10 when the source system name does not
+    #   mention the Library of Congress, +10 for any semantic alignment, +1 when the action has a time.
+    for recorded in bill_rows:
+        bill_action = recorded.bill_action
+        source_name = bill_action.source_system_name or ""
+        from_library_of_congress = "library of congress" in source_name.casefold()
+        score = 50 + (0 if prefer_amendment else 20)
+        score += 0 if from_library_of_congress else 10
+        score += 10 if _semantic_alignment_score(question_text, bill_action.action_text) > 0 else 0
+        score += 1 if bill_action.action_time else 0
+        ranked.append(
+            ActionCandidate(
+                scope=VoteActionScope.BILL,
+                bill_action=bill_action,
+                amendment_action=None,
+                score=score,
+                match_method="canonical_vote_tuple",
+                match_reason="matched bill action recorded vote tuple",
+                match_confidence=ConfidenceLevel.HIGH,
+            )
+        )
+
+    def _sort_key(candidate: ActionCandidate) -> tuple[int, int]:
+        # Highest score first; ties fall back to the action's sequence value.
+        action = candidate.bill_action if candidate.bill_action else candidate.amendment_action
+        return (-candidate.score, action.sequence)
+
+    ranked.sort(key=_sort_key)
+    return ranked
+
+
+def _semantic_alignment_score(question_text: str, action_text: str) -> int:
+    """Score how well a vote question lines up with an action's text: 5, 3, or 0."""
+    action_norm = normalized_text(action_text)
+    if question_text and action_norm and question_text in action_norm:
+        return 5  # the question text appears inside the normalized action text
+    # Otherwise 3 when both texts are direct measure-text questions, or both are procedural questions.
+    same_kind = is_direct_measure_text_question(question_text) and is_direct_measure_text_question(action_norm)
+    same_kind = same_kind or (is_procedural_question(question_text) and is_procedural_question(action_norm))
+    return 3 if same_kind else 0
+
+
+def classify_votes(
+    session: Session,
+    *,
+    congress_numbers: Sequence[int],
+    bill_map: dict[tuple[int, str, int], int],
+) -> None:
+    """Rebuild vote classifications, measure links, and classification audit rows from selected matches."""
+    has_votes = session.scalar(select(Vote.id).where(Vote.congress.in_(list(congress_numbers))).limit(1))
+    if has_votes is None:
+        return
+
+    vote_ids_subquery = select(Vote.id).where(Vote.congress.in_(list(congress_numbers)))
+    session.execute(delete(VoteMeasureLink).where(VoteMeasureLink.vote_id.in_(vote_ids_subquery)))
+    session.execute(delete(VoteClassification).where(VoteClassification.vote_id.in_(vote_ids_subquery)))
+    # Also clear the audit rows this step wrote earlier, so re-running it does not duplicate them.
+    session.execute(
+        delete(VoteContextAudit)
+        .where(VoteContextAudit.vote_id.in_(vote_ids_subquery))
+        .where(VoteContextAudit.step == "vote_context_classify")
+    )
+    session.commit()
+
+    vote_stmt = (
+        select(Vote)
+        .where(Vote.congress.in_(list(congress_numbers)))
+        .options(
+            selectinload(Vote.action_matches).joinedload(VoteActionMatch.bill_action).joinedload(BillAction.bill),
+            selectinload(Vote.action_matches)
+            .joinedload(VoteActionMatch.amendment_action)
+            .joinedload(AmendmentAction.amendment),
+        )
+        .order_by(Vote.id)
+    )
+    votes = session.scalars(vote_stmt).all()
+
+    for vote in votes:
+        selected_match = next((match for match in vote.action_matches if match.is_selected), None)
+        classification, measure_links, audit_rows = classify_single_vote(
+            vote=vote,
+            selected_match=selected_match,
+            bill_map=bill_map,
+            session=session,
+        )
+        session.add(classification)
+        session.add_all(measure_links)
+        session.add_all(audit_rows)
+
+    session.commit()
+
+
+def classify_single_vote(
+    *,
+    vote: Vote,
+    selected_match: VoteActionMatch | None,
+    bill_map: dict[tuple[int, str, int], int],
+    session: Session,
+) -> tuple[VoteClassification, list[VoteMeasureLink], list[VoteContextAudit]]:
+    """Classify a single vote and produce measure links."""
+    audit_rows: list[VoteContextAudit] = []
+    question_text = vote.question or ""
+    result_text = vote.result_text or ""
+    selected_action_text = ""
+    bill: Bill | None = None
+    amendment: Amendment | None = None
+    method = ClassificationMethod.VOTE_XML_ONLY  # defaults used when no action match was selected
+    confidence = ConfidenceLevel.MEDIUM
+
+    if selected_match is not None:
+        if selected_match.bill_action is not None:
+            bill = selected_match.bill_action.bill
+        if selected_match.amendment_action is not None:
+            amendment = selected_match.amendment_action.amendment
+        selected_action_text = (  # the bill action's text takes precedence over the amendment action's
+            selected_match.bill_action.action_text
+            if selected_match.bill_action is not None
+            else selected_match.amendment_action.action_text
+            if selected_match.amendment_action is not None
+            else ""
+        )
+        method = (
+            ClassificationMethod.RECORDED_VOTE_ACTION_EXACT
+            if len(vote.action_matches) <= 1  # more than one stored match selects the DUPLICATE_SOURCE_DEDUPED method
+            else ClassificationMethod.RECORDED_VOTE_ACTION_DUPLICATE_SOURCE_DEDUPED
+        )
+        confidence = ConfidenceLevel.HIGH
+
+    if bill is None and vote.raw_bill_ref:  # no bill from the match: fall back to the vote's own raw bill reference
+        raw_key = raw_bill_key_from_ref(vote.raw_bill_ref, default_congress=vote.congress)
+        if raw_key is not None:
+            raw_bill_id = bill_map.get(raw_key)
+            if raw_bill_id is not None:
+                bill = session.get(Bill, raw_bill_id)
+
+    subject_type = SubjectType.UNKNOWN
+    vote_relationship = VoteRelationship.UNKNOWN
+    measure_subtype: MeasureSubtype | None = None
+    measure_function: MeasureFunction | None = None
+    measure_type: str | None = None
+    is_legislation_related = False
+    is_direct_text = False
+    is_substantive = False
+    is_lawmaking_vehicle = False
+    is_special_rule = False
+    measure_links: list[VoteMeasureLink] = []
+
+    if vote.raw_nomination_ref or "nomination" in normalized_text(question_text, result_text):  # substring heuristic
+        subject_type = SubjectType.NOMINATION
+        vote_relationship = VoteRelationship.NON_LEGISLATIVE
+    elif vote.raw_treaty_ref or "treaty" in normalized_text(question_text, result_text):  # substring heuristic
+        subject_type = SubjectType.TREATY
+        vote_relationship = VoteRelationship.NON_LEGISLATIVE
+    elif is_non_legislative_question(question_text, result_text):
+        subject_type = SubjectType.CHAMBER_ADMIN
+        vote_relationship = VoteRelationship.NON_LEGISLATIVE
+    elif amendment is not None or has_amendment_signal(question_text, selected_action_text, raw_amendment_ref=vote.raw_amendment_ref):
+        subject_type = SubjectType.AMENDMENT
+        is_legislation_related = True
+        if is_direct_amendment_text_question(question_text, selected_action_text):
+            vote_relationship = VoteRelationship.AMENDMENT_TEXT_VOTE
+            is_direct_text = True
+            is_substantive = True
+        else:
+            vote_relationship = VoteRelationship.PROCEDURAL_RELATED_TO_AMENDMENT
+        if amendment is not None and amendment.amended_bill_id is not None:  # a link is only created when the amended bill is known
+            role = (
+                VoteMeasureRole.AMENDS
+                if vote_relationship is VoteRelationship.AMENDMENT_TEXT_VOTE
+                else VoteMeasureRole.PROCEDURAL_TARGET
+            )
+            measure_links.append(
+                VoteMeasureLink(
+                    vote_id=vote.id,
+                    measure_id=amendment.amended_bill_id,
+                    role=role,
+                    source=method.value,
+                    confidence=confidence,
+                    notes=amendment.purpose,
+                )
+            )
+    elif bill is not None or vote.raw_bill_ref:
+        subject_type = SubjectType.MEASURE
+        is_legislation_related = True
+        if bill is not None:
+            measure_type = measure_type_value(bill.bill_type)
+            measure_subtype = measure_subtype_for_bill_type(bill.bill_type)
+            measure_function = measure_function_for_vote(
+                bill=bill,
+                question=question_text,
+                action_text=selected_action_text,
+            )
+            is_special_rule = measure_function is MeasureFunction.SPECIAL_RULE
+            is_lawmaking_vehicle = measure_subtype in {
+                MeasureSubtype.BILL,
+                MeasureSubtype.JOINT_RESOLUTION,
+                MeasureSubtype.CONCURRENT_RESOLUTION,  # NOTE(review): concurrent resolutions count as lawmaking vehicles here - confirm intended
+            }
+
+        if is_direct_measure_text_question(question_text, selected_action_text):
+            vote_relationship = VoteRelationship.DIRECT_TEXT_VOTE
+            is_direct_text = True
+            is_substantive = not is_special_rule and measure_function not in {  # special rules and these two functions are not substantive
+                MeasureFunction.CHAMBER_INTERNAL,
+                MeasureFunction.COMMEMORATIVE_OR_SENSE_OF,
+            }
+        elif is_procedural_question(question_text, selected_action_text):
+            vote_relationship = VoteRelationship.PROCEDURAL_RELATED_TO_MEASURE
+        else:
+            vote_relationship = VoteRelationship.UNKNOWN
+
+        if bill is not None:
+            role = (
+                VoteMeasureRole.VOTED_ON
+                if vote_relationship is VoteRelationship.DIRECT_TEXT_VOTE
+                else VoteMeasureRole.PROCEDURAL_TARGET
+            )
+            measure_links.append(
+                VoteMeasureLink(
+                    vote_id=vote.id,
+                    measure_id=bill.id,
+                    role=role,
+                    source=method.value,
+                    confidence=confidence,
+                    notes=None,
+                )
+            )
+            if is_special_rule:
+                underlying_refs = parse_measure_references(  # parsed from the rule's titles plus the action and question text
+                    " ".join(
+                        filter(
+                            None,
+                            [bill.title, bill.title_short, bill.official_title, selected_action_text, question_text],
+                        )
+                    ),
+                    congress=vote.congress,
+                )
+                seen_measure_ids: set[int] = {bill.id}  # seeded with the rule itself so it is never linked as its own RULE_FOR target
+                for key in underlying_refs:
+                    linked_bill_id = bill_map.get(key)
+                    if linked_bill_id is None or linked_bill_id in seen_measure_ids:
+                        continue
+                    seen_measure_ids.add(linked_bill_id)
+                    measure_links.append(
+                        VoteMeasureLink(
+                            vote_id=vote.id,
+                            measure_id=linked_bill_id,
+                            role=VoteMeasureRole.RULE_FOR,
+                            source="measure_text_parse",
+                            confidence=ConfidenceLevel.MEDIUM,
+                            notes="parsed from rule title/question/action text",
+                        )
+                    )
+                if len(seen_measure_ids) <= 1:  # only the rule's own id is present: no underlying measure was resolved
+                    audit_rows.append(
+                        VoteContextAudit(
+                            vote_id=vote.id,
+                            step="vote_context_classify",
+                            message="special rule detected but no underlying measure could be resolved from available text",
+                            severity="warning",
+                        )
+                    )
+    else:
+        audit_rows.append(  # none of the branches above applied: subject type and relationship stay UNKNOWN
+            VoteContextAudit(
+                vote_id=vote.id,
+                step="vote_context_classify",
+                message="vote remains unclassified after action matching and raw-source parsing",
+                severity="warning",
+            )
+        )
+
+    classification = VoteClassification(
+        vote_id=vote.id,
+        subject_type=subject_type,
+        measure_type=measure_type,
+        measure_subtype=measure_subtype,
+        measure_function=measure_function,
+        vote_relationship=vote_relationship,
+        is_legislation_related=is_legislation_related,
+        is_direct_vote_on_legislative_text=is_direct_text,
+        is_substantive_policy_vote=is_substantive,
+        is_lawmaking_vehicle=is_lawmaking_vehicle,
+        is_special_rule=is_special_rule,
+        classification_method=method,
+        classification_confidence_reason=(
+            "matched recorded vote tuple to official action"
+            if selected_match is not None
+            else "classified from raw vote metadata only"
+        ),
+        confidence=confidence,
+        classified_at=datetime.now(UTC),
+        classification_version=CLASSIFICATION_VERSION,
+    )
+    return classification, measure_links, audit_rows  # the caller is responsible for adding these to the session
+
+
+def resolve_vote_text_targets(
+    session: Session,
+    *,
+    congress_numbers: Sequence[int],
+) -> None:
+    """Rebuild the VoteTextTarget row of every classified vote in the given congresses."""
+    congress_list = list(congress_numbers)
+    any_vote_id = session.scalar(select(Vote.id).where(Vote.congress.in_(congress_list)).limit(1))
+    if any_vote_id is None:
+        return
+
+    # Drop previously stored targets for these votes before recomputing them.
+    vote_ids_subquery = select(Vote.id).where(Vote.congress.in_(congress_list))
+    stale_targets = delete(VoteTextTarget).where(VoteTextTarget.vote_id.in_(vote_ids_subquery))
+    session.execute(stale_targets)
+    session.commit()
+
+    eager_loads = (
+        joinedload(Vote.classification),
+        selectinload(Vote.vote_measure_links).joinedload(VoteMeasureLink.measure).selectinload(Bill.bill_texts),
+        selectinload(Vote.action_matches).joinedload(VoteActionMatch.bill_action).joinedload(BillAction.bill),
+        selectinload(Vote.action_matches)
+        .joinedload(VoteActionMatch.amendment_action)
+        .joinedload(AmendmentAction.amendment),
+    )
+    vote_stmt = (
+        select(Vote)
+        .where(Vote.congress.in_(congress_list))
+        .options(*eager_loads)
+        .order_by(Vote.id)
+    )
+
+    for vote in session.scalars(vote_stmt).all():
+        if vote.classification is None:
+            continue  # unclassified votes get no text target
+        chosen = next((match for match in vote.action_matches if match.is_selected), None)
+        session.add(resolve_text_target_for_vote(vote=vote, selected_match=chosen))
+
+    session.commit()
+
+
+def resolve_text_target_for_vote(
+    *,
+    vote: Vote,
+    selected_match: VoteActionMatch | None,
+) -> VoteTextTarget:
+    """Build the VoteTextTarget naming the official text one classified vote acted on.
+
+    Amendment votes get an amendment-text target carrying the selected match's
+    amendment id; votes that are not direct votes on a measure's legislative
+    text get a "none" target; direct votes lacking a VOTED_ON measure link get
+    a low-confidence "unknown" target. Otherwise the linked measure's bill
+    texts are searched, using the selected match's action date and text and
+    falling back to the vote's own legislative date. `vote.classification`
+    must already be set.
+    """
+    classification = vote.classification
+    assert classification is not None
+
+    def target_without_text(**overrides: object) -> VoteTextTarget:
+        # Shared shape of the three outcomes that attach no bill text version.
+        return VoteTextTarget(
+            vote_id=vote.id,
+            voted_text_version_id=None,
+            resulting_text_version_id=None,
+            notes=None,
+            **overrides,
+        )
+
+    if classification.subject_type is SubjectType.AMENDMENT:
+        amendment_action = selected_match.amendment_action if selected_match else None
+        return target_without_text(
+            text_target_type=TextTargetType.AMENDMENT_TEXT,
+            related_amendment_id=amendment_action.amendment_id if amendment_action is not None else None,
+            text_target_basis=TextTargetBasis.AMENDMENT_TEXT,
+            text_resolution_method=TextResolutionMethod.AMENDMENT_TEXT_UNMODELED_PHASE1,
+            text_resolution_confidence_reason="phase 1 does not store amendment text artifacts",
+            confidence=classification.confidence,
+        )
+
+    is_direct_measure_vote = (
+        classification.subject_type is SubjectType.MEASURE
+        and classification.is_direct_vote_on_legislative_text
+    )
+    if not is_direct_measure_vote:
+        return target_without_text(
+            text_target_type=TextTargetType.NONE,
+            related_amendment_id=None,
+            text_target_basis=TextTargetBasis.NO_TEXT_TARGET,
+            text_resolution_method=TextResolutionMethod.NO_TEXT_TARGET,
+            text_resolution_confidence_reason="vote was not a direct vote on legislative text",
+            confidence=classification.confidence,
+        )
+
+    voted_on_measure = None
+    for link in vote.vote_measure_links:
+        if link.role is VoteMeasureRole.VOTED_ON:
+            voted_on_measure = link.measure
+            break
+    if voted_on_measure is None:
+        return target_without_text(
+            text_target_type=TextTargetType.UNKNOWN,
+            related_amendment_id=None,
+            text_target_basis=TextTargetBasis.UNKNOWN,
+            text_resolution_method=TextResolutionMethod.UNKNOWN,
+            text_resolution_confidence_reason="no voted_on measure link exists for direct text vote",
+            confidence=ConfidenceLevel.LOW,
+        )
+
+    # The matched action supplies date and text: bill action first, then amendment action.
+    matched_action = None
+    if selected_match is not None:
+        bill_action = selected_match.bill_action
+        matched_action = bill_action if bill_action is not None else selected_match.amendment_action
+    action_date = matched_action.action_date if matched_action is not None else None
+    action_text = matched_action.action_text if matched_action is not None else ""
+    if action_date is None:
+        action_date = legislative_date_for_comparison(vote_datetime=vote.vote_datetime, fallback_date=vote.vote_date)
+
+    candidate_texts = list(voted_on_measure.bill_texts)
+    voted_text, method, basis = choose_best_bill_text(
+        candidate_texts=candidate_texts,
+        action_date=action_date,
+        preferred_codes=preferred_bill_text_codes(
+            vote=vote,
+            bill=voted_on_measure,
+            classification=classification,
+            action_text=action_text,
+        ),
+        classification=classification,
+    )
+    resulting_text = determine_resulting_text_version(
+        candidate_texts=candidate_texts,
+        action_date=action_date,
+        action_text=action_text,
+        voted_text=voted_text,
+    )
+    if classification.is_special_rule:
+        basis = TextTargetBasis.RULE_RESOLUTION_TEXT
+
+    if classification.measure_subtype is MeasureSubtype.BILL:
+        text_target_type = TextTargetType.BILL_TEXT
+    else:
+        text_target_type = TextTargetType.RESOLUTION_TEXT
+    if voted_text is None:
+        voted_text_id = None
+        confidence = ConfidenceLevel.LOW
+        reason = "no eligible bill text version found"
+    else:
+        voted_text_id = voted_text.id
+        confidence = classification.confidence
+        reason = "resolved from official bill text versions"
+    return VoteTextTarget(
+        vote_id=vote.id,
+        text_target_type=text_target_type,
+        voted_text_version_id=voted_text_id,
+        resulting_text_version_id=resulting_text.id if resulting_text is not None else None,
+        related_amendment_id=None,
+        text_target_basis=basis,
+        text_resolution_method=method,
+        text_resolution_confidence_reason=reason,
+        confidence=confidence,
+        notes=None,
+    )
+
+
+def preferred_bill_text_codes(
+    *,
+    vote: Vote,
+    bill: Bill,
+    classification: VoteClassification,
+    action_text: str | None,
+) -> tuple[str, ...]:
+    """Return the text-version codes to prefer for this vote, most preferred first.
+
+    Checks run in order: special rule, conference report, concur/amendment
+    wording, vote in the measure's chamber of origin, "without amendment" in
+    the other chamber, then a default based on the voting chamber.
+    """
+    wording = normalized_text(vote.question, action_text, vote.result_text)
+    house_origin = is_house_origin_measure(bill)
+    senate_origin = is_senate_origin_measure(bill)
+    in_house = vote.chamber == "House"
+    in_senate = vote.chamber == "Senate"
+    if classification.is_special_rule:
+        return ("ath", "ats", "eh", "es", "cph", "cps")
+    if "conference report" in wording:
+        return ("enr", "eah", "eas", "eh", "es")
+    if any(marker in wording for marker in ("concur", "with an amendment", "agreed to senate amendments")):
+        return ("eah", "eas", "enr")
+    if in_house and house_origin:
+        return ("eh", "cph")
+    if in_senate and senate_origin:
+        return ("es", "cps")
+    if "without amendment" in wording and in_senate and house_origin:
+        return ("rfs", "rds", "eh", "cph")
+    if "without amendment" in wording and in_house and senate_origin:
+        return ("rfh", "rdh", "es", "cps")
+    return ("eah", "eh", "cph") if in_house else ("eas", "es", "cps")
+
+
+def choose_best_bill_text(
+    *,
+    candidate_texts: Sequence[BillText],
+    action_date: date | None,
+    preferred_codes: Sequence[str],
+    classification: VoteClassification,
+) -> tuple[BillText | None, TextResolutionMethod, TextTargetBasis]:
+    """Pick the official text version that best matches a direct measure vote.
+
+    Texts dated after `action_date` are excluded unless that would leave
+    nothing; undated texts always stay eligible. The winner is chosen by, in
+    order: dated exactly on `action_date`, earliest position of its version
+    code in `preferred_codes` (case-insensitive), latest date, highest id.
+
+    Returns `(text, method, basis)`. `text` is None only when
+    `candidate_texts` is empty. `classification` is currently unused.
+    """
+    if not candidate_texts:
+        return None, TextResolutionMethod.UNKNOWN, TextTargetBasis.UNKNOWN
+
+    # Position of each preferred code; setdefault keeps the first (best)
+    # position if a code is listed twice, matching tuple.index semantics.
+    preference_rank: dict[str, int] = {}
+    for position, preferred in enumerate(preferred_codes):
+        preference_rank.setdefault(preferred.lower(), position)
+
+    eligible = [
+        bill_text
+        for bill_text in candidate_texts
+        if action_date is None or bill_text.date is None or bill_text.date <= action_date
+    ]
+    if not eligible:
+        eligible = list(candidate_texts)
+
+    def sort_key(bill_text: BillText) -> tuple[int, int, int, int]:
+        # The third slot is a date ordinal (an int), so the key is all ints.
+        on_action_date = int(action_date is not None and bill_text.date == action_date)
+        # Negated so max() favours earlier preferred codes; unlisted codes rank last.
+        code_rank = -preference_rank.get(bill_text.version_code.lower(), 999)
+        return (on_action_date, code_rank, (bill_text.date or date.min).toordinal(), bill_text.id)
+
+    best = max(eligible, key=sort_key)
+    code_is_preferred = best.version_code.lower() in preference_rank
+    if action_date is not None and best.date == action_date:
+        method = (
+            TextResolutionMethod.TEXT_EXACT_ACTION_DATE_AND_CODE
+            if code_is_preferred
+            else TextResolutionMethod.TEXT_EXACT_ACTION_DATE_WRONG_CODE
+        )
+        return best, method, TextTargetBasis.EXACT_ACTION_TEXT_VERSION
+    if code_is_preferred and best.version_code.lower() in {"rfh", "rdh", "rfs", "rds"}:
+        return (
+            best,
+            TextResolutionMethod.TEXT_RECEIVED_PRIOR_CHAMBER_VERSION,
+            TextTargetBasis.RECEIVED_PRIOR_CHAMBER_VERSION,
+        )
+    # Preferred-but-not-received codes and unlisted codes share this outcome.
+    return best, TextResolutionMethod.TEXT_PRIOR_VERSION_CODE_MATCH, TextTargetBasis.RESULTING_ENGROSSED_VERSION
+
+
+def determine_resulting_text_version(
+    *,
+    candidate_texts: Sequence[BillText],
+    action_date: date | None,
+    action_text: str | None,
+    voted_text: BillText | None,
+) -> BillText | None:
+    """Return the text version that resulted from the vote, or None without a voted text.
+
+    When the normalized action text contains one of the markers below, the
+    earliest "enr" version dated on or after `action_date` wins (undated ones
+    sort last, ties break on id); failing that, the voted text is the result.
+    """
+    if voted_text is None:
+        return None
+    markers = ("without amendment", "conference report", "agreed to senate amendment", "agreed to house amendment")
+    action = normalized_text(action_text)
+    if not action or not any(marker in action for marker in markers):
+        return voted_text
+    enrolled = (
+        bill_text
+        for bill_text in candidate_texts
+        if bill_text.version_code.lower() == "enr"
+        and (action_date is None or bill_text.date is None or bill_text.date >= action_date)
+    )
+    return min(enrolled, key=lambda bill_text: (bill_text.date or date.max, bill_text.id), default=voted_text)
+
+
+def resolve_vote_position_meanings(
+    session: Session,
+    *,
+    congress_numbers: Sequence[int],
+) -> None:
+    """Rebuild VotePositionMeaning rows for the classified votes of the given congresses.
+
+    Existing meanings for those votes are deleted and committed first; then
+    one meaning per classified vote is added and committed. Votes without a
+    classification are skipped, and nothing happens when no vote matches.
+    """
+    congresses = list(congress_numbers)
+    if session.scalar(select(Vote.id).where(Vote.congress.in_(congresses)).limit(1)) is None:
+        return
+
+    scoped_vote_ids = select(Vote.id).where(Vote.congress.in_(congresses))
+    session.execute(delete(VotePositionMeaning).where(VotePositionMeaning.vote_id.in_(scoped_vote_ids)))
+    session.commit()
+
+    query = (
+        select(Vote)
+        .where(Vote.congress.in_(congresses))
+        .options(joinedload(Vote.classification))
+        .order_by(Vote.id)
+    )
+
+    # Only classified votes get a meaning row.
+    classified_votes = [vote for vote in session.scalars(query).all() if vote.classification is not None]
+    session.add_all(
+        resolve_vote_position_meaning_for_vote(vote=vote) for vote in classified_votes
+    )
+
+    session.commit()
+
+
+def resolve_vote_position_meaning_for_vote(*, vote: Vote) -> VotePositionMeaning:
+    """Translate Yea/Nay/Present into semantic effects for one classified vote.
+
+    Text and amendment-text votes: Yea supports, Nay opposes. Procedural votes:
+    polarity is read from the normalized question/result wording ("motion to
+    table" blocks on Yea; cloture, motion to proceed and previous question
+    advance on Yea). Anything else stays UNKNOWN with low confidence, and
+    Present is always UNKNOWN. `vote.classification` must already be set.
+    """
+    classification = vote.classification
+    assert classification is not None
+    relationship = classification.vote_relationship
+    wording = normalized_text(vote.question, vote.result_text)
+
+    is_text_vote = relationship in {VoteRelationship.DIRECT_TEXT_VOTE, VoteRelationship.AMENDMENT_TEXT_VOTE}
+    is_procedural = (
+        relationship is VoteRelationship.PROCEDURAL_RELATED_TO_MEASURE
+        or relationship is VoteRelationship.PROCEDURAL_RELATED_TO_AMENDMENT
+    )
+    advancing_tokens = ("cloture", "motion to proceed", "previous question", "ordering the previous question")
+    yea_effect = nay_effect = VoteEffect.UNKNOWN
+    confidence = classification.confidence
+    method = "classification_relationship"
+    if is_text_vote:
+        yea_effect, nay_effect = VoteEffect.SUPPORTS_TEXT, VoteEffect.OPPOSES_TEXT
+    elif not is_procedural:
+        confidence, method = ConfidenceLevel.LOW, "non_legislative_or_unknown"
+    elif "motion to table" in wording:
+        yea_effect, nay_effect = VoteEffect.BLOCKS_PROCEDURE, VoteEffect.ADVANCES_PROCEDURE
+    elif any(token in wording for token in advancing_tokens):
+        yea_effect, nay_effect = VoteEffect.ADVANCES_PROCEDURE, VoteEffect.BLOCKS_PROCEDURE
+    else:
+        confidence, method = ConfidenceLevel.LOW, "classification_relationship_unknown_procedural_polarity"
+    return VotePositionMeaning(
+        vote_id=vote.id,
+        yea_effect=yea_effect,
+        nay_effect=nay_effect,
+        present_effect=VoteEffect.UNKNOWN,
+        polarity_confidence=confidence,
+        polarity_method=method,
+        notes=None,
+    )
+
+
+def create_score_run(session: Session) -> ScoreRun:
+    """Add and flush a new, not yet completed ScoreRun with zeroed vote counts and return it."""
+    # Tie the run to the IngestRun with the highest id; scalar() gives None when there is none.
+    newest_ingest_run_id = select(IngestRun.id).order_by(IngestRun.id.desc()).limit(1)
+    score_run = ScoreRun(
+        ingest_run_id=session.scalar(newest_ingest_run_id),
+        classifier_version=CLASSIFICATION_VERSION,
+        scoring_version=SCORING_VERSION,
+        included_vote_count=0,
+        excluded_vote_count=0,
+        started_at=datetime.now(UTC),
+        completed_at=None,
+    )
+
+    session.add(score_run)
+    session.flush()
+    return score_run
+
+
+def finalize_score_run(
+    session: Session, *, score_run: ScoreRun, included_vote_count: int, excluded_vote_count: int
+) -> None:
+    """Stamp a score run as completed and flush it.
+
+    Records the final included/excluded vote counts and sets `completed_at`
+    to the current UTC time. Only a flush is issued here, no commit.
+    """
+    score_run.completed_at = datetime.now(UTC)
+    score_run.included_vote_count = included_vote_count
+    score_run.excluded_vote_count = excluded_vote_count
+    session.flush()
diff --git a/pipelines/jobs/ingest_congress.py b/pipelines/jobs/ingest_congress.py
new file mode 100644
index 0000000..6287fb5
--- /dev/null
+++ b/pipelines/jobs/ingest_congress.py
@@ -0,0 +1,1084 @@
+"""Ingestion pipeline for loading congress data from unitedstates/congress JSON files.
+
+Loads legislators, bills, votes, vote records, and bill text into the data_science_dev database.
+Expects the parent directory to contain congress-tracker/ and congress-legislators/ as siblings.
+
+Usage:
+    ingest-congress /path/to/parent/
+    ingest-congress /path/to/parent/ --congress 118
+    ingest-congress /path/to/parent/ --congress 118 --only bills
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from datetime import date
+from pathlib import Path  # noqa: TC003 needed at runtime for typer CLI argument
+from typing import TYPE_CHECKING, Annotated
+
+import orjson
+import typer
+import yaml
+from sqlalchemy import func, select
+from sqlalchemy.orm import Session
+
+from pipelines.common import configure_logger
+from pipelines.jobs.congress_vote_context import (
+    build_billstatus_text_version_index,
+    build_vote_action_matches,
+    classify_votes,
+    coerce_raw_ref,
+    derive_session_number,
+    finish_ingest_run,
+    filter_context_supported_congress_dirs,
+    ingest_bill_status_context as rebuild_bill_status_context,
+    parse_date_like,
+    parse_vote_source_url,
+    parsed_vote_datetime,
+    register_source_artifact,
+    require_billstatus_artifacts,
+    resolve_vote_position_meanings,
+    resolve_vote_text_targets,
+    start_ingest_run,
+)
+from pipelines.orm.common import get_postgres_engine
+from pipelines.orm.data_science_dev.congress import (
+    Bill,
+    BillText,
+    Legislator,
+    LegislatorSocialMedia,
+    Vote,
+    VoteActionMatch,
+    VoteClassification,
+    VoteContextAudit,
+    VoteRecord,
+)
+from pipelines.parallelize import parallelize_thread
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator, Sequence
+
+    from sqlalchemy.engine import Engine
+
+logger = logging.getLogger(__name__)
+
+BATCH_SIZE = 10_000  # pending ORM objects accumulated before _flush_batch commits them
+PARALLEL_FILE_CHUNK_SIZE = 1_000  # number of files handed to one parallelize_thread call
+PARALLEL_PROGRESS_TRACKER = 250  # passed to parallelize_thread as progress_tracker
+
+app = typer.Typer(help="Ingest unitedstates/congress data into data_science_dev.")
+
+
+@app.command()
+def main(
+    parent_dir: Annotated[
+        Path,
+        typer.Argument(
+            help="Parent directory containing congress-tracker/ and congress-legislators/"
+        ),
+    ],
+    congress: Annotated[
+        int | None, typer.Option(help="Only ingest a specific congress number")
+    ] = None,
+    only: Annotated[
+        str | None,
+        typer.Option(
+            help=(
+                "Only run a specific step: legislators, legislators-social-media, "
+                "bills, bill-text, votes, bill-status-context, vote-context-match, "
+                "vote-context-classify, vote-text-resolve, vote-polarity-resolve, "
+                "vote-context-diagnostics"
+            )
+        ),
+    ] = None,
+) -> None:
+    """Ingest congress data from unitedstates/congress JSON files.
+
+    Checks the directory layout and the --only choice, opens an ingest run,
+    runs the selected steps in order, and closes the run as "completed" or,
+    when a step raises, as "failed" before re-raising.
+    """
+    configure_logger(level="INFO")
+
+    data_dir = parent_dir / "congress-tracker/congress/data/"
+    legislators_dir = parent_dir / "congress-legislators"
+
+    for label, required_dir in (
+        ("congress-tracker/", data_dir),
+        ("congress-legislators/", legislators_dir),
+    ):
+        if not required_dir.is_dir():
+            typer.echo(f"Expected {label} directory: {required_dir}", err=True)
+            raise typer.Exit(code=1)
+
+    engine = get_postgres_engine(name="DATA_SCIENCE_DEV")
+
+    congress_dirs = _resolve_congress_dirs(data_dir, congress)
+    if not congress_dirs:
+        typer.echo("No congress directories found.", err=True)
+        raise typer.Exit(code=1)
+
+    logger.info("Found %d congress directories to process", len(congress_dirs))
+
+    # name -> (step function, its input path(s), whether it also takes the ingest run id)
+    steps: dict[str, tuple] = {
+        "legislators": (ingest_legislators, legislators_dir, False),
+        "legislators-social-media": (ingest_social_media, legislators_dir, False),
+        "bills": (ingest_bills, congress_dirs, False),
+        "bill-text": (ingest_bill_text, congress_dirs, True),
+        "votes": (ingest_votes, congress_dirs, True),
+        "bill-status-context": (ingest_bill_status_context_step, congress_dirs, True),
+        "vote-context-match": (vote_context_match_step, congress_dirs, False),
+        "vote-context-classify": (vote_context_classify_step, congress_dirs, False),
+        "vote-text-resolve": (vote_text_resolve_step, congress_dirs, False),
+        "vote-polarity-resolve": (vote_polarity_resolve_step, congress_dirs, False),
+        "vote-context-diagnostics": (vote_context_diagnostics_step, congress_dirs, False),
+    }
+
+    # Reject a bad --only before any ingest run is started, so a typo cannot
+    # leave a run behind that is never marked finished.
+    if only:
+        if only not in steps:
+            typer.echo(f"Unknown step: {only}. Choose from: {', '.join(steps)}", err=True)
+            raise typer.Exit(code=1)
+        steps = {only: steps[only]}
+
+    with Session(engine) as session:
+        ingest_run = start_ingest_run(
+            session,
+            source_snapshot_label=str(parent_dir),
+            # NOTE(review): for pipelines/jobs/ingest_congress.py this resolves to pipelines/ -- confirm it is the intended root.
+            repo_root=Path(__file__).resolve().parent.parent,
+        )
+        ingest_run_id = ingest_run.id
+
+    try:
+        for step_name, (step_func, step_input, wants_run_id) in steps.items():
+            step_args = (engine, step_input, ingest_run_id) if wants_run_id else (engine, step_input)
+            logger.info("=== Starting step: %s ===", step_name)
+            step_func(*step_args)
+            logger.info("=== Finished step: %s ===", step_name)
+    except Exception:
+        with Session(engine) as session:
+            finish_ingest_run(session, ingest_run_id, status="failed")
+        raise
+
+    with Session(engine) as session:
+        finish_ingest_run(session, ingest_run_id, status="completed")
+    logger.info("ingest-congress done")
+
+
+def _resolve_congress_dirs(data_dir: Path, congress: int | None) -> list[Path]:
+    """Return the requested congress directory, or every numbered one under data_dir, sorted."""
+    if congress is None:
+        # Only directories whose name is all digits count as congress numbers.
+        numbered = [entry for entry in data_dir.iterdir() if entry.is_dir() and entry.name.isdigit()]
+        return sorted(numbered)
+    requested = data_dir / str(congress)
+    return [requested] if requested.is_dir() else []
+
+
+def _flush_batch(session: Session, batch: list[object], label: str) -> int:
+    """Add `batch` to the session, commit, and empty it in place; returns how many objects were committed."""
+    if not batch:
+        return 0
+    pending = len(batch)
+    session.add_all(batch)
+    session.commit()
+    batch.clear()
+    logger.info("Committed %d %s", pending, label)
+    return pending
+
+
+@dataclass(frozen=True)
+class LoadedJsonFile:  # A JSON file path paired with what _read_json returned for it.
+    path: Path  # file that was read
+    data: dict | None  # parsed content; callers skip entries whose data is None
+
+
+@dataclass(frozen=True)
+class PreparedBillTextInput:  # Everything _prepare_bill_text_input gathers for one bill-text version directory.
+    bill_id: int  # passed through from the caller
+    bill_key: tuple[int, str, int]  # presumably (congress, bill_type, number) -- TODO confirm against caller
+    version_code: str  # name of the version directory
+    text_content: str | None  # result of _read_bill_text(version_dir)
+    version_data: dict | None  # result of _read_json on the directory's data.json
+    source_file: Path | None  # data.json, else the _find_text_source_file fallback; None when neither exists
+    billstatus_version: object | None  # opaque value passed through from the caller
+
+
+def _chunked[T](items: Sequence[T], chunk_size: int) -> Iterator[Sequence[T]]:
+    """Yield consecutive slices of `items`, each `chunk_size` long except possibly the last."""
+    offsets = range(0, len(items), chunk_size)
+    yield from (items[offset : offset + chunk_size] for offset in offsets)
+
+
+def _load_json_file(*, path: Path) -> LoadedJsonFile:
+    """Parse `path` with _read_json and pair the result with the path for later serial DB processing."""
+    return LoadedJsonFile(data=_read_json(path), path=path)
+
+
+def _prepare_bill_text_input(
+    *,
+    bill_id: int,
+    bill_key: tuple[int, str, int],
+    version_dir: Path,
+    billstatus_version: object | None,
+) -> PreparedBillTextInput:
+    """Gather one bill-text version directory into a PreparedBillTextInput for later serial DB processing."""
+    metadata_path = version_dir / "data.json"
+    # Source file: data.json when present, otherwise whatever _find_text_source_file returns.
+    provenance = metadata_path if metadata_path.exists() else _find_text_source_file(version_dir)
+    return PreparedBillTextInput(
+        bill_id=bill_id,
+        bill_key=bill_key,
+        version_code=version_dir.name,
+        text_content=_read_bill_text(version_dir),
+        version_data=_read_json(metadata_path),
+        source_file=provenance if provenance and provenance.exists() else None,
+        billstatus_version=billstatus_version,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Legislators — loaded from congress-legislators YAML files
+# ---------------------------------------------------------------------------
+
+
+def ingest_legislators(engine: Engine, legislators_dir: Path) -> None:
+    """Insert new legislators and refresh changed fields on existing ones.
+
+    Entries are matched to stored rows by bioguide id, and entries without one
+    are skipped. A parsed value of None never overwrites a stored value.
+    Everything is committed once at the end.
+    """
+    legislators_data = _load_legislators_yaml(legislators_dir)
+    logger.info("Loaded %d legislators from YAML files", len(legislators_data))
+
+    inserted = updated = 0
+    with Session(engine) as session:
+        by_bioguide = {row.bioguide_id: row for row in session.scalars(select(Legislator)).all()}
+        logger.info("Found %d existing legislators in DB", len(by_bioguide))
+
+        for entry in legislators_data:
+            bioguide_id = entry.get("id", {}).get("bioguide")
+            if not bioguide_id:
+                continue
+
+            fields = _parse_legislator(entry)
+            stored = by_bioguide.get(bioguide_id)
+            if not stored:
+                session.add(Legislator(bioguide_id=bioguide_id, **fields))
+                inserted += 1
+                continue
+
+            changes = {
+                name: value
+                for name, value in fields.items()
+                if value is not None and getattr(stored, name) != value
+            }
+            for name, value in changes.items():
+                setattr(stored, name, value)
+            updated += bool(changes)
+
+        session.commit()
+    logger.info("Inserted %d new legislators, updated %d existing", inserted, updated)
+
+
+def _load_legislators_yaml(legislators_dir: Path) -> list[dict]:
+    """Return entries of legislators-current.yaml then legislators-historical.yaml; missing or non-list files add nothing."""
+    legislators: list[dict] = []
+    for filename in ("legislators-current.yaml", "legislators-historical.yaml"):
+        path = legislators_dir / filename
+        if not path.exists():
+            logger.warning("Legislators file not found: %s", path)
+            continue
+        with path.open(encoding="utf-8") as file:  # explicit UTF-8, not the platform's locale encoding
+            data = yaml.safe_load(file)
+        if isinstance(data, list):
+            legislators.extend(data)
+    return legislators
+
+
+def _parse_legislator(entry: dict) -> dict:
+    """Map one congress-legislators YAML entry onto Legislator field values.
+
+    The "current_*" values come from the last listed term, a list of FEC ids
+    is comma-joined, and term types "rep"/"sen" become "House"/"Senate".
+    """
+    ids, name, bio = (entry.get(section, {}) for section in ("id", "name", "bio"))
+    terms = entry.get("terms", [])
+    latest_term = terms[-1] if terms else {}
+
+    fec_ids = ids.get("fec")
+    if isinstance(fec_ids, list):
+        fec_ids = ",".join(fec_ids)
+    chamber = latest_term.get("type")
+    return {
+        "thomas_id": ids.get("thomas"),
+        "lis_id": ids.get("lis"),
+        "govtrack_id": ids.get("govtrack"),
+        "opensecrets_id": ids.get("opensecrets"),
+        "fec_ids": fec_ids,
+        "first_name": name.get("first"),
+        "last_name": name.get("last"),
+        "official_full_name": name.get("official_full"),
+        "nickname": name.get("nickname"),
+        "birthday": bio.get("birthday"),
+        "gender": bio.get("gender"),
+        "current_party": latest_term.get("party"),
+        "current_state": latest_term.get("state"),
+        "current_district": latest_term.get("district"),
+        "current_chamber": {"rep": "House", "sen": "Senate"}.get(chamber, chamber),
+    }
+
+
+# ---------------------------------------------------------------------------
+# Social Media — loaded from legislators-social-media.yaml
+# ---------------------------------------------------------------------------
+
+SOCIAL_MEDIA_PLATFORMS = {  # platform key in a YAML entry's "social" mapping -> profile URL template
+    "twitter": "https://twitter.com/{account}",
+    "facebook": "https://facebook.com/{account}",
+    "youtube": "https://youtube.com/{account}",
+    "instagram": "https://instagram.com/{account}",
+    "mastodon": None,  # no template, so ingest_social_media stores url=None for this platform
+}
+
+
+def ingest_social_media(engine: Engine, legislators_dir: Path) -> None:
+    """Load social media accounts from legislators-social-media.yaml.
+
+    For every entry whose bioguide id maps to a known legislator, each platform
+    in SOCIAL_MEDIA_PLATFORMS with a non-empty handle is stored as one
+    LegislatorSocialMedia row keyed by (legislator_id, platform). New keys are
+    inserted; an existing row has its account name and URL refreshed when they
+    differ from the YAML, and only rows actually changed count as updated.
+    """
+    social_media_path = legislators_dir / "legislators-social-media.yaml"
+    if not social_media_path.exists():
+        logger.warning("Social media file not found: %s", social_media_path)
+        return
+
+    # Decode as UTF-8 explicitly instead of relying on the platform's locale encoding.
+    with social_media_path.open(encoding="utf-8") as file:
+        social_media_data = yaml.safe_load(file)
+
+    if not isinstance(social_media_data, list):
+        logger.warning("Unexpected format in %s", social_media_path)
+        return
+
+    logger.info("Loaded %d entries from legislators-social-media.yaml", len(social_media_data))
+
+    with Session(engine) as session:
+        legislator_map = _build_legislator_map(session)
+        existing_accounts = {
+            (account.legislator_id, account.platform): account
+            for account in session.scalars(select(LegislatorSocialMedia)).all()
+        }
+        logger.info("Found %d existing social media accounts in DB", len(existing_accounts))
+
+        total_inserted = 0
+        total_updated = 0
+        for entry in social_media_data:
+            bioguide_id = entry.get("id", {}).get("bioguide")
+            if not bioguide_id:
+                continue
+
+            legislator_id = legislator_map.get(bioguide_id)
+            if legislator_id is None:
+                continue
+
+            social = entry.get("social", {})
+            for platform, url_template in SOCIAL_MEDIA_PLATFORMS.items():
+                raw_handle = social.get(platform)
+                if not raw_handle:
+                    continue
+                account_name = str(raw_handle)
+                url = url_template.format(account=raw_handle) if url_template else None
+                account = existing_accounts.get((legislator_id, platform))
+                if account is None:
+                    account = LegislatorSocialMedia(
+                        legislator_id=legislator_id,
+                        platform=platform,
+                        account_name=account_name,
+                        url=url,
+                        source="https://github.com/unitedstates/congress-legislators",
+                    )
+                    session.add(account)
+                    existing_accounts[(legislator_id, platform)] = account
+                    total_inserted += 1
+                elif (account.account_name, account.url) != (account_name, url):
+                    # Refresh the stored handle; merely counting the row would leave it stale.
+                    account.account_name = account_name
+                    account.url = url
+                    total_updated += 1
+
+        session.commit()
+    logger.info("Inserted %d new social media accounts, updated %d existing", total_inserted, total_updated)
+
+
+def _iter_voters(position_group: object) -> Iterator[dict]:
+    """Yield voter dicts from a position group: a lone dict yields itself, a list yields its dict items, anything else nothing."""
+    if isinstance(position_group, dict):
+        yield position_group
+        return
+    if not isinstance(position_group, list):
+        return
+    yield from (voter for voter in position_group if isinstance(voter, dict))
+
+
+# ---------------------------------------------------------------------------
+# Bills
+# ---------------------------------------------------------------------------
+
+
+def ingest_bills(engine: Engine, congress_dirs: list[Path]) -> None:
+    """Insert Bill rows for bill data.json files whose bill is not yet in the database.
+
+    Files are loaded chunk by chunk through parallelize_thread, parsed one by
+    one, and committed in batches of BATCH_SIZE plus one final partial batch.
+    """
+    with Session(engine) as session:
+        known_keys = {
+            (bill.congress, bill.bill_type, bill.number) for bill in session.scalars(select(Bill)).all()
+        }
+        logger.info("Found %d existing bills in DB", len(known_keys))
+
+        inserted = 0
+        pending: list[Bill] = []
+        for congress_dir in congress_dirs:
+            bills_dir = congress_dir / "bills"
+            if not bills_dir.is_dir():
+                continue
+            bill_files = sorted(bills_dir.rglob("data.json"))
+            logger.info("Scanning %d bill files from %s", len(bill_files), congress_dir.name)
+            for chunk in _chunked(bill_files, PARALLEL_FILE_CHUNK_SIZE):
+                loaded_files = parallelize_thread(
+                    _load_json_file,
+                    [{"path": bill_file} for bill_file in chunk],
+                    progress_tracker=PARALLEL_PROGRESS_TRACKER,
+                ).results
+                for loaded in loaded_files:
+                    if loaded.data is None:
+                        continue
+                    bill = _parse_bill(loaded.data, known_keys)
+                    if bill is None:
+                        continue
+                    pending.append(bill)
+                    if len(pending) >= BATCH_SIZE:
+                        inserted += _flush_batch(session, pending, "bills")
+
+        inserted += _flush_batch(session, pending, "bills")
+    logger.info("Inserted %d new bills total", inserted)
+
+
+def _parse_bill(data: dict, existing_bills: set[tuple[int, str, int]]) -> Bill | None:
+    """Build a Bill from one bill data.json dict.
+
+    Returns None when congress, bill_type or number is missing, or when the
+    (congress, bill_type, number) key is already in `existing_bills`.
+    """
+    identity = (data.get("congress"), data.get("bill_type"), data.get("number"))
+    if any(part is None for part in identity):
+        return None
+    congress, bill_type, number = int(identity[0]), identity[1], int(identity[2])
+    if (congress, bill_type, number) in existing_bills:
+        return None
+
+    sponsor = data.get("sponsor")
+    short_title = data.get("short_title")
+    official_title = data.get("official_title")
+    # title prefers the short title and falls back to the official title.
+    return Bill(
+        congress=congress,
+        bill_type=bill_type,
+        number=number,
+        title=short_title or official_title,
+        title_short=short_title,
+        official_title=official_title,
+        status=data.get("status"),
+        status_at=data.get("status_at"),
+        sponsor_bioguide_id=sponsor.get("bioguide_id") if sponsor else None,
+        subjects_top_term=data.get("subjects_top_term"),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Votes (and vote records)
+# ---------------------------------------------------------------------------
+
+
+def ingest_votes(
+    engine: Engine,
+    congress_dirs: list[Path],
+    ingest_run_id: int | None,
+) -> None:
+    """Scan <congress_dir>/votes/**/data.json and queue votes that are not yet in the DB."""
+    legislator_map = _build_legislator_map_for_engine(engine)
+    logger.info("Loaded %d legislators into lookup map", len(legislator_map))
+    with Session(engine) as session:
+        existing_votes = {  # identity tuples used by _parse_vote to skip known votes
+            (vote.congress, vote.chamber, vote.session_number, vote.roll_number)
+            for vote in session.scalars(select(Vote)).all()  # NOTE(review): loads full Vote rows
+        }
+        logger.info("Found %d existing votes in DB", len(existing_votes))
+
+        total_inserted = 0
+        batch: list[Vote] = []  # presumably emptied in place by _flush_batch - TODO confirm
+        for congress_dir in congress_dirs:
+            votes_dir = congress_dir / "votes"
+            if not votes_dir.is_dir():
+                continue
+            vote_files = sorted(votes_dir.rglob("data.json"))  # sorted for a stable scan order
+            logger.info(
+                "Scanning %d vote files from %s",
+                len(vote_files),
+                congress_dir.name,
+            )
+            for chunk in _chunked(vote_files, PARALLEL_FILE_CHUNK_SIZE):
+                results = parallelize_thread(
+                    _load_json_file,
+                    [{"path": vote_file} for vote_file in chunk],
+                    progress_tracker=PARALLEL_PROGRESS_TRACKER,
+                )
+                for loaded in results.results:
+                    if loaded.data is None:
+                        continue
+                    chamber = normalize_vote_chamber(loaded.data.get("chamber"))
+                    if chamber is None:  # only when the JSON carries no "chamber" value
+                        continue
+                    # NOTE(review): the artifact is registered before _parse_vote checks
+                    # existing_votes, i.e. also for votes that end up being skipped.
+                    artifact = register_source_artifact(
+                        session,
+                        path=loaded.path,
+                        source_kind="vote_json",
+                        congress=int(loaded.data.get("congress", congress_dir.name)),
+                        chamber=chamber,
+                        ingest_run_id=ingest_run_id,
+                        source_url=parse_vote_source_url(loaded.data),
+                    )
+                    vote = _parse_vote(
+                        loaded.data,
+                        legislator_map,
+                        existing_votes,  # NOTE(review): not updated with votes queued this run
+                        raw_vote_source_artifact_id=artifact.id,
+                    )
+                    if vote is not None:
+                        batch.append(vote)
+                        if len(batch) >= BATCH_SIZE:
+                            total_inserted += _flush_batch(session, batch, "votes")
+
+        total_inserted += _flush_batch(session, batch, "votes")  # final partial batch
+    logger.info("Inserted %d new votes total", total_inserted)
+
+
+def _build_legislator_map(session: Session) -> dict[str, int]:
+    """Return a lookup from each legislator's bioguide_id to legislator.id."""
+    lookup: dict[str, int] = {}
+    for row in session.scalars(select(Legislator)).all():
+        lookup[row.bioguide_id] = row.id
+    return lookup
+
+
+def _build_bill_map(session: Session) -> dict[tuple[int, str, int], int]:
+    """Return a lookup from (congress, bill_type, number) to bill.id."""
+    lookup: dict[tuple[int, str, int], int] = {}
+    for row in session.scalars(select(Bill)).all():
+        lookup[(row.congress, row.bill_type, row.number)] = row.id
+    return lookup
+
+
+def _build_legislator_map_for_engine(engine: Engine) -> dict[str, int]:
+    """Load the legislator lookup through a Session that is closed again on return."""
+    with Session(engine) as read_session:
+        return _build_legislator_map(read_session)
+
+
+def _build_bill_map_for_engine(engine: Engine) -> dict[tuple[int, str, int], int]:
+    """Load the bill lookup through a Session that is closed again on return."""
+    with Session(engine) as read_session:
+        return _build_bill_map(read_session)
+
+
+def _parse_vote(
+    data: dict,
+    legislator_map: dict[str, int],
+    existing_votes: set[tuple[int, str, int, int]],
+    *,
+    raw_vote_source_artifact_id: int | None,
+) -> Vote | None:
+    """Build a Vote with its VoteRecords from a vote data.json dict; None if unusable or known."""
+    raw_congress = data.get("congress")
+    chamber = data.get("chamber")
+    raw_number = data.get("number")
+    vote_date = data.get("date")
+    if (  # congress, chamber, roll number and date are all mandatory
+        raw_congress is None
+        or chamber is None
+        or raw_number is None
+        or vote_date is None
+    ):
+        return None
+
+    raw_session = data.get("session")
+    congress = int(raw_congress)
+    number = int(raw_number)
+    parsed_vote_date = _coerce_iso_date(vote_date)  # ValueError on a malformed date string
+    session_year = parsed_vote_date.year  # calendar year of the vote date
+    if raw_session is None:  # no explicit session in the JSON: derive it instead
+        session_number = derive_session_number(congress, session_year)
+    else:
+        session_number = int(raw_session)
+
+    # Normalize chamber from "h"/"s" to "House"/"Senate"
+    chamber_normalized = normalize_vote_chamber(chamber)
+    if chamber_normalized is None:  # NOTE(review): unreachable, chamber is non-None here
+        return None
+
+    if (congress, chamber_normalized, session_number, number) in existing_votes:
+        return None
+
+    raw_votes = data.get("votes", {})  # NOTE(review): an explicit null would break .items()
+    vote_counts = _count_votes(raw_votes)
+    vote_records = _build_vote_records(raw_votes, legislator_map)
+
+    return Vote(
+        congress=congress,
+        chamber=chamber_normalized,
+        session_year=session_year,
+        session_number=session_number,
+        roll_number=number,
+        vote_type=data.get("type"),
+        question=data.get("question"),
+        result=data.get("result"),
+        result_text=data.get("result_text"),
+        vote_date=parsed_vote_date,
+        vote_datetime=parsed_vote_datetime(data),  # NOTE(review): siblings are parse_*; confirm name
+        raw_vote_source_url=parse_vote_source_url(data),
+        raw_bill_ref=coerce_raw_ref(data.get("bill")),
+        raw_amendment_ref=coerce_raw_ref(data.get("amendment")),
+        raw_nomination_ref=coerce_raw_ref(data.get("nomination")),
+        raw_treaty_ref=coerce_raw_ref(data.get("treaty")),
+        raw_vote_source_artifact_id=raw_vote_source_artifact_id,
+        vote_records=vote_records,
+        **vote_counts,  # yea/nay/not_voting/present totals from _count_votes
+    )
+
+
+def _count_votes(raw_votes: dict) -> dict[str, int]:
+    """Tally voters into yea/nay/not-voting/present totals; other positions are ignored."""
+    # Map each recognised position label onto the total it contributes to.
+    column_for_position = {
+        "Yea": "yea_count",
+        "Aye": "yea_count",
+        "Nay": "nay_count",
+        "No": "nay_count",
+        "Not Voting": "not_voting_count",
+        "Present": "present_count",
+    }
+    totals = {
+        "yea_count": 0,
+        "nay_count": 0,
+        "not_voting_count": 0,
+        "present_count": 0,
+    }
+    for position, position_group in raw_votes.items():
+        # Voters are enumerated through _iter_voters and counted one by one.
+        voter_count = sum(1 for _ in _iter_voters(position_group))
+        column = column_for_position.get(position)
+        if column is not None:
+            totals[column] += voter_count
+    return totals
+
+
+def _build_vote_records(
+    raw_votes: dict, legislator_map: dict[str, int]
+) -> list[VoteRecord]:
+    """Create one VoteRecord per voter whose bioguide id maps to a known legislator."""
+    # Flatten the {position: group} mapping into (position, voter) pairs first.
+    position_voter_pairs = (
+        (position, voter)
+        for position, position_group in raw_votes.items()
+        for voter in _iter_voters(position_group)
+    )
+    records: list[VoteRecord] = []
+    for position, voter in position_voter_pairs:
+        bioguide_id = voter.get("id")
+        # Voters with no "id", or an id missing from the lookup, are dropped.
+        legislator_id = legislator_map.get(bioguide_id) if bioguide_id else None
+        if legislator_id is not None:
+            records.append(
+                VoteRecord(legislator_id=legislator_id, position=position)
+            )
+    return records
+
+
+def normalize_vote_chamber(raw_chamber: str | None) -> str | None:
+    """Map vote JSON chamber codes to "House"/"Senate"; unknown values pass through."""
+    if raw_chamber is None:
+        return None
+    cleaned = raw_chamber.strip().lower()
+    if cleaned in ("h", "house"):
+        return "House"
+    # Anything unrecognised is handed back exactly as received (not stripped).
+    return "Senate" if cleaned in ("s", "senate") else raw_chamber
+
+
+# ---------------------------------------------------------------------------
+# Bill Text
+# ---------------------------------------------------------------------------
+
+
+def ingest_bill_text(
+    engine: Engine,
+    congress_dirs: list[Path],
+    ingest_run_id: int | None,
+) -> None:
+    """Queue BillText rows for text-versions directories whose (bill, version) is not stored yet."""
+    bill_map = _build_bill_map_for_engine(engine)  # loaded via its own short-lived session
+    with Session(engine) as session:
+        logger.info("Loaded %d bills into lookup map", len(bill_map))
+        billstatus_text_index = build_billstatus_text_version_index(congress_dirs)
+        logger.info(
+            "Loaded bill status text metadata for %d bills",
+            len(billstatus_text_index),
+        )
+        existing_bill_texts = {  # (bill_id, version_code) pairs used to skip known versions
+            (bill_text.bill_id, bill_text.version_code)
+            for bill_text in session.scalars(select(BillText)).all()  # NOTE(review): full rows
+        }
+        logger.info(
+            "Found %d existing bill text versions in DB", len(existing_bill_texts)
+        )
+
+        total_inserted = 0
+        batch: list[BillText] = []  # presumably emptied in place by _flush_batch - TODO confirm
+        for congress_dir in congress_dirs:
+            logger.info("Scanning bill texts from %s", congress_dir.name)
+            for bill_text in _iter_bill_texts(
+                session,
+                congress_dir,
+                bill_map,
+                existing_bill_texts,
+                billstatus_text_index,
+                ingest_run_id,
+            ):
+                batch.append(bill_text)
+                if len(batch) >= BATCH_SIZE:
+                    total_inserted += _flush_batch(session, batch, "bill texts")
+
+        total_inserted += _flush_batch(session, batch, "bill texts")  # final partial batch
+    logger.info("Inserted %d new bill text versions total", total_inserted)
+
+
+def _iter_bill_texts(
+    session: Session,
+    congress_dir: Path,
+    bill_map: dict[tuple[int, str, int], int],
+    existing_bill_texts: set[tuple[int, str]],
+    billstatus_text_index: dict[tuple[int, str, int], dict[str, object]],
+    ingest_run_id: int | None,
+) -> Iterator[BillText]:
+    """Yield a BillText for each not-yet-stored version directory under <congress_dir>/bills."""
+    bills_dir = congress_dir / "bills"
+    if not bills_dir.is_dir():
+        return
+
+    tasks: list[dict[str, object]] = []  # one kwargs dict per version directory to prepare
+    for bill_dir in bills_dir.rglob("text-versions"):  # bill_dir is <bill>/text-versions
+        if not bill_dir.is_dir():
+            continue
+        bill_key = _bill_key_from_dir(bill_dir.parent, congress_dir)
+        if bill_key is None:
+            continue
+        bill_id = bill_map.get(bill_key)
+        if bill_id is None:  # text for a bill that is not in the lookup is ignored
+            continue
+
+        for version_dir in sorted(bill_dir.iterdir()):
+            if not version_dir.is_dir():
+                continue
+            # NOTE(review): compares the raw directory name, while the billstatus lookup
+            # below lowercases it - confirm version codes are stored in directory case.
+            if (bill_id, version_dir.name) in existing_bill_texts:
+                continue
+            tasks.append(
+                {
+                    "bill_id": bill_id,
+                    "bill_key": bill_key,
+                    "version_dir": version_dir,
+                    "billstatus_version": billstatus_text_index.get(bill_key, {}).get(
+                        version_dir.name.lower()
+                    ),
+                }
+            )
+
+    for chunk in _chunked(tasks, PARALLEL_FILE_CHUNK_SIZE):
+        results = parallelize_thread(
+            _prepare_bill_text_input,
+            list(chunk),
+            progress_tracker=PARALLEL_PROGRESS_TRACKER,
+        )
+        for prepared in results.results:
+            source_artifact_id = None  # stays None when no local source file was found
+            if prepared.source_file is not None:
+                artifact = register_source_artifact(
+                    session,
+                    path=prepared.source_file,
+                    source_kind="bill_text_artifact",
+                    congress=prepared.bill_key[0],
+                    chamber=None,
+                    ingest_run_id=ingest_run_id,
+                    source_url=None,
+                )
+                source_artifact_id = artifact.id
+            metadata = _merge_bill_text_metadata(
+                version_code=prepared.version_code,
+                version_data=prepared.version_data,
+                billstatus_version=prepared.billstatus_version,
+            )
+            yield BillText(
+                bill_id=prepared.bill_id,
+                version_code=prepared.version_code,
+                version_name=metadata["version_name"],
+                date=metadata["date"],
+                text_content=prepared.text_content,
+                source_datetime_raw=metadata["source_datetime_raw"],
+                text_url_xml=metadata["text_url_xml"],
+                text_url_pdf=metadata["text_url_pdf"],
+                text_url_html=metadata["text_url_html"],
+                source_artifact_id=source_artifact_id,
+            )
+
+
+def _bill_key_from_dir(
+    bill_dir: Path, congress_dir: Path
+) -> tuple[int, str, int] | None:
+    """Extract (congress, bill_type, number) from a <type>/<type><number> bill directory."""
+    congress = int(congress_dir.name)  # raises ValueError for a non-numeric directory
+    bill_type = bill_dir.parent.name
+    name = bill_dir.name
+    # Directory name is like "hr3590": require the type prefix instead of slicing blindly,
+    # and use isdecimal() so only strings int() can parse get through.
+    if not name.startswith(bill_type) or not name[len(bill_type) :].isdecimal():
+        return None
+    return (congress, bill_type, int(name[len(bill_type) :]))
+
+
+def _read_bill_text(version_dir: Path) -> str | None:
+    """Return text of the first readable txt/htm/html/xml file (sorted pick), else None."""
+    for extension in ("txt", "htm", "html", "xml"):  # tried in this preference order
+        candidates = sorted(version_dir.glob(f"document.{extension}"))  # stable order
+        if not candidates:
+            candidates = sorted(version_dir.glob(f"*.{extension}"))
+        if candidates:
+            try:
+                return candidates[0].read_text(encoding="utf-8")
+            except (OSError, UnicodeError):  # unreadable: log, then try the next extension
+                logger.exception("Failed to read %s", candidates[0])
+    return None
+
+
+def _find_text_source_file(version_dir: Path) -> Path | None:
+    """Locate one representative local file, using the same sorted pick as _read_bill_text."""
+    for extension in ("txt", "htm", "html", "xml"):  # tried in this preference order
+        candidates = sorted(version_dir.glob(f"document.{extension}"))  # stable order
+        if not candidates:
+            candidates = sorted(version_dir.glob(f"*.{extension}"))
+        if candidates:
+            return candidates[0]
+    return None
+
+
+def _merge_bill_text_metadata(
+    *,
+    version_code: str,
+    version_data: dict | None,
+    billstatus_version: object | None,
+) -> dict[str, object | None]:
+    """Combine text-version JSON metadata with billstatus metadata; billstatus wins."""
+    # Start from an all-empty record; each layer below fills in what it knows.
+    merged: dict[str, object | None] = {
+        "version_code": version_code,
+        "version_name": None,
+        "date": None,
+        "source_datetime_raw": None,
+        "text_url_xml": None,
+        "text_url_pdf": None,
+        "text_url_html": None,
+    }
+
+    # Layer 1: the text-version payload, when one was supplied and is non-empty.
+    if version_data:
+        merged["version_name"] = version_data.get("version_name")
+        merged["date"] = parse_date_like(version_data.get("issued_on"))
+        urls = version_data.get("urls")
+        if isinstance(urls, dict):
+            merged["text_url_xml"] = urls.get("xml") or urls.get("formatted_xml")
+            merged["text_url_pdf"] = urls.get("pdf")
+            merged["text_url_html"] = urls.get("html") or urls.get("formatted_text")
+
+    # Layer 2: any truthy attribute on the billstatus record overrides layer 1.
+    if billstatus_version is not None:
+        attribute_for_key = {
+            "version_name": "version_name",
+            "date": "version_date",
+            "source_datetime_raw": "source_datetime_raw",
+            "text_url_xml": "text_url_xml",
+            "text_url_pdf": "text_url_pdf",
+            "text_url_html": "text_url_html",
+        }
+        for key, attribute in attribute_for_key.items():
+            merged[key] = getattr(billstatus_version, attribute, None) or merged[key]
+
+    return merged
+
+
+def _coerce_iso_date(value: str | date) -> date:
+    """Return a plain date from a date, a datetime, or a string that starts YYYY-MM-DD."""
+    if isinstance(value, date):
+        return date(value.year, value.month, value.day)  # also strips a datetime's time
+    return date.fromisoformat(value[:10])
+
+
+def ingest_bill_status_context_step(
+    engine: Engine,
+    congress_dirs: list[Path],
+    ingest_run_id: int | None,
+) -> None:
+    """Run rebuild_bill_status_context for the congress dirs that support BILLSTATUS context."""
+    eligible_dirs = filter_context_supported_congress_dirs(congress_dirs)
+    if eligible_dirs:
+        # Checked before the bill lookup is loaded or the working session is opened.
+        require_billstatus_artifacts(eligible_dirs)
+        rebuild_kwargs = {
+            "congress_dirs": eligible_dirs,
+            "bill_map": _build_bill_map_for_engine(engine),
+            "ingest_run_id": ingest_run_id,
+        }
+        with Session(engine) as session:
+            rebuild_bill_status_context(session, **rebuild_kwargs)
+        return
+    logger.info("No congress directories support offline BILLSTATUS context; skipping.")
+
+
+def vote_context_match_step(engine: Engine, congress_dirs: list[Path]) -> None:
+    """Run build_vote_action_matches for the congresses that support BILLSTATUS context."""
+    eligible_dirs = filter_context_supported_congress_dirs(congress_dirs)
+    if eligible_dirs:
+        require_billstatus_artifacts(eligible_dirs)
+        numbers = [int(congress_dir.name) for congress_dir in eligible_dirs]
+        with Session(engine) as session:
+            build_vote_action_matches(session, congress_numbers=numbers)
+        return
+    logger.info("No congress directories support offline BILLSTATUS context; skipping.")
+
+
+def vote_context_classify_step(engine: Engine, congress_dirs: list[Path]) -> None:
+    """Run classify_votes for the congresses that support BILLSTATUS context."""
+    eligible_dirs = filter_context_supported_congress_dirs(congress_dirs)
+    if eligible_dirs:
+        require_billstatus_artifacts(eligible_dirs)
+        # Both arguments are computed before the working session is opened.
+        classify_kwargs = {
+            "congress_numbers": [int(entry.name) for entry in eligible_dirs],
+            # The bill lookup is read through its own short-lived session.
+            "bill_map": _build_bill_map_for_engine(engine),
+        }
+        with Session(engine) as session:
+            classify_votes(session, **classify_kwargs)
+        return
+    logger.info("No congress directories support offline BILLSTATUS context; skipping.")
+
+
+def vote_text_resolve_step(engine: Engine, congress_dirs: list[Path]) -> None:
+    """Run resolve_vote_text_targets for the congresses that support BILLSTATUS context."""
+    eligible_dirs = filter_context_supported_congress_dirs(congress_dirs)
+    if eligible_dirs:
+        require_billstatus_artifacts(eligible_dirs)
+        numbers = [int(congress_dir.name) for congress_dir in eligible_dirs]
+        with Session(engine) as session:
+            resolve_vote_text_targets(session, congress_numbers=numbers)
+        return
+    logger.info("No congress directories support offline BILLSTATUS context; skipping.")
+
+
+def vote_polarity_resolve_step(engine: Engine, congress_dirs: list[Path]) -> None:
+    """Run resolve_vote_position_meanings for congresses that support BILLSTATUS context."""
+    eligible_dirs = filter_context_supported_congress_dirs(congress_dirs)
+    if eligible_dirs:
+        require_billstatus_artifacts(eligible_dirs)
+        numbers = [int(congress_dir.name) for congress_dir in eligible_dirs]
+        with Session(engine) as session:
+            resolve_vote_position_meanings(session, congress_numbers=numbers)
+        return
+    logger.info("No congress directories support offline BILLSTATUS context; skipping.")
+
+
+def vote_context_diagnostics_step(engine: Engine, congress_dirs: list[Path]) -> None:
+    """Log classification, selected-match and warning/error audit counts for supported congresses."""
+    supported_congress_dirs = filter_context_supported_congress_dirs(congress_dirs)
+    if not supported_congress_dirs:
+        logger.info("No congress directories support offline BILLSTATUS context; skipping.")
+        return
+    congress_numbers = [int(path.name) for path in supported_congress_dirs]  # dir name = congress
+    with Session(engine) as session:  # read-only: three aggregate queries, no writes
+        classification_counts = session.execute(  # rows: (subject_type, relationship, count)
+            select(
+                VoteClassification.subject_type,
+                VoteClassification.vote_relationship,
+                func.count(VoteClassification.vote_id),
+            )
+            .join(Vote, Vote.id == VoteClassification.vote_id)
+            .where(Vote.congress.in_(congress_numbers))
+            .group_by(
+                VoteClassification.subject_type,
+                VoteClassification.vote_relationship,
+            )
+            .order_by(
+                VoteClassification.subject_type,
+                VoteClassification.vote_relationship,
+            )
+        ).all()
+        match_counts = session.execute(  # rows: (match_method, count), selected matches only
+            select(VoteActionMatch.match_method, func.count(VoteActionMatch.id))
+            .join(Vote, Vote.id == VoteActionMatch.vote_id)
+            .where(Vote.congress.in_(congress_numbers), VoteActionMatch.is_selected.is_(True))
+            .group_by(VoteActionMatch.match_method)
+            .order_by(VoteActionMatch.match_method)
+        ).all()
+        unresolved_audits = session.scalar(  # number of audit rows with severity warning/error
+            select(func.count(VoteContextAudit.id))
+            .join(Vote, Vote.id == VoteContextAudit.vote_id)
+            .where(Vote.congress.in_(congress_numbers), VoteContextAudit.severity.in_(("warning", "error")))
+        )
+
+    for subject_type, vote_relationship, count in classification_counts:
+        logger.info(
+            "vote-context subject=%s relationship=%s count=%d",
+            subject_type,
+            vote_relationship,
+            count,
+        )
+    for match_method, count in match_counts:
+        logger.info("vote-context selected_match method=%s count=%d", match_method, count)
+    logger.info("vote-context unresolved audit rows=%d", unresolved_audits or 0)  # None -> 0
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _read_json(path: Path) -> dict | None:
+    """Load *path* as JSON; a missing file yields None silently, other errors are logged."""
+    try:
+        return orjson.loads(path.read_bytes())
+    except FileNotFoundError:
+        pass  # an absent file is not worth a log entry
+    except Exception:
+        logger.exception("Failed to parse %s", path)
+    return None
+
+
+if __name__ == "__main__":
+    app()
diff --git a/pipelines/jobs/ingest_posts.py b/pipelines/jobs/ingest_posts.py
new file mode 100644
index 0000000..fc3a602
--- /dev/null
+++ b/pipelines/jobs/ingest_posts.py
@@ -0,0 +1,281 @@
+"""Ingestion pipeline for loading JSONL post files into the weekly-partitioned posts table.
+
+Usage:
+    ingest-posts /path/to/files/
+    ingest-posts /path/to/single_file.jsonl
+    ingest-posts /data/dir/ --workers 4 --batch-size 5000
+"""
+
+from __future__ import annotations
+
+import logging
+from datetime import UTC, datetime
+from pathlib import Path  # noqa: TC003 this is needed for typer
+from typing import TYPE_CHECKING, Annotated
+
+import orjson
+import psycopg
+import typer
+
+from pipelines.pipelines.common import configure_logger
+from pipelines.orm.common import get_connection_info
+from pipelines.pipelines.parallelize import parallelize_process
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+logger = logging.getLogger(__name__)
+
+
+app = typer.Typer(help="Ingest JSONL post files into the partitioned posts table.")
+
+
+@app.command()
+def main(
+    path: Annotated[
+        Path,
+        typer.Argument(help="Directory containing JSONL files, or a single JSONL file"),
+    ],
+    batch_size: Annotated[int, typer.Option(help="Rows per INSERT batch")] = 10000,
+    workers: Annotated[
+        int, typer.Option(help="Parallel workers for multi-file ingestion")
+    ] = 4,
+    pattern: Annotated[
+        str, typer.Option(help="Glob pattern for JSONL files")
+    ] = "*.jsonl",
+) -> None:
+    """Typer entry point: ingest one JSONL file, or every matching file in a directory."""
+    configure_logger(level="INFO")
+    logger.info("starting ingest-posts")
+    logger.info(
+        "path=%s batch_size=%d workers=%d pattern=%s",
+        path,
+        batch_size,
+        workers,
+        pattern,
+    )
+    single_file = path.is_file()
+    if not single_file and not path.is_dir():
+        # Neither a regular file nor a directory: report on stderr and exit non-zero.
+        typer.echo(f"Path does not exist: {path}", err=True)
+        raise typer.Exit(code=1)
+    if single_file:
+        ingest_file(path, batch_size=batch_size)
+    else:
+        ingest_directory(
+            path, batch_size=batch_size, max_workers=workers, pattern=pattern
+        )
+    logger.info("ingest-posts done")
+
+
+def ingest_directory(
+    directory: Path,
+    *,
+    batch_size: int,
+    max_workers: int,
+    pattern: str = "*.jsonl",
+) -> None:
+    """Run ingest_file over every path in *directory* that matches *pattern*."""
+    matched_paths = sorted(directory.glob(pattern))
+    if matched_paths:
+        logger.info("Found %d JSONL files to ingest", len(matched_paths))
+        # One keyword-argument dict per file, handed to parallelize_process.
+        jobs = [{"path": entry, "batch_size": batch_size} for entry in matched_paths]
+        parallelize_process(ingest_file, jobs, max_workers=max_workers)
+        return
+
+    logger.warning("No JSONL files found in %s", directory)
+
+
+SCHEMA = "main"  # schema that qualifies the posts and failed_ingestion tables below
+
+COLUMNS = (  # column list shared by the COPY into staging and the INSERT into posts
+    "post_id",
+    "user_id",
+    "instance",
+    "date",
+    "text",
+    "langs",
+    "like_count",
+    "reply_count",
+    "repost_count",
+    "reply_to",
+    "replied_author",
+    "thread_root",
+    "thread_root_author",
+    "repost_from",
+    "reposted_author",
+    "quotes",
+    "quoted_author",
+    "labels",
+    "sent_label",
+    "sent_score",
+)
+
+INSERT_FROM_STAGING = f"""
+    INSERT INTO {SCHEMA}.posts ({", ".join(COLUMNS)})
+    SELECT {", ".join(COLUMNS)} FROM pg_temp.staging
+    ON CONFLICT (post_id, date) DO NOTHING
+"""  # noqa: S608
+
+FAILED_INSERT = f"""
+    INSERT INTO {SCHEMA}.failed_ingestion (raw_line, error)
+    VALUES (%(raw_line)s, %(error)s)
+"""  # noqa: S608
+
+
+def get_psycopg_connection() -> psycopg.Connection:
+    """Open a psycopg connection (autocommit off) from the DATA_SCIENCE_DEV connection info."""
+    database, host, port, username, password = get_connection_info("DATA_SCIENCE_DEV")
+    connect_kwargs = {
+        "dbname": database,
+        "host": host,
+        "port": int(port),
+        "user": username,
+        "password": password,
+    }
+    return psycopg.connect(**connect_kwargs, autocommit=False)
+
+
+def ingest_file(path: Path, *, batch_size: int) -> None:
+    """Ingest one JSONL file batch by batch, then record its malformed lines; re-raises on error."""
+    log_trigger = max(100_000 // batch_size, 1)  # log about every 100k rows, at least per batch
+    failed_lines: list[dict] = []  # appended to by parse_line while the generator is consumed
+    try:
+        with get_psycopg_connection() as connection:
+            for index, batch in enumerate(
+                read_jsonl_batches(path, batch_size, failed_lines), 1
+            ):
+                ingest_batch(connection, batch)  # commits (or bisects and commits) internally
+                if index % log_trigger == 0:
+                    logger.info(
+                        "Ingested %d batches (%d rows) from %s",
+                        index,
+                        index * batch_size,  # estimate: a batch may hold more than batch_size rows
+                        path,
+                    )
+
+            if failed_lines:  # only written once the whole file has been read
+                logger.warning(
+                    "Recording %d malformed lines from %s", len(failed_lines), path.name
+                )
+                with connection.cursor() as cursor:
+                    cursor.executemany(FAILED_INSERT, failed_lines)
+                connection.commit()
+    except Exception:
+        logger.exception("Failed to ingest file: %s", path)
+        raise
+
+
+def ingest_batch(connection: psycopg.Connection, batch: list[dict]) -> None:
+    """COPY batch into a temp staging table, then INSERT ... ON CONFLICT into posts; bisect on failure."""
+    if not batch:
+        return
+
+    try:
+        with connection.cursor() as cursor:
+            cursor.execute(f"""
+                CREATE TEMP TABLE IF NOT EXISTS staging
+                (LIKE {SCHEMA}.posts INCLUDING DEFAULTS)
+                ON COMMIT DELETE ROWS
+            """)
+            cursor.execute("TRUNCATE pg_temp.staging")  # start every batch from an empty table
+
+            with cursor.copy(
+                f"COPY pg_temp.staging ({', '.join(COLUMNS)}) FROM STDIN"
+            ) as copy:
+                for row in batch:
+                    copy.write_row(tuple(row.get(column) for column in COLUMNS))  # missing -> None
+
+            cursor.execute(INSERT_FROM_STAGING)
+        connection.commit()
+    except Exception as error:  # any failure: undo the batch, then narrow down the bad row(s)
+        connection.rollback()
+
+        if len(batch) == 1:  # recursion floor: this single row is what failed
+            logger.exception("Skipping bad row post_id=%s", batch[0].get("post_id"))
+            with connection.cursor() as cursor:
+                cursor.execute(
+                    FAILED_INSERT,
+                    {
+                        "raw_line": orjson.dumps(batch[0], default=str).decode(),
+                        "error": str(error),
+                    },
+                )
+            connection.commit()
+            return
+
+        midpoint = len(batch) // 2  # retry each half separately so good rows still land
+        ingest_batch(connection, batch[:midpoint])
+        ingest_batch(connection, batch[midpoint:])
+
+
+def read_jsonl_batches(
+    file_path: Path, batch_size: int, failed_lines: list[dict]
+) -> Iterator[list[dict]]:
+    """Lazily read *file_path* and yield lists of transformed rows, batch_size at a time."""
+    pending: list[dict] = []
+    with file_path.open("r", encoding="utf-8") as handle:
+        # Whitespace-only lines are dropped before parsing.
+        stripped_lines = (raw_line.strip() for raw_line in handle)
+        for line in filter(None, stripped_lines):
+            pending.extend(parse_line(line, file_path, failed_lines))
+            # A line may expand to several rows, so a batch can overshoot batch_size.
+            if len(pending) >= batch_size:
+                yield pending
+                pending = []
+    if pending:
+        yield pending
+
+
+def parse_line(line: str, file_path: Path, failed_lines: list[dict]) -> Iterator[dict]:
+    """Yield transformed rows from one line; per-row errors are recorded in failed_lines."""
+    try:
+        yield transform_row(orjson.loads(line))
+    except orjson.JSONDecodeError:
+        if "}{" not in line:
+            logger.warning(
+                "Skipping malformed line in %s: %s", file_path.name, line[:120]
+            )
+            failed_lines.append({"raw_line": line, "error": "malformed JSON"})
+            return
+        fragments = line.replace("}{", "}\n{").split("\n")  # split concatenated objects
+        for fragment in fragments:
+            try:
+                yield transform_row(orjson.loads(fragment))
+            except Exception as error:  # the outer handler cannot see errors raised in here
+                logger.warning(
+                    "Skipping malformed fragment in %s: %s",
+                    file_path.name,
+                    fragment[:120],
+                )
+                failed_lines.append({"raw_line": fragment, "error": str(error)})
+    except Exception as error:
+        logger.exception("Skipping bad row in %s: %s", file_path.name, line[:120])
+        failed_lines.append({"raw_line": line, "error": str(error)})
+
+
+def transform_row(raw: dict) -> dict:
+    """Mutate *raw* into posts-table shape (date, langs, text) and return the same dict."""
+    raw["date"] = parse_date(raw["date"])
+    if (langs := raw.get("langs")) is not None:
+        raw["langs"] = orjson.dumps(langs)
+    if (text := raw.get("text")) is not None:
+        raw["text"] = text.replace("\x00", "")  # drop NUL characters
+    return raw
+
+
+def parse_date(raw_date: int) -> datetime:
+    """Parse a compact YYYYMMDDHHmm integer into a timezone-aware UTC datetime."""
+    # Peel two decimal digits at a time from the right (mm, HH, DD, MM);
+    # whatever is left after the fourth split is the year.
+    rest, minute = divmod(raw_date, 100)
+    rest, hour = divmod(rest, 100)
+    rest, day = divmod(rest, 100)
+    year, month = divmod(rest, 100)
+    # tzinfo=UTC labels the value as UTC; no conversion takes place.
+    return datetime(year, month, day, hour, minute, tzinfo=UTC)
+
+
+if __name__ == "__main__":
+    app()
-- 
2.54.0


From d4c587362d061b210255b70b649e99a93a8f151e Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Tue, 28 Apr 2026 23:01:54 -0400
Subject: [PATCH 2/4] removed old prompts

---
 pipelines/tools/summarization_prompts.py | 34 ------------------------
 1 file changed, 34 deletions(-)
 delete mode 100644 pipelines/tools/summarization_prompts.py

diff --git a/pipelines/tools/summarization_prompts.py b/pipelines/tools/summarization_prompts.py
deleted file mode 100644
index bfdd5a5..0000000
--- a/pipelines/tools/summarization_prompts.py
+++ /dev/null
@@ -1,34 +0,0 @@
-SUMMARIZATION_SYSTEM_PROMPT = """You are a legislative analyst extracting policy substance from Congressional bill text.
-
-Your job is to compress a bill into a dense, neutral structured summary that captures every distinct policy action — including secondary effects that might be buried in subsections.
-
-EXTRACTION RULES:
-- IGNORE: whereas clauses, congressional findings that are purely political statements, recitals, preambles, citations of existing law by number alone, and procedural boilerplate.
-- FOCUS ON: operative verbs — what the bill SHALL do, PROHIBIT, REQUIRE, AUTHORIZE, AMEND, APPROPRIATE, or ESTABLISH.
-- SURFACE ALL THREADS: If the bill touches multiple policy areas, list each thread separately. Do not collapse them.
-- BE CONCRETE: Name the affected population, the mechanism, and the direction (expands/restricts/maintains).
-- STAY NEUTRAL: No political framing. Describe what the text does, not what its sponsors claim it does.
-
-OUTPUT FORMAT — plain structured text, not JSON:
-
-OPERATIVE ACTIONS:
-[Numbered list of what the bill actually does, one action per line, max 20 words each]
-
-AFFECTED POPULATIONS:
-[Who gains something, who loses something, or whose behavior is regulated]
-
-MECHANISMS:
-[How it works: new funding, mandate, prohibition, amendment to existing statute, grant program, study commission, etc.]
-
-POLICY THREADS:
-[List each distinct policy domain this bill touches, even minor ones. Use plain language, not domain codes.]
-
-SYMBOLIC/PROCEDURAL ONLY:
-[Yes or No — is this bill primarily a resolution, designation, or awareness declaration with no operative effect?]
-
-LENGTH TARGET: 150-250 words total. Be ruthless about cutting. Density over completeness."""
-
-SUMMARIZATION_USER_TEMPLATE = """Summarize the following Congressional bill according to your instructions.
-
-BILL TEXT:
-{text_content}"""
-- 
2.54.0


From 28993213aff252f46af9bd9eedfbad4a6d8246d9 Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Tue, 28 Apr 2026 23:02:18 -0400
Subject: [PATCH 3/4] fixed pyproject.toml

---
 pyprject.toml  |  0
 pyproject.toml | 22 ++++++++++++++++++++++
 2 files changed, 22 insertions(+)
 delete mode 100644 pyprject.toml
 create mode 100644 pyproject.toml

diff --git a/pyprject.toml b/pyprject.toml
deleted file mode 100644
index e69de29..0000000
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..4bfb941
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,22 @@
+[project]
+name = "ds-testing-pipelines"
+version = "0.1.0"
+description = "Data science pipeline tools and legislative dashboard."
+requires-python = ">=3.12"
+dependencies = [
+    "fastapi",
+    "httpx",
+    "uvicorn[standard]",
+    "jinja2",
+    "sqlalchemy",
+    "psycopg",
+]
+
+[project.optional-dependencies]
+test = [
+    "pytest",
+]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["."]
-- 
2.54.0


From 21448eb515bd1bd08898537eed47fdc47bbfa990 Mon Sep 17 00:00:00 2001
From: Richie Cahill <Richie@tmmworkshop.com>
Date: Tue, 28 Apr 2026 23:02:31 -0400
Subject: [PATCH 4/4] updated __init__.py

---
 pipelines/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pipelines/__init__.py b/pipelines/__init__.py
index dc58a44..14e8999 100644
--- a/pipelines/__init__.py
+++ b/pipelines/__init__.py
@@ -1 +1 @@
-"""Prompt benchmarking system for evaluating LLMs via vLLM."""
+"""Data science pipeline tools and legislative dashboard."""
-- 
2.54.0