deleting data_science code

this code was moved to https://gitea.tmmworkshop.com/Nornsight/weave
2026-06-13 21:14:42 -04:00
parent e05e5c77bc
commit 5d3a851137
24 changed files with 0 additions and 7232 deletions
@@ -1,11 +1,9 @@
 """ORM package exports."""

-from python.orm.data_science_dev.base import DataScienceDevBase
 from python.orm.richie.base import RichieBase
 from python.orm.van_inventory.base import VanInventoryBase

 __all__ = [
-    "DataScienceDevBase",
    "RichieBase",
    "VanInventoryBase",
 ]
@@ -1,11 +0,0 @@
-"""Data science dev database ORM exports."""
-
-from __future__ import annotations
-
-from python.orm.data_science_dev.base import DataScienceDevBase, DataScienceDevTableBase, DataScienceDevTableBaseBig
-
-__all__ = [
-    "DataScienceDevBase",
-    "DataScienceDevTableBase",
-    "DataScienceDevTableBaseBig",
-]
@@ -1,52 +0,0 @@
-"""Data science dev database ORM base."""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from sqlalchemy import BigInteger, DateTime, MetaData, func
-from sqlalchemy.ext.declarative import AbstractConcreteBase
-from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
-
-from python.orm.common import NAMING_CONVENTION
-
-
-class DataScienceDevBase(DeclarativeBase):
-    """Base class for data_science_dev database ORM models."""
-
-    schema_name = "main"
-
-    metadata = MetaData(
-        schema=schema_name,
-        naming_convention=NAMING_CONVENTION,
-    )
-
-
-class _TableMixin:
-    """Shared timestamp columns for all table bases."""
-
-    created: Mapped[datetime] = mapped_column(
-        DateTime(timezone=True),
-        server_default=func.now(),
-    )
-    updated: Mapped[datetime] = mapped_column(
-        DateTime(timezone=True),
-        server_default=func.now(),
-        onupdate=func.now(),
-    )
-
-
-class DataScienceDevTableBase(_TableMixin, AbstractConcreteBase, DataScienceDevBase):
-    """Table with Integer primary key."""
-
-    __abstract__ = True
-
-    id: Mapped[int] = mapped_column(primary_key=True)
-
-
-class DataScienceDevTableBaseBig(_TableMixin, AbstractConcreteBase, DataScienceDevBase):
-    """Table with BigInteger primary key."""
-
-    __abstract__ = True
-
-    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
@@ -1,14 +0,0 @@
-"""init."""
-
-from python.orm.data_science_dev.congress.bill import Bill, BillText
-from python.orm.data_science_dev.congress.legislator import Legislator, LegislatorSocialMedia
-from python.orm.data_science_dev.congress.vote import Vote, VoteRecord
-
-__all__ = [
-    "Bill",
-    "BillText",
-    "Legislator",
-    "LegislatorSocialMedia",
-    "Vote",
-    "VoteRecord",
-]
@@ -1,66 +0,0 @@
-"""Bill model - legislation introduced in Congress."""
-
-from __future__ import annotations
-
-from datetime import date
-from typing import TYPE_CHECKING
-
-from sqlalchemy import ForeignKey, Index, UniqueConstraint
-from sqlalchemy.orm import Mapped, mapped_column, relationship
-
-from python.orm.data_science_dev.base import DataScienceDevTableBase
-
-if TYPE_CHECKING:
-    from python.orm.data_science_dev.congress.vote import Vote
-
-
-class Bill(DataScienceDevTableBase):
-    """Legislation with congress number, type, titles, status, and sponsor."""
-
-    __tablename__ = "bill"
-
-    congress: Mapped[int]
-    bill_type: Mapped[str]
-    number: Mapped[int]
-
-    title: Mapped[str | None]
-    title_short: Mapped[str | None]
-    official_title: Mapped[str | None]
-
-    status: Mapped[str | None]
-    status_at: Mapped[date | None]
-
-    sponsor_bioguide_id: Mapped[str | None]
-
-    subjects_top_term: Mapped[str | None]
-
-    votes: Mapped[list[Vote]] = relationship(
-        "Vote",
-        back_populates="bill",
-    )
-    bill_texts: Mapped[list[BillText]] = relationship(
-        "BillText",
-        back_populates="bill",
-        cascade="all, delete-orphan",
-    )
-
-    __table_args__ = (
-        UniqueConstraint("congress", "bill_type", "number", name="uq_bill_congress_type_number"),
-        Index("ix_bill_congress", "congress"),
-    )
-
-
-class BillText(DataScienceDevTableBase):
-    """Stores different text versions of a bill (introduced, enrolled, etc.)."""
-
-    __tablename__ = "bill_text"
-
-    bill_id: Mapped[int] = mapped_column(ForeignKey("main.bill.id", ondelete="CASCADE"))
-    version_code: Mapped[str]
-    version_name: Mapped[str | None]
-    text_content: Mapped[str | None]
-    date: Mapped[date | None]
-
-    bill: Mapped[Bill] = relationship("Bill", back_populates="bill_texts")
-
-    __table_args__ = (UniqueConstraint("bill_id", "version_code", name="uq_bill_text_bill_id_version_code"),)
@@ -1,66 +0,0 @@
-"""Legislator model - members of Congress."""
-
-from __future__ import annotations
-
-from datetime import date
-from typing import TYPE_CHECKING
-
-from sqlalchemy import ForeignKey, Text
-from sqlalchemy.orm import Mapped, mapped_column, relationship
-
-from python.orm.data_science_dev.base import DataScienceDevTableBase
-
-if TYPE_CHECKING:
-    from python.orm.data_science_dev.congress.vote import VoteRecord
-
-
-class Legislator(DataScienceDevTableBase):
-    """Members of Congress with identification and current term info."""
-
-    __tablename__ = "legislator"
-
-    bioguide_id: Mapped[str] = mapped_column(Text, unique=True, index=True)
-
-    thomas_id: Mapped[str | None]
-    lis_id: Mapped[str | None]
-    govtrack_id: Mapped[int | None]
-    opensecrets_id: Mapped[str | None]
-    fec_ids: Mapped[str | None]
-
-    first_name: Mapped[str]
-    last_name: Mapped[str]
-    official_full_name: Mapped[str | None]
-    nickname: Mapped[str | None]
-
-    birthday: Mapped[date | None]
-    gender: Mapped[str | None]
-
-    current_party: Mapped[str | None]
-    current_state: Mapped[str | None]
-    current_district: Mapped[int | None]
-    current_chamber: Mapped[str | None]
-
-    social_media_accounts: Mapped[list[LegislatorSocialMedia]] = relationship(
-        "LegislatorSocialMedia",
-        back_populates="legislator",
-        cascade="all, delete-orphan",
-    )
-    vote_records: Mapped[list[VoteRecord]] = relationship(
-        "VoteRecord",
-        back_populates="legislator",
-        cascade="all, delete-orphan",
-    )
-
-
-class LegislatorSocialMedia(DataScienceDevTableBase):
-    """Social media account linked to a legislator."""
-
-    __tablename__ = "legislator_social_media"
-
-    legislator_id: Mapped[int] = mapped_column(ForeignKey("main.legislator.id"))
-    platform: Mapped[str]
-    account_name: Mapped[str]
-    url: Mapped[str | None]
-    source: Mapped[str]
-
-    legislator: Mapped[Legislator] = relationship(back_populates="social_media_accounts")
@@ -1,79 +0,0 @@
-"""Vote model - roll call votes in Congress."""
-
-from __future__ import annotations
-
-from datetime import date
-from typing import TYPE_CHECKING
-
-from sqlalchemy import ForeignKey, Index, UniqueConstraint
-from sqlalchemy.orm import Mapped, mapped_column, relationship
-
-from python.orm.data_science_dev.base import DataScienceDevBase, DataScienceDevTableBase
-
-if TYPE_CHECKING:
-    from python.orm.data_science_dev.congress.bill import Bill
-    from python.orm.data_science_dev.congress.legislator import Legislator
-    from python.orm.data_science_dev.congress.vote import Vote
-
-
-class VoteRecord(DataScienceDevBase):
-    """Links a vote to a legislator with their position (Yea, Nay, etc.)."""
-
-    __tablename__ = "vote_record"
-
-    vote_id: Mapped[int] = mapped_column(
-        ForeignKey("main.vote.id", ondelete="CASCADE"),
-        primary_key=True,
-    )
-    legislator_id: Mapped[int] = mapped_column(
-        ForeignKey("main.legislator.id", ondelete="CASCADE"),
-        primary_key=True,
-    )
-    position: Mapped[str]
-
-    vote: Mapped[Vote] = relationship("Vote", back_populates="vote_records")
-    legislator: Mapped[Legislator] = relationship("Legislator", back_populates="vote_records")
-
-
-class Vote(DataScienceDevTableBase):
-    """Roll call votes with counts and optional bill linkage."""
-
-    __tablename__ = "vote"
-
-    congress: Mapped[int]
-    chamber: Mapped[str]
-    session: Mapped[int]
-    number: Mapped[int]
-
-    vote_type: Mapped[str | None]
-    question: Mapped[str | None]
-    result: Mapped[str | None]
-    result_text: Mapped[str | None]
-
-    vote_date: Mapped[date]
-
-    yea_count: Mapped[int | None]
-    nay_count: Mapped[int | None]
-    not_voting_count: Mapped[int | None]
-    present_count: Mapped[int | None]
-
-    bill_id: Mapped[int | None] = mapped_column(ForeignKey("main.bill.id"))
-
-    bill: Mapped[Bill | None] = relationship("Bill", back_populates="votes")
-    vote_records: Mapped[list[VoteRecord]] = relationship(
-        "VoteRecord",
-        back_populates="vote",
-        cascade="all, delete-orphan",
-    )
-
-    __table_args__ = (
-        UniqueConstraint(
-            "congress",
-            "chamber",
-            "session",
-            "number",
-            name="uq_vote_congress_chamber_session_number",
-        ),
-        Index("ix_vote_date", "vote_date"),
-        Index("ix_vote_congress_chamber", "congress", "chamber"),
-    )
@@ -1,16 +0,0 @@
-"""Data science dev database ORM models."""
-
-from __future__ import annotations
-
-from python.orm.data_science_dev.congress import Bill, BillText, Legislator, Vote, VoteRecord
-from python.orm.data_science_dev.posts import partitions  # noqa: F401 — registers partition classes in metadata
-from python.orm.data_science_dev.posts.tables import Posts
-
-__all__ = [
-    "Bill",
-    "BillText",
-    "Legislator",
-    "Posts",
-    "Vote",
-    "VoteRecord",
-]
@@ -1,11 +0,0 @@
-"""Posts module — weekly-partitioned posts table and partition ORM models."""
-
-from __future__ import annotations
-
-from python.orm.data_science_dev.posts.failed_ingestion import FailedIngestion
-from python.orm.data_science_dev.posts.tables import Posts
-
-__all__ = [
-    "FailedIngestion",
-    "Posts",
-]
@@ -1,33 +0,0 @@
-"""Shared column definitions for the posts partitioned table family."""
-
-from __future__ import annotations
-
-from datetime import datetime
-
-from sqlalchemy import BigInteger, SmallInteger, Text
-from sqlalchemy.orm import Mapped, mapped_column
-
-
-class PostsColumns:
-    """Mixin providing all posts columns. Used by both the parent table and partitions."""
-
-    post_id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
-    user_id: Mapped[int] = mapped_column(BigInteger)
-    instance: Mapped[str]
-    date: Mapped[datetime] = mapped_column(primary_key=True)
-    text: Mapped[str] = mapped_column(Text)
-    langs: Mapped[str | None]
-    like_count: Mapped[int]
-    reply_count: Mapped[int]
-    repost_count: Mapped[int]
-    reply_to: Mapped[int | None] = mapped_column(BigInteger)
-    replied_author: Mapped[int | None] = mapped_column(BigInteger)
-    thread_root: Mapped[int | None] = mapped_column(BigInteger)
-    thread_root_author: Mapped[int | None] = mapped_column(BigInteger)
-    repost_from: Mapped[int | None] = mapped_column(BigInteger)
-    reposted_author: Mapped[int | None] = mapped_column(BigInteger)
-    quotes: Mapped[int | None] = mapped_column(BigInteger)
-    quoted_author: Mapped[int | None] = mapped_column(BigInteger)
-    labels: Mapped[str | None]
-    sent_label: Mapped[int | None] = mapped_column(SmallInteger)
-    sent_score: Mapped[float | None]
@@ -1,17 +0,0 @@
-"""Table for storing JSONL lines that failed during post ingestion."""
-
-from __future__ import annotations
-
-from sqlalchemy import Text
-from sqlalchemy.orm import Mapped, mapped_column
-
-from python.orm.data_science_dev.base import DataScienceDevTableBase
-
-
-class FailedIngestion(DataScienceDevTableBase):
-    """Stores raw JSONL lines and their error messages when ingestion fails."""
-
-    __tablename__ = "failed_ingestion"
-
-    raw_line: Mapped[str] = mapped_column(Text)
-    error: Mapped[str] = mapped_column(Text)
@@ -1,71 +0,0 @@
-"""Dynamically generated ORM classes for each weekly partition of the posts table.
-
-Each class maps to a PostgreSQL partition table (e.g. posts_2024_01).
-These are real ORM models tracked by Alembic autogenerate.
-
-Uses ISO week numbering (datetime.isocalendar().week). ISO years can have
-52 or 53 weeks, and week boundaries are always Monday to Monday.
-"""
-
-from __future__ import annotations
-
-import sys
-from datetime import UTC, datetime
-
-from python.orm.data_science_dev.base import DataScienceDevBase
-from python.orm.data_science_dev.posts.columns import PostsColumns
-
-PARTITION_START_YEAR = 2023
-PARTITION_END_YEAR = 2026
-
-_current_module = sys.modules[__name__]
-
-
-def iso_weeks_in_year(year: int) -> int:
-    """Return the number of ISO weeks in a given year (52 or 53)."""
-    dec_28 = datetime(year, 12, 28, tzinfo=UTC)
-    return dec_28.isocalendar().week
-
-
-def week_bounds(year: int, week: int) -> tuple[datetime, datetime]:
-    """Return (start, end) datetimes for an ISO week.
-
-    Start = Monday 00:00:00 UTC of the given ISO week.
-    End   = Monday 00:00:00 UTC of the following ISO week.
-    """
-    start = datetime.fromisocalendar(year, week, 1).replace(tzinfo=UTC)
-    if week < iso_weeks_in_year(year):
-        end = datetime.fromisocalendar(year, week + 1, 1).replace(tzinfo=UTC)
-    else:
-        end = datetime.fromisocalendar(year + 1, 1, 1).replace(tzinfo=UTC)
-    return start, end
-
-
-def _build_partition_classes() -> dict[str, type]:
-    """Generate one ORM class per ISO week partition."""
-    classes: dict[str, type] = {}
-
-    for year in range(PARTITION_START_YEAR, PARTITION_END_YEAR + 1):
-        for week in range(1, iso_weeks_in_year(year) + 1):
-            class_name = f"PostsWeek{year}W{week:02d}"
-            table_name = f"posts_{year}_{week:02d}"
-
-            partition_class = type(
-                class_name,
-                (PostsColumns, DataScienceDevBase),
-                {
-                    "__tablename__": table_name,
-                    "__table_args__": ({"implicit_returning": False},),
-                },
-            )
-
-            classes[class_name] = partition_class
-
-    return classes
-
-
-# Generate all partition classes and register them on this module
-_partition_classes = _build_partition_classes()
-for _name, _cls in _partition_classes.items():
-    setattr(_current_module, _name, _cls)
-__all__ = list(_partition_classes.keys())
@@ -1,13 +0,0 @@
-"""Posts parent table with PostgreSQL weekly range partitioning on date column."""
-
-from __future__ import annotations
-
-from python.orm.data_science_dev.base import DataScienceDevBase
-from python.orm.data_science_dev.posts.columns import PostsColumns
-
-
-class Posts(PostsColumns, DataScienceDevBase):
-    """Parent partitioned table for posts, partitioned by week on `date`."""
-
-    __tablename__ = "posts"
-    __table_args__ = ({"postgresql_partition_by": "RANGE (date)"},)