"""EPUB search models.""" from __future__ import annotations from datetime import datetime from pgvector.sqlalchemy import Vector from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, Index, String, UniqueConstraint from sqlalchemy.orm import Mapped, mapped_column, relationship from python.orm.richie.base import TableBase, TableBaseBig class EbookSource(TableBase): """One indexed EPUB file.""" __tablename__ = "ebook_source" __table_args__ = ( UniqueConstraint("file_path"), UniqueConstraint("file_sha256"), ) title: Mapped[str] author: Mapped[str | None] language: Mapped[str | None] publisher: Mapped[str | None] identifier: Mapped[str | None] file_path: Mapped[str] file_sha256: Mapped[str] = mapped_column(String(64)) file_mtime: Mapped[datetime] = mapped_column(DateTime(timezone=True)) file_size: Mapped[int] = mapped_column(BigInteger) chapters: Mapped[list[EbookChapter]] = relationship( "EbookChapter", back_populates="source", cascade="all, delete-orphan", passive_deletes=True, ) chunks: Mapped[list[EbookChunk]] = relationship( "EbookChunk", back_populates="source", cascade="all, delete-orphan", passive_deletes=True, ) class EbookChapter(TableBase): """A chapter or spine document inside an EPUB.""" __tablename__ = "ebook_chapter" __table_args__ = (UniqueConstraint("source_id", "spine_index"),) source_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_source.id", ondelete="CASCADE")) spine_index: Mapped[int] title: Mapped[str | None] href: Mapped[str | None] source: Mapped[EbookSource] = relationship("EbookSource", back_populates="chapters") chunks: Mapped[list[EbookChunk]] = relationship( "EbookChunk", back_populates="chapter", cascade="all, delete-orphan", passive_deletes=True, ) class EbookChunk(TableBaseBig): """A searchable text chunk.""" __tablename__ = "ebook_chunk" __table_args__ = ( UniqueConstraint("source_id", "chunk_index", name="uq_ebook_chunk_source_id_chunk_index"), UniqueConstraint("source_id", "content_sha256", name="uq_ebook_chunk_source_id_content_sha256"), ) source_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_source.id", ondelete="CASCADE")) chapter_id: Mapped[int | None] = mapped_column(ForeignKey("main.ebook_chapter.id", ondelete="SET NULL")) chunk_index: Mapped[int] text: Mapped[str] token_start: Mapped[int] token_count: Mapped[int] page_label: Mapped[str | None] content_sha256: Mapped[str] = mapped_column(String(64)) search_text: Mapped[str] source: Mapped[EbookSource] = relationship("EbookSource", back_populates="chunks") chapter: Mapped[EbookChapter | None] = relationship("EbookChapter", back_populates="chunks") class EbookEmbeddingModel(TableBase): """A supported embedding model.""" __tablename__ = "ebook_embedding_model" name: Mapped[str] = mapped_column(String, unique=True) dimension: Mapped[int] is_default: Mapped[bool] = mapped_column(Boolean, default=False) class EbookChunkEmbedding1024(TableBaseBig): """1024-dimensional chunk embedding.""" __tablename__ = "ebook_chunk_embedding_1024" __table_args__ = ( UniqueConstraint("chunk_id", "model_id"), Index( "ix_ebook_chunk_embedding_1024_embedding_cosine", "embedding", postgresql_using="hnsw", postgresql_ops={"embedding": "vector_cosine_ops"}, ), ) chunk_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_chunk.id", ondelete="CASCADE")) model_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_embedding_model.id", ondelete="CASCADE")) embedding: Mapped[list[float]] = mapped_column(Vector(1024)) class EbookChunkEmbedding2560(TableBaseBig): """2560-dimensional chunk embedding.""" __tablename__ = "ebook_chunk_embedding_2560" __table_args__ = (UniqueConstraint("chunk_id", "model_id"),) chunk_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_chunk.id", ondelete="CASCADE")) model_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_embedding_model.id", ondelete="CASCADE")) embedding: Mapped[list[float]] = mapped_column(Vector(2560)) class EbookChunkEmbedding4096(TableBaseBig): """4096-dimensional chunk embedding.""" __tablename__ = "ebook_chunk_embedding_4096" __table_args__ = (UniqueConstraint("chunk_id", "model_id"),) chunk_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_chunk.id", ondelete="CASCADE")) model_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_embedding_model.id", ondelete="CASCADE")) embedding: Mapped[list[float]] = mapped_column(Vector(4096))