added ebook embedding to orm
This commit is contained in:
@@ -0,0 +1,200 @@
|
||||
"""add ebook search tables.
|
||||
|
||||
Revision ID: 2db132cace1a
|
||||
Revises: b3c60cc5beb5
|
||||
Create Date: 2026-06-10 22:10:54.379159
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pgvector
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
from python.orm import RichieBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
# revision identifiers, used by Alembic.
revision: str = "2db132cace1a"  # ID of this migration (matches "Revision ID" in the module docstring)
down_revision: str | None = "b3c60cc5beb5"  # parent revision this one applies on top of ("Revises")
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# Schema that every table in this migration is created in / dropped from,
# read from the ORM declarative base.
schema = RichieBase.schema_name
|
||||
|
||||
|
||||
def _create_chunk_embedding_table(dimension: int) -> None:
    """Create the ``ebook_chunk_embedding_<dimension>`` table.

    Each embedding table stores one fixed-width vector per
    ``(chunk_id, model_id)`` pair. The tables differ only in the vector
    width, so the table name and all constraint names are derived from
    ``dimension``.

    Args:
        dimension: Width of the ``embedding`` vector column; also used as the
            table-name suffix.
    """
    # ``import pgvector`` does not by itself guarantee that the
    # ``pgvector.sqlalchemy.vector`` submodule is loaded; the attribute chain
    # only resolves if some other import already pulled it in. Import it
    # explicitly so the migration does not depend on import side effects.
    import pgvector.sqlalchemy.vector

    table_name = f"ebook_chunk_embedding_{dimension}"
    op.create_table(
        table_name,
        sa.Column("chunk_id", sa.BigInteger(), nullable=False),
        sa.Column("model_id", sa.Integer(), nullable=False),
        sa.Column("embedding", pgvector.sqlalchemy.vector.VECTOR(dim=dimension), nullable=False),
        sa.Column("id", sa.BigInteger(), nullable=False),
        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.ForeignKeyConstraint(
            ["chunk_id"],
            [f"{schema}.ebook_chunk.id"],
            name=op.f(f"fk_{table_name}_chunk_id_ebook_chunk"),
            ondelete="CASCADE",
        ),
        sa.ForeignKeyConstraint(
            ["model_id"],
            [f"{schema}.ebook_embedding_model.id"],
            name=op.f(f"fk_{table_name}_model_id_ebook_embedding_model"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f(f"pk_{table_name}")),
        sa.UniqueConstraint("chunk_id", "model_id", name=op.f(f"uq_{table_name}_chunk_id")),
        schema=schema,
    )


def upgrade() -> None:
    """Create the ebook search tables.

    Tables are created in foreign-key dependency order:
    ``ebook_embedding_model`` and ``ebook_source`` first, then
    ``ebook_chapter`` and ``ebook_chunk``, and finally one
    ``ebook_chunk_embedding_<dim>`` table for each of the vector widths
    1024, 2560 and 4096. Every table lives in ``schema``.
    """
    # NOTE(review): the VECTOR columns need the PostgreSQL ``vector`` extension.
    # This migration does not create it -- confirm an earlier revision does.

    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "ebook_embedding_model",
        sa.Column("name", sa.String(), nullable=False),
        sa.Column("dimension", sa.Integer(), nullable=False),
        sa.Column("is_default", sa.Boolean(), nullable=False),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_embedding_model")),
        sa.UniqueConstraint("name", name=op.f("uq_ebook_embedding_model_name")),
        schema=schema,
    )
    op.create_table(
        "ebook_source",
        sa.Column("title", sa.String(), nullable=False),
        sa.Column("author", sa.String(), nullable=True),
        sa.Column("language", sa.String(), nullable=True),
        sa.Column("publisher", sa.String(), nullable=True),
        sa.Column("identifier", sa.String(), nullable=True),
        sa.Column("file_path", sa.String(), nullable=False),
        sa.Column("file_sha256", sa.String(length=64), nullable=False),
        sa.Column("file_mtime", sa.DateTime(timezone=True), nullable=False),
        sa.Column("file_size", sa.BigInteger(), nullable=False),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_source")),
        sa.UniqueConstraint("file_path", name=op.f("uq_ebook_source_file_path")),
        sa.UniqueConstraint("file_sha256", name=op.f("uq_ebook_source_file_sha256")),
        schema=schema,
    )
    op.create_table(
        "ebook_chapter",
        sa.Column("source_id", sa.Integer(), nullable=False),
        sa.Column("spine_index", sa.Integer(), nullable=False),
        sa.Column("title", sa.String(), nullable=True),
        sa.Column("href", sa.String(), nullable=True),
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        # Chapters are removed together with their source.
        sa.ForeignKeyConstraint(
            ["source_id"],
            [f"{schema}.ebook_source.id"],
            name=op.f("fk_ebook_chapter_source_id_ebook_source"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_chapter")),
        sa.UniqueConstraint("source_id", "spine_index", name=op.f("uq_ebook_chapter_source_id")),
        schema=schema,
    )
    op.create_table(
        "ebook_chunk",
        sa.Column("source_id", sa.Integer(), nullable=False),
        sa.Column("chapter_id", sa.Integer(), nullable=True),
        sa.Column("chunk_index", sa.Integer(), nullable=False),
        sa.Column("text", sa.String(), nullable=False),
        sa.Column("token_start", sa.Integer(), nullable=False),
        sa.Column("token_count", sa.Integer(), nullable=False),
        sa.Column("page_label", sa.String(), nullable=True),
        sa.Column("content_sha256", sa.String(length=64), nullable=False),
        sa.Column("search_text", sa.String(), nullable=False),
        sa.Column("id", sa.BigInteger(), nullable=False),
        sa.Column("created", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        sa.Column("updated", sa.DateTime(timezone=True), server_default=sa.text("now()"), nullable=False),
        # A chunk survives deletion of its chapter (chapter_id becomes NULL)
        # but not deletion of its source.
        sa.ForeignKeyConstraint(
            ["chapter_id"],
            [f"{schema}.ebook_chapter.id"],
            name=op.f("fk_ebook_chunk_chapter_id_ebook_chapter"),
            ondelete="SET NULL",
        ),
        sa.ForeignKeyConstraint(
            ["source_id"],
            [f"{schema}.ebook_source.id"],
            name=op.f("fk_ebook_chunk_source_id_ebook_source"),
            ondelete="CASCADE",
        ),
        sa.PrimaryKeyConstraint("id", name=op.f("pk_ebook_chunk")),
        sa.UniqueConstraint("source_id", "chunk_index", name="uq_ebook_chunk_source_id_chunk_index"),
        sa.UniqueConstraint("source_id", "content_sha256", name="uq_ebook_chunk_source_id_content_sha256"),
        schema=schema,
    )
    # One embedding table per supported vector width, created in ascending order.
    for dimension in (1024, 2560, 4096):
        _create_chunk_embedding_table(dimension)
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop every ebook search table created by ``upgrade``."""
    # Reverse of the creation order: tables holding foreign keys are dropped
    # before the tables they reference.
    tables_in_drop_order = (
        "ebook_chunk_embedding_4096",
        "ebook_chunk_embedding_2560",
        "ebook_chunk_embedding_1024",
        "ebook_chunk",
        "ebook_chapter",
        "ebook_source",
        "ebook_embedding_model",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name, schema=schema)
|
||||
Reference in New Issue
Block a user