Added an index for the vector DB
This commit is contained in:
+54
@@ -0,0 +1,54 @@
|
|||||||
|
"""add 1024 ebook embedding cosine index.
|
||||||
|
|
||||||
|
Revision ID: c460105682d2
|
||||||
|
Revises: 2db132cace1a
|
||||||
|
Create Date: 2026-06-13 19:53:45.680289
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
from python.orm import RichieBase
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision: str = "c460105682d2"
|
||||||
|
down_revision: str | None = "2db132cace1a"
|
||||||
|
branch_labels: str | Sequence[str] | None = None
|
||||||
|
depends_on: str | Sequence[str] | None = None
|
||||||
|
|
||||||
|
schema = RichieBase.schema_name
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Create the HNSW cosine index on ``ebook_chunk_embedding_1024.embedding``.

    The index is non-unique and is created in the schema taken from
    ``RichieBase.schema_name``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    index_name = "ix_ebook_chunk_embedding_1024_embedding_cosine"
    table_name = "ebook_chunk_embedding_1024"
    # PostgreSQL-specific options: HNSW access method with the cosine
    # operator class applied to the ``embedding`` column.
    postgres_options = {
        "postgresql_using": "hnsw",
        "postgresql_ops": {"embedding": "vector_cosine_ops"},
    }
    op.create_index(
        index_name,
        table_name,
        ["embedding"],
        unique=False,
        schema=schema,
        **postgres_options,
    )
    # ### end Alembic commands ###
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the HNSW cosine index from ``ebook_chunk_embedding_1024``.

    Reverses ``upgrade`` by removing the index with the same name, table and
    schema; the PostgreSQL options are passed through as Alembic generated them.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    index_name = "ix_ebook_chunk_embedding_1024_embedding_cosine"
    postgres_options = {
        "postgresql_using": "hnsw",
        "postgresql_ops": {"embedding": "vector_cosine_ops"},
    }
    op.drop_index(
        index_name,
        table_name="ebook_chunk_embedding_1024",
        schema=schema,
        **postgres_options,
    )
    # ### end Alembic commands ###
|
||||||
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from pgvector.sqlalchemy import Vector
|
from pgvector.sqlalchemy import Vector
|
||||||
from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, String, UniqueConstraint
|
from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, Index, String, UniqueConstraint
|
||||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||||
|
|
||||||
from python.orm.richie.base import TableBase, TableBaseBig
|
from python.orm.richie.base import TableBase, TableBaseBig
|
||||||
@@ -101,7 +101,15 @@ class EbookChunkEmbedding1024(TableBaseBig):
|
|||||||
"""1024-dimensional chunk embedding."""
|
"""1024-dimensional chunk embedding."""
|
||||||
|
|
||||||
__tablename__ = "ebook_chunk_embedding_1024"
|
__tablename__ = "ebook_chunk_embedding_1024"
|
||||||
__table_args__ = (UniqueConstraint("chunk_id", "model_id"),)
|
__table_args__ = (
|
||||||
|
UniqueConstraint("chunk_id", "model_id"),
|
||||||
|
Index(
|
||||||
|
"ix_ebook_chunk_embedding_1024_embedding_cosine",
|
||||||
|
"embedding",
|
||||||
|
postgresql_using="hnsw",
|
||||||
|
postgresql_ops={"embedding": "vector_cosine_ops"},
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
chunk_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_chunk.id", ondelete="CASCADE"))
|
chunk_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_chunk.id", ondelete="CASCADE"))
|
||||||
model_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_embedding_model.id", ondelete="CASCADE"))
|
model_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_embedding_model.id", ondelete="CASCADE"))
|
||||||
|
|||||||
@@ -38,7 +38,14 @@ from python.ebook_search.search import (
|
|||||||
search_ebooks,
|
search_ebooks,
|
||||||
)
|
)
|
||||||
from python.ebook_search.timing import RuntimeStep
|
from python.ebook_search.timing import RuntimeStep
|
||||||
from python.orm.richie import EbookChapter, EbookChunk, EbookEmbeddingModel, EbookSource, RichieBase
|
from python.orm.richie import (
|
||||||
|
EbookChapter,
|
||||||
|
EbookChunk,
|
||||||
|
EbookChunkEmbedding1024,
|
||||||
|
EbookEmbeddingModel,
|
||||||
|
EbookSource,
|
||||||
|
RichieBase,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_chunk_text_uses_overlap() -> None:
|
def test_chunk_text_uses_overlap() -> None:
|
||||||
@@ -464,6 +471,15 @@ def test_ensure_embedding_models_registers_service_names() -> None:
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_1024_embedding_table_has_cosine_hnsw_index() -> None:
    """Check that the 1024-dim embedding table declares its HNSW cosine index."""
    # Key the table's declared indexes by name so the expected one can be looked up directly.
    indexes_by_name = {}
    for table_index in EbookChunkEmbedding1024.__table__.indexes:
        indexes_by_name[table_index.name] = table_index
    cosine_index = indexes_by_name["ix_ebook_chunk_embedding_1024_embedding_cosine"]

    indexed_columns = [column.name for column in cosine_index.columns]
    assert indexed_columns == ["embedding"]

    # The access method and operator class live in the PostgreSQL dialect options.
    assert cosine_index.dialect_options["postgresql"]["using"] == "hnsw"
    assert cosine_index.dialect_options["postgresql"]["ops"] == {"embedding": "vector_cosine_ops"}
|
||||||
|
|
||||||
|
|
||||||
def test_embedding_model_aliases_normalize_to_provider_names() -> None:
|
def test_embedding_model_aliases_normalize_to_provider_names() -> None:
|
||||||
assert normalize_embedding_model() == "qwen3-embedding-0.6b"
|
assert normalize_embedding_model() == "qwen3-embedding-0.6b"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user