Added an index for the vector DB
This commit is contained in:
+54
@@ -0,0 +1,54 @@
|
||||
"""add 1024 ebook embedding cosine index.
|
||||
|
||||
Revision ID: c460105682d2
|
||||
Revises: 2db132cace1a
|
||||
Create Date: 2026-06-13 19:53:45.680289
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from alembic import op
|
||||
|
||||
from python.orm import RichieBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = "c460105682d2"
|
||||
down_revision: str | None = "2db132cace1a"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
schema = RichieBase.schema_name
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the HNSW cosine index on ``ebook_chunk_embedding_1024.embedding``."""
    # PostgreSQL-specific index options: HNSW access method, with the
    # ``vector_cosine_ops`` operator class applied to the embedding column.
    pg_index_options = {
        "postgresql_using": "hnsw",
        "postgresql_ops": {"embedding": "vector_cosine_ops"},
    }
    op.create_index(
        "ix_ebook_chunk_embedding_1024_embedding_cosine",
        "ebook_chunk_embedding_1024",
        ["embedding"],
        unique=False,
        schema=schema,
        **pg_index_options,
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the HNSW cosine index from ``ebook_chunk_embedding_1024``."""
    # Same dialect options that ``upgrade`` passes when creating the index,
    # kept here exactly as Alembic autogenerate emitted them.
    pg_index_options = {
        "postgresql_using": "hnsw",
        "postgresql_ops": {"embedding": "vector_cosine_ops"},
    }
    op.drop_index(
        "ix_ebook_chunk_embedding_1024_embedding_cosine",
        table_name="ebook_chunk_embedding_1024",
        schema=schema,
        **pg_index_options,
    )
|
||||
@@ -5,7 +5,7 @@ from __future__ import annotations
|
||||
from datetime import datetime
|
||||
|
||||
from pgvector.sqlalchemy import Vector
|
||||
from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, String, UniqueConstraint
|
||||
from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, Index, String, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from python.orm.richie.base import TableBase, TableBaseBig
|
||||
@@ -101,7 +101,15 @@ class EbookChunkEmbedding1024(TableBaseBig):
|
||||
"""1024-dimensional chunk embedding."""
|
||||
|
||||
__tablename__ = "ebook_chunk_embedding_1024"
|
||||
__table_args__ = (UniqueConstraint("chunk_id", "model_id"),)
|
||||
__table_args__ = (
|
||||
UniqueConstraint("chunk_id", "model_id"),
|
||||
Index(
|
||||
"ix_ebook_chunk_embedding_1024_embedding_cosine",
|
||||
"embedding",
|
||||
postgresql_using="hnsw",
|
||||
postgresql_ops={"embedding": "vector_cosine_ops"},
|
||||
),
|
||||
)
|
||||
|
||||
chunk_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_chunk.id", ondelete="CASCADE"))
|
||||
model_id: Mapped[int] = mapped_column(ForeignKey("main.ebook_embedding_model.id", ondelete="CASCADE"))
|
||||
|
||||
@@ -38,7 +38,14 @@ from python.ebook_search.search import (
|
||||
search_ebooks,
|
||||
)
|
||||
from python.ebook_search.timing import RuntimeStep
|
||||
from python.orm.richie import EbookChapter, EbookChunk, EbookEmbeddingModel, EbookSource, RichieBase
|
||||
from python.orm.richie import (
|
||||
EbookChapter,
|
||||
EbookChunk,
|
||||
EbookChunkEmbedding1024,
|
||||
EbookEmbeddingModel,
|
||||
EbookSource,
|
||||
RichieBase,
|
||||
)
|
||||
|
||||
|
||||
def test_chunk_text_uses_overlap() -> None:
|
||||
@@ -464,6 +471,15 @@ def test_ensure_embedding_models_registers_service_names() -> None:
|
||||
]
|
||||
|
||||
|
||||
def test_1024_embedding_table_has_cosine_hnsw_index() -> None:
    """Check that the 1024-dim embedding table declares its HNSW cosine index."""
    expected_name = "ix_ebook_chunk_embedding_1024_embedding_cosine"
    table_indexes = EbookChunkEmbedding1024.__table__.indexes

    # Look the index up by name; a missing index fails the test with a KeyError.
    cosine_index = {candidate.name: candidate for candidate in table_indexes}[expected_name]

    indexed_columns = [col.name for col in cosine_index.columns]
    assert indexed_columns == ["embedding"]

    pg_options = cosine_index.dialect_options["postgresql"]
    assert pg_options["using"] == "hnsw"
    assert pg_options["ops"] == {"embedding": "vector_cosine_ops"}
|
||||
|
||||
|
||||
def test_embedding_model_aliases_normalize_to_provider_names() -> None:
|
||||
assert normalize_embedding_model() == "qwen3-embedding-0.6b"
|
||||
|
||||
|
||||
Reference in New Issue
Block a user