diff --git a/python/ebook_search/api/main.py b/python/ebook_search/api/main.py index b6c3a62..f9937f5 100644 --- a/python/ebook_search/api/main.py +++ b/python/ebook_search/api/main.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) async def lifespan(app: FastAPI) -> AsyncIterator[None]: """Manage application startup and shutdown resources.""" logger.info("ebook_search_startup") - app.state.engine = get_postgres_engine(name="RICHIE") + app.state.engine = get_postgres_engine(name="RICHIE", vector_engine=True) with Session(app.state.engine) as session: ensure_bm25_corpus(session, app.state.config) try: diff --git a/python/orm/common.py b/python/orm/common.py index 6f86462..1214346 100644 --- a/python/orm/common.py +++ b/python/orm/common.py @@ -31,8 +31,24 @@ def get_connection_info(name: str) -> tuple[str, str, str, str, str | None]: return cast("tuple[str, str, str, str, str | None]", (database, host, port, username, password)) -def get_postgres_engine(*, name: str = "POSTGRES", pool_pre_ping: bool = True) -> Engine: - """Create a SQLAlchemy engine from environment variables.""" +def get_postgres_engine( + *, + name: str = "POSTGRES", + pool_pre_ping: bool = True, + vector_engine: bool = False, +) -> Engine: + """Create a SQLAlchemy engine from environment variables. + + Args: + name (str, optional): The name of the environment variable prefix. Defaults to "POSTGRES". + pool_pre_ping (bool, optional): Whether to ping the database before each connection. Defaults to True. + This fixes the issue of trying to use a connection that has timed out on the database side. + vector_engine (bool, optional): Whether to use the vector search schema. Defaults to False. + This updates the search path to include the vector types and operators. + + Returns: + Engine: The SQLAlchemy engine. 
+ """ database, host, port, username, password = get_connection_info(name) url = URL.create( @@ -44,8 +60,14 @@ def get_postgres_engine(*, name: str = "POSTGRES", pool_pre_ping: bool = True) - database=database, ) + connect_args = {} + # A better way to do this is with a separate PG account and a dedicated vector schema for the vector types + if vector_engine: + connect_args["options"] = "-csearch_path=main,public" + return create_engine( url=url, pool_pre_ping=pool_pre_ping, pool_recycle=1800, + connect_args=connect_args, )