Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vector search: hide vector index for sqlite planner #1678

Merged
merged 4 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
** src/vtab.c
** src/wal.c
** src/wal.h
** src/where.c
** src/wherecode.c
** test/all.test
** test/permutations.test
Expand Down Expand Up @@ -126752,11 +126753,6 @@ SQLITE_PRIVATE void sqlite3CreateIndex(
goto exit_create_index;
}
if( vectorIdxRc >= 1 ){
/*
* SQLite can use B-Tree indices in some optimizations (like SELECT COUNT(*) can use any full B-Tree index instead of PK index)
* But, SQLite pretty conservative about usage of unordered indices - that's what we need here
*/
pIndex->bUnordered = 1;
pIndex->idxIsVector = 1;
}
if( vectorIdxRc == 1 ){
Expand Down Expand Up @@ -152451,6 +152447,7 @@ SQLITE_PRIVATE int sqlite3Select(
if( pIdx->bUnordered==0
&& pIdx->szIdxRow<pTab->szTabRow
&& pIdx->pPartIdxWhere==0
&& pIdx->idxIsVector==0
&& (!pBest || pIdx->szIdxRow<pBest->szIdxRow)
){
pBest = pIdx;
Expand Down Expand Up @@ -166075,9 +166072,10 @@ static int whereLoopAddBtreeIndex(
assert( pNew->u.btree.nBtm==0 );
opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS;
}
if( pProbe->bUnordered || pProbe->bLowQual ){
if( pProbe->bUnordered || pProbe->bLowQual || pProbe->idxIsVector ){
if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE);
if( pProbe->bLowQual ) opMask &= ~(WO_EQ|WO_IN|WO_IS);
if( pProbe->idxIsVector ) opMask = 0;
}

assert( pNew->u.btree.nEq<pProbe->nColumn );
Expand Down Expand Up @@ -166459,7 +166457,7 @@ static int indexMightHelpWithOrderBy(
ExprList *aColExpr;
int ii, jj;

if( pIndex->bUnordered ) return 0;
if( pIndex->bUnordered || pIndex->idxIsVector ) return 0;
if( (pOB = pBuilder->pWInfo->pOrderBy)==0 ) return 0;
for(ii=0; ii<pOB->nExpr; ii++){
Expr *pExpr = sqlite3ExprSkipCollateAndLikely(pOB->a[ii].pExpr);
Expand Down Expand Up @@ -166628,6 +166626,9 @@ static SQLITE_NOINLINE u32 whereIsCoveringIndex(
** if pIdx is covering. Assume it is not. */
return 0;
}
if( pIdx->idxIsVector==1 ){
return 0;
}
if( pIdx->bHasExpr==0 ){
for(i=0; i<pIdx->nColumn; i++){
if( pIdx->aiColumn[i]>=BMS-1 ) break;
Expand Down Expand Up @@ -166916,6 +166917,9 @@ static int whereLoopAddBtree(
testcase( pNew->iTab!=pSrc->iCursor ); /* See ticket [98d973b8f5] */
continue; /* Partial index inappropriate for this query */
}
if( pProbe->idxIsVector!=0 ){
continue; /* Vector index inappropriate for this query */
}
if( pProbe->bNoQuery ) continue;
rSize = pProbe->aiRowLogEst[0];
pNew->u.btree.nEq = 0;
Expand Down Expand Up @@ -167919,7 +167923,7 @@ static i8 wherePathSatisfiesOrderBy(
pIndex = 0;
nKeyCol = 0;
nColumn = 1;
}else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered ){
}else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered || pIndex->idxIsVector ){
return 0;
}else{
nKeyCol = pIndex->nKeyCol;
Expand Down Expand Up @@ -215466,7 +215470,6 @@ int vectorIndexSearch(
rc = SQLITE_ERROR;
goto out;
}
assert( type == VECTOR_TYPE_FLOAT32 || type == VECTOR_TYPE_FLOAT64 || type == VECTOR_TYPE_FLOAT1BIT );

pVector = vectorAlloc(type, dims);
if( pVector == NULL ){
Expand Down
21 changes: 12 additions & 9 deletions libsql-ffi/bundled/src/sqlite3.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
** src/vtab.c
** src/wal.c
** src/wal.h
** src/where.c
** src/wherecode.c
** test/all.test
** test/permutations.test
Expand Down Expand Up @@ -126752,11 +126753,6 @@ SQLITE_PRIVATE void sqlite3CreateIndex(
goto exit_create_index;
}
if( vectorIdxRc >= 1 ){
/*
* SQLite can use B-Tree indices in some optimizations (like SELECT COUNT(*) can use any full B-Tree index instead of PK index)
* But, SQLite pretty conservative about usage of unordered indices - that's what we need here
*/
pIndex->bUnordered = 1;
pIndex->idxIsVector = 1;
}
if( vectorIdxRc == 1 ){
Expand Down Expand Up @@ -152451,6 +152447,7 @@ SQLITE_PRIVATE int sqlite3Select(
if( pIdx->bUnordered==0
&& pIdx->szIdxRow<pTab->szTabRow
&& pIdx->pPartIdxWhere==0
&& pIdx->idxIsVector==0
&& (!pBest || pIdx->szIdxRow<pBest->szIdxRow)
){
pBest = pIdx;
Expand Down Expand Up @@ -166075,9 +166072,10 @@ static int whereLoopAddBtreeIndex(
assert( pNew->u.btree.nBtm==0 );
opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS;
}
if( pProbe->bUnordered || pProbe->bLowQual ){
if( pProbe->bUnordered || pProbe->bLowQual || pProbe->idxIsVector ){
if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE);
if( pProbe->bLowQual ) opMask &= ~(WO_EQ|WO_IN|WO_IS);
if( pProbe->idxIsVector ) opMask = 0;
}

assert( pNew->u.btree.nEq<pProbe->nColumn );
Expand Down Expand Up @@ -166459,7 +166457,7 @@ static int indexMightHelpWithOrderBy(
ExprList *aColExpr;
int ii, jj;

if( pIndex->bUnordered ) return 0;
if( pIndex->bUnordered || pIndex->idxIsVector ) return 0;
if( (pOB = pBuilder->pWInfo->pOrderBy)==0 ) return 0;
for(ii=0; ii<pOB->nExpr; ii++){
Expr *pExpr = sqlite3ExprSkipCollateAndLikely(pOB->a[ii].pExpr);
Expand Down Expand Up @@ -166628,6 +166626,9 @@ static SQLITE_NOINLINE u32 whereIsCoveringIndex(
** if pIdx is covering. Assume it is not. */
return 0;
}
if( pIdx->idxIsVector==1 ){
return 0;
}
if( pIdx->bHasExpr==0 ){
for(i=0; i<pIdx->nColumn; i++){
if( pIdx->aiColumn[i]>=BMS-1 ) break;
Expand Down Expand Up @@ -166916,6 +166917,9 @@ static int whereLoopAddBtree(
testcase( pNew->iTab!=pSrc->iCursor ); /* See ticket [98d973b8f5] */
continue; /* Partial index inappropriate for this query */
}
if( pProbe->idxIsVector!=0 ){
continue; /* Vector index inappropriate for this query */
}
if( pProbe->bNoQuery ) continue;
rSize = pProbe->aiRowLogEst[0];
pNew->u.btree.nEq = 0;
Expand Down Expand Up @@ -167919,7 +167923,7 @@ static i8 wherePathSatisfiesOrderBy(
pIndex = 0;
nKeyCol = 0;
nColumn = 1;
}else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered ){
}else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered || pIndex->idxIsVector ){
return 0;
}else{
nKeyCol = pIndex->nKeyCol;
Expand Down Expand Up @@ -215466,7 +215470,6 @@ int vectorIndexSearch(
rc = SQLITE_ERROR;
goto out;
}
assert( type == VECTOR_TYPE_FLOAT32 || type == VECTOR_TYPE_FLOAT64 || type == VECTOR_TYPE_FLOAT1BIT );

pVector = vectorAlloc(type, dims);
if( pVector == NULL ){
Expand Down
5 changes: 0 additions & 5 deletions libsql-sqlite3/src/build.c
Original file line number Diff line number Diff line change
Expand Up @@ -4345,11 +4345,6 @@ void sqlite3CreateIndex(
goto exit_create_index;
}
if( vectorIdxRc >= 1 ){
/*
* SQLite can use B-Tree indices in some optimizations (like SELECT COUNT(*) can use any full B-Tree index instead of PK index)
* But, SQLite pretty conservative about usage of unordered indices - that's what we need here
*/
pIndex->bUnordered = 1;
pIndex->idxIsVector = 1;
}
if( vectorIdxRc == 1 ){
Expand Down
1 change: 1 addition & 0 deletions libsql-sqlite3/src/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -8423,6 +8423,7 @@ int sqlite3Select(
if( pIdx->bUnordered==0
&& pIdx->szIdxRow<pTab->szTabRow
&& pIdx->pPartIdxWhere==0
&& pIdx->idxIsVector==0
&& (!pBest || pIdx->szIdxRow<pBest->szIdxRow)
){
pBest = pIdx;
Expand Down
1 change: 0 additions & 1 deletion libsql-sqlite3/src/vectorIndex.c
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,6 @@ int vectorIndexSearch(
rc = SQLITE_ERROR;
goto out;
}
assert( type == VECTOR_TYPE_FLOAT32 || type == VECTOR_TYPE_FLOAT64 || type == VECTOR_TYPE_FLOAT1BIT );

pVector = vectorAlloc(type, dims);
if( pVector == NULL ){
Expand Down
13 changes: 10 additions & 3 deletions libsql-sqlite3/src/where.c
Original file line number Diff line number Diff line change
Expand Up @@ -2973,9 +2973,10 @@ static int whereLoopAddBtreeIndex(
assert( pNew->u.btree.nBtm==0 );
opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS;
}
if( pProbe->bUnordered || pProbe->bLowQual ){
if( pProbe->bUnordered || pProbe->bLowQual || pProbe->idxIsVector ){
if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE);
if( pProbe->bLowQual ) opMask &= ~(WO_EQ|WO_IN|WO_IS);
if( pProbe->idxIsVector ) opMask = 0;
}

assert( pNew->u.btree.nEq<pProbe->nColumn );
Expand Down Expand Up @@ -3357,7 +3358,7 @@ static int indexMightHelpWithOrderBy(
ExprList *aColExpr;
int ii, jj;

if( pIndex->bUnordered ) return 0;
if( pIndex->bUnordered || pIndex->idxIsVector ) return 0;
if( (pOB = pBuilder->pWInfo->pOrderBy)==0 ) return 0;
for(ii=0; ii<pOB->nExpr; ii++){
Expr *pExpr = sqlite3ExprSkipCollateAndLikely(pOB->a[ii].pExpr);
Expand Down Expand Up @@ -3526,6 +3527,9 @@ static SQLITE_NOINLINE u32 whereIsCoveringIndex(
** if pIdx is covering. Assume it is not. */
return 0;
}
if( pIdx->idxIsVector==1 ){
return 0;
}
if( pIdx->bHasExpr==0 ){
for(i=0; i<pIdx->nColumn; i++){
if( pIdx->aiColumn[i]>=BMS-1 ) break;
Expand Down Expand Up @@ -3814,6 +3818,9 @@ static int whereLoopAddBtree(
testcase( pNew->iTab!=pSrc->iCursor ); /* See ticket [98d973b8f5] */
continue; /* Partial index inappropriate for this query */
}
if( pProbe->idxIsVector!=0 ){
continue; /* Vector index inappropriate for this query */
}
if( pProbe->bNoQuery ) continue;
rSize = pProbe->aiRowLogEst[0];
pNew->u.btree.nEq = 0;
Expand Down Expand Up @@ -4817,7 +4824,7 @@ static i8 wherePathSatisfiesOrderBy(
pIndex = 0;
nKeyCol = 0;
nColumn = 1;
}else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered ){
}else if( (pIndex = pLoop->u.btree.pIndex)==0 || pIndex->bUnordered || pIndex->idxIsVector ){
return 0;
}else{
nKeyCol = pIndex->nKeyCol;
Expand Down
18 changes: 16 additions & 2 deletions libsql-sqlite3/test/libsql_vector_index.test
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,20 @@ do_execsql_test vector-index-dont-affect-sql {
SELECT rowid FROM t_vector_other_sql WHERE emb = vector('[7,8]');
} {4 1 1 2 3 4}

do_execsql_test vector-partial-index-dont-affect-sql {
CREATE TABLE t_vector_other_sql_part ( emb FLOAT32(2), t TEXT );
INSERT INTO t_vector_other_sql_part VALUES (vector('[1,2]'), 'paper'), (vector('[3,4]'), 'journal');
CREATE INDEX t_vector_other_sql_part_idx ON t_vector_other_sql_part(libsql_vector_idx(emb));
INSERT INTO t_vector_other_sql_part VALUES (vector('[5,6]'), 'journal'), (vector('[7,8]'), 'paper');
SELECT COUNT(*) FROM t_vector_other_sql_part;
SELECT COUNT(*) FROM t_vector_other_sql_part WHERE emb = vector('[1,2]');
SELECT COUNT(*) FROM t_vector_other_sql_part WHERE t = 'paper';
SELECT rowid FROM t_vector_other_sql_part WHERE emb = vector('[1,2]');
SELECT rowid FROM t_vector_other_sql_part WHERE emb = vector('[3,4]');
SELECT rowid FROM t_vector_other_sql_part WHERE emb = vector('[5,6]');
SELECT rowid FROM t_vector_other_sql_part WHERE emb = vector('[7,8]');
} {4 1 2 1 2 3 4}

do_execsql_test vector-index-dont-affect-sql-pk {
CREATE TABLE t_vector_other_sql_pk ( name TEXT PRIMARY KEY, emb FLOAT32(2) );
INSERT INTO t_vector_other_sql_pk VALUES ('a', vector('[1,2]')), ('b', vector('[3,4]'));
Expand Down Expand Up @@ -307,6 +321,8 @@ do_execsql_test vector-f64-index {
SELECT * FROM vector_top_k('t_f64_idx', vector64('[1,2]'), 2);
} {1 2}

reset_db
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We currently have an issue: without a db reset, the tests start to generate generic SQLite errors in unexpected places.

This is really weird, and I don't understand why it happens or how we can fix it.
The last time I investigated this issue, I noticed that a SELECT over the libsql_vector_meta_shadow table with a condition on the name field sometimes returns nothing, but if you force a full scan of the table, everything works fine.

I suspect that the issue arises when a split of the libsql_vector_meta_shadow B-Tree happens, but I'm still not sure why we have this issue...

CC @penberg


do_execsql_test vector-partial {
CREATE TABLE t_partial( name TEXT, type INT, v FLOAT32(3));
INSERT INTO t_partial VALUES ( 'a', 0, vector('[1,2,3]') );
Expand Down Expand Up @@ -392,8 +408,6 @@ proc error_messages {sql} {
set ret [sqlite3_errmsg db]
}

reset_db

do_test vector-errors {
set ret [list]
lappend ret [error_messages {CREATE INDEX t_no_idx ON t_no( libsql_vector_idx(v) )}]
Expand Down
Loading