From 59710437e4a885252de5e5555fbcf42d223b092c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Melvyn=20La=C3=AFly?= Date: Fri, 26 Apr 2024 10:43:52 +0200 Subject: [PATCH] Return the search terms as search highlights for SQLite instead of nothing (#17000) Fixes https://github.com/element-hq/synapse/issues/16999 and https://github.com/element-hq/element-android/pull/8729 by returning the search terms as search highlights. --- changelog.d/17000.bugfix | 1 + synapse/storage/databases/main/search.py | 31 ++++++++++++++++++------ tests/storage/test_room_search.py | 13 +++++----- 3 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 changelog.d/17000.bugfix diff --git a/changelog.d/17000.bugfix b/changelog.d/17000.bugfix new file mode 100644 index 00000000000..86b21c9615d --- /dev/null +++ b/changelog.d/17000.bugfix @@ -0,0 +1 @@ +Fixed search feature of Element Android on homesevers using SQLite by returning search terms as search highlights. \ No newline at end of file diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index 4a0afb50ac7..20fcfd3122d 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -470,6 +470,8 @@ async def search_msgs( count_args = args count_clauses = clauses + sqlite_highlights: List[str] = [] + if isinstance(self.database_engine, PostgresEngine): search_query = search_term sql = """ @@ -486,7 +488,7 @@ async def search_msgs( """ count_args = [search_query] + count_args elif isinstance(self.database_engine, Sqlite3Engine): - search_query = _parse_query_for_sqlite(search_term) + search_query, sqlite_highlights = _parse_query_for_sqlite(search_term) sql = """ SELECT rank(matchinfo(event_search)) as rank, room_id, event_id @@ -531,9 +533,11 @@ async def search_msgs( event_map = {ev.event_id: ev for ev in events} - highlights = None + highlights: Collection[str] = [] if isinstance(self.database_engine, PostgresEngine): highlights = await self._find_highlights_in_postgres(search_query, events) + else: + highlights = sqlite_highlights count_sql += " GROUP BY room_id" @@ -597,6 +601,8 @@ async def search_rooms( count_args = list(args) count_clauses = list(clauses) + sqlite_highlights: List[str] = [] + if pagination_token: try: origin_server_ts_str, stream_str = pagination_token.split(",") @@ -647,7 +653,7 @@ async def search_rooms( CROSS JOIN events USING (event_id) WHERE """ - search_query = _parse_query_for_sqlite(search_term) + search_query, sqlite_highlights = _parse_query_for_sqlite(search_term) args = [search_query] + args count_sql = """ @@ -694,9 +700,11 @@ async def search_rooms( event_map = {ev.event_id: ev for ev in events} - highlights = None + highlights: Collection[str] = [] if isinstance(self.database_engine, PostgresEngine): highlights = await self._find_highlights_in_postgres(search_query, events) + else: + highlights = sqlite_highlights count_sql += " GROUP BY room_id" @@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList: return tokens -def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: +def _tokens_to_sqlite_match_query(tokens: TokenList) -> Tuple[str, List[str]]: """ Convert the list of tokens to a string suitable for passing to sqlite's MATCH. Assume sqlite was compiled with enhanced query syntax. + Returns the sqlite-formatted query string and the tokenized search terms + that can be used as highlights. + Ref: https://www.sqlite.org/fts3.html#full_text_index_queries """ match_query = [] + highlights = [] for token in tokens: if isinstance(token, str): match_query.append(token) + highlights.append(token) elif isinstance(token, Phrase): match_query.append('"' + " ".join(token.phrase) + '"') + highlights.append(" ".join(token.phrase)) elif token == SearchToken.Not: # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search # term has already been added before this. @@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: else: raise ValueError(f"unknown token {token}") - return "".join(match_query) + return "".join(match_query), highlights -def _parse_query_for_sqlite(search_term: str) -> str: +def _parse_query_for_sqlite(search_term: str) -> Tuple[str, List[str]]: """Takes a plain unicode string from the user and converts it into a form that can be passed to sqllite's matchinfo(). + + Returns the converted query string and the tokenized search terms + that can be used as highlights. """ return _tokens_to_sqlite_match_query(_tokenize_query(search_term)) diff --git a/tests/storage/test_room_search.py b/tests/storage/test_room_search.py index 1eab89f140b..340642b7e7a 100644 --- a/tests/storage/test_room_search.py +++ b/tests/storage/test_room_search.py @@ -71,17 +71,16 @@ def test_null_byte(self) -> None: store.search_msgs([room_id], "hi bob", ["content.body"]) ) self.assertEqual(result.get("count"), 1) - if isinstance(store.database_engine, PostgresEngine): - self.assertIn("hi", result.get("highlights")) - self.assertIn("bob", result.get("highlights")) + self.assertIn("hi", result.get("highlights")) + self.assertIn("bob", result.get("highlights")) # Check that search works for an unrelated message result = self.get_success( store.search_msgs([room_id], "another", ["content.body"]) ) self.assertEqual(result.get("count"), 1) - if isinstance(store.database_engine, PostgresEngine): - self.assertIn("another", result.get("highlights")) + + self.assertIn("another", result.get("highlights")) # Check that search works for a search term that overlaps with the message # containing a null byte and an unrelated message. @@ -90,8 +89,8 @@ def test_null_byte(self) -> None: result = self.get_success( store.search_msgs([room_id], "hi alice", ["content.body"]) ) - if isinstance(store.database_engine, PostgresEngine): - self.assertIn("alice", result.get("highlights")) + + self.assertIn("alice", result.get("highlights")) def test_non_string(self) -> None: """Test that non-string `value`s are not inserted into `event_search`.