From 0b365998ed6e4f537dbdf7983a077bc53e785bb9 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Mon, 6 Jan 2025 16:23:59 -0800 Subject: [PATCH] Always use constant_score query for match_only_text (#16964) In some cases, when we create a term query over a `match_only_text` field, it may still try to compute scores, which prevents early termination. We should *always* use a constant score query when querying `match_only_text`, since we don't have the statistics required to compute scores. --------- Signed-off-by: Michael Froh --- CHANGELOG.md | 1 + .../mapper/MatchOnlyTextFieldMapper.java | 11 +++++++++ .../mapper/MatchOnlyTextFieldMapperTests.java | 23 ++++++++++++++++++- .../mapper/MatchOnlyTextFieldTypeTests.java | 18 +++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcf1904db8d27..1b49368a20fa8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -87,6 +87,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Skip remote-repositories validations for node-joins when RepositoriesService is not in sync with cluster-state ([#16763](https://github.com/opensearch-project/OpenSearch/pull/16763)) - Fix _list/shards API failing when closed indices are present ([#16606](https://github.com/opensearch-project/OpenSearch/pull/16606)) - Fix remote shards balance ([#15335](https://github.com/opensearch-project/OpenSearch/pull/15335)) +- Always use `constant_score` query for `match_only_text` field ([#16964](https://github.com/opensearch-project/OpenSearch/pull/16964)) ### Security diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index fb97f8c309a70..757de65248d33 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -16,6 +16,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; @@ -290,6 +291,16 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context); } + @Override + public Query termQuery(Object value, QueryShardContext context) { + return new ConstantScoreQuery(super.termQuery(value, context)); + } + + @Override + public Query termQueryCaseInsensitive(Object value, QueryShardContext context) { + return new ConstantScoreQuery(super.termQueryCaseInsensitive(value, context)); + } + private List> getTermsFromTokenStream(TokenStream stream) throws IOException { final List> termArray = new ArrayList<>(); TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 580f8cccc9af5..d9f0fd6657085 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -15,11 +15,13 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; +import org.opensearch.common.lucene.search.AutomatonQueries; import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -28,6 +30,7 @@ import org.opensearch.index.query.MatchPhraseQueryBuilder; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.SourceFieldMatchQuery; +import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.index.search.MatchQuery; import org.junit.Before; @@ -391,7 +394,7 @@ public void testPhraseQuery() throws IOException { assertThat(q, is(expectedQuery)); Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext); - assertThat(q4, is(new TermQuery(new Term("field", "singleton")))); + assertThat(q4, is(new ConstantScoreQuery(new TermQuery(new Term("field", "singleton"))))); Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext); expectedQuery = new SourceFieldMatchQuery( @@ -447,4 +450,22 @@ public void testPhraseQuery() throws IOException { ); assertThat(q6, is(expectedQuery)); } + + public void testTermQuery() throws Exception { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("field"); + { + b.field("type", textFieldName); + b.field("analyzer", "my_stop_analyzer"); // "standard" will be replaced with MockSynonymAnalyzer + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + Query q = new TermQueryBuilder("field", "foo").rewrite(queryShardContext).toQuery(queryShardContext); + assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), q); + + q = new TermQueryBuilder("field", "foo").caseInsensitive(true).rewrite(queryShardContext).toQuery(queryShardContext); + assertEquals(new ConstantScoreQuery(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "foo"))), q); + } } diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java index 51234fa04ddc2..0170cdde8b21c 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -8,7 +8,11 @@ package org.opensearch.index.mapper; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.TermQuery; import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.lucene.search.AutomatonQueries; public class MatchOnlyTextFieldTypeTests extends TextFieldTypeTests { @@ -28,4 +32,18 @@ TextFieldMapper.TextFieldType createFieldType(boolean searchable) { ParametrizedFieldMapper.Parameter.metaParam().get() ); } + + @Override + public void testTermQuery() { + MappedFieldType ft = createFieldType(true); + assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null)); + assertEquals( + new ConstantScoreQuery(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo"))), + ft.termQueryCaseInsensitive("fOo", null) + ); + + MappedFieldType unsearchable = createFieldType(false); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); + assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + } }