diff --git a/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java index 8af6804e0..ecd48aa23 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java +++ b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java @@ -452,7 +452,7 @@ private Map getFilteredScoreTokens(boolean aboveThreshold, float .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } - public BooleanQuery buildFeatureFieldQueryFromTokens(Map tokens, String fieldName) { + private BooleanQuery buildFeatureFieldQueryFromTokens(Map tokens, String fieldName) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Map.Entry entry : tokens.entrySet()) { builder.add(FeatureField.newLinearQuery(fieldName, entry.getKey(), entry.getValue()), BooleanClause.Occur.SHOULD); diff --git a/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseTwoPhaseParameters.java b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseTwoPhaseParameters.java index 6a4284429..7715c1c70 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseTwoPhaseParameters.java +++ b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseTwoPhaseParameters.java @@ -28,6 +28,11 @@ import java.util.Locale; import java.util.Objects; +/** + * Represents the parameters for neural_sparse two-phase process. + * This class encapsulates settings related to window size expansion, pruning ratio, and whether the two-phase search is enabled. + * It includes mechanisms to update settings from the cluster dynamically. + */ @Getter @Setter @Accessors(chain = true, fluent = true) @@ -52,6 +57,12 @@ public class NeuralSparseTwoPhaseParameters implements Writeable { private Float pruning_ratio; private Boolean enabled; + /** + * Initializes the default two-phase parameters from the given settings and registers consumers for cluster settings updates. + * + * @param clusterService The OpenSearch clusterService. 
+ * @param settings The env settings to initialize. + */ public static void initialize(ClusterService clusterService, Settings settings) { DEFAULT_ENABLED = NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_ENABLED.get(settings); DEFAULT_WINDOW_SIZE_EXPANSION = NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_WINDOW_SIZE_EXPANSION.get(settings); @@ -60,23 +71,15 @@ public static void initialize(ClusterService clusterService, Settings settings) clusterService.getClusterSettings() .addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_ENABLED, it -> DEFAULT_ENABLED = it); clusterService.getClusterSettings() - .addSettingsUpdateConsumer( - NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_WINDOW_SIZE_EXPANSION, - it -> DEFAULT_WINDOW_SIZE_EXPANSION = it - ); + .addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_WINDOW_SIZE_EXPANSION, it -> DEFAULT_WINDOW_SIZE_EXPANSION = it); clusterService.getClusterSettings() - .addSettingsUpdateConsumer( - NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_PRUNING_RATIO, - it -> DEFAULT_PRUNING_RATIO = it - ); + .addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_PRUNING_RATIO, it -> DEFAULT_PRUNING_RATIO = it); clusterService.getClusterSettings() .addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_MAX_WINDOW_SIZE, it -> MAX_WINDOW_SIZE = it); } public static NeuralSparseTwoPhaseParameters getDefaultSettings() { - return new NeuralSparseTwoPhaseParameters().window_size_expansion(DEFAULT_WINDOW_SIZE_EXPANSION) - .pruning_ratio(DEFAULT_PRUNING_RATIO) - .enabled(DEFAULT_ENABLED); + return new NeuralSparseTwoPhaseParameters().window_size_expansion(DEFAULT_WINDOW_SIZE_EXPANSION).pruning_ratio(DEFAULT_PRUNING_RATIO).enabled(DEFAULT_ENABLED); } /** @@ -98,6 +101,13 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(enabled); } + /** + * Builds the content of this object into an XContentBuilder, 
+ * typically for JSON serialization. + * + * @param builder The builder to fill. + * @return the given XContentBuilder with object content added. + * @throws IOException if building the content fails. + */ public XContentBuilder doXContent(XContentBuilder builder) throws IOException { builder.startObject(NAME.getPreferredName()); builder.field(WINDOW_SIZE_EXPANSION.getPreferredName(), window_size_expansion); @@ -107,6 +117,13 @@ public XContentBuilder doXContent(XContentBuilder builder) throws IOException { return builder; } + /** + * Parses a NeuralSparseTwoPhaseParameters object from XContent (typically JSON). + * + * @param parser the XContentParser to extract data from. + * @return a new instance of NeuralSparseTwoPhaseParameters initialized from the parser. + * @throws IOException if parsing fails. + */ public static NeuralSparseTwoPhaseParameters parseFromXContent(XContentParser parser) throws IOException { XContentParser.Token token; String currentFieldName = ""; @@ -157,6 +174,12 @@ public int hashcode() { return builder.toHashCode(); } + /** + * Checks if the two-phase search feature is enabled based on the given parameters. + * + * @param neuralSparseTwoPhaseParameters The parameters to check. + * @return true if enabled, false otherwise. + */ public static boolean isEnabled(NeuralSparseTwoPhaseParameters neuralSparseTwoPhaseParameters) { if (Objects.isNull(neuralSparseTwoPhaseParameters)) { return false; @@ -164,6 +187,11 @@ public static boolean isEnabled(NeuralSparseTwoPhaseParameters neuralSparseTwoPh return neuralSparseTwoPhaseParameters.enabled(); } + /** + * Checks whether the cluster's minimum version supports two-phase search. + * + * @return true if the cluster's minimum version is on or after the minimal supported version, false otherwise. 
+ */ public static boolean isClusterOnOrAfterMinReqVersionForTwoPhaseSearchSupport() { return NeuralSearchClusterUtil.instance().getClusterMinVersion().onOrAfter(MINIMAL_SUPPORTED_VERSION_TWO_PHASE_SEARCH); } diff --git a/src/main/java/org/opensearch/neuralsearch/search/util/NeuralSparseTwoPhaseUtil.java b/src/main/java/org/opensearch/neuralsearch/search/util/NeuralSparseTwoPhaseUtil.java index 6c163d01b..43c674a4f 100644 --- a/src/main/java/org/opensearch/neuralsearch/search/util/NeuralSparseTwoPhaseUtil.java +++ b/src/main/java/org/opensearch/neuralsearch/search/util/NeuralSparseTwoPhaseUtil.java @@ -27,13 +27,33 @@ * Include adding the second phase query to searchContext and set the currentQuery to highScoreTokenQuery. */ public class NeuralSparseTwoPhaseUtil { + /** + * @param query The whole query, including the neuralSparseQuery, to be executed. + * @param searchContext The searchContext with this query. + */ + public static void addRescoreContextFromNeuralSparseSparseQuery(final Query query, SearchContext searchContext) { + Map query2weight = new HashMap<>(); + float windowSizeExpansion = populateQueryWeightsMapAndGetWindowSizeExpansion(query, query2weight, 1.0f, 1.0f); + Query twoPhaseQuery; + if (query2weight.isEmpty()) { + return; + } else if (query2weight.size() == 1) { + Map.Entry entry = query2weight.entrySet().stream().findFirst().get(); + twoPhaseQuery = new BoostQuery(entry.getKey(), entry.getValue()); + } else { + twoPhaseQuery = getNestedTwoPhaseQuery(query2weight); + } + int curWindowSize = (int) (searchContext.size() * windowSizeExpansion); + if (curWindowSize < 0 || curWindowSize > min(NeuralSparseTwoPhaseParameters.MAX_WINDOW_SIZE, MAX_RESCORE_WINDOW_SETTING.get(searchContext.getQueryShardContext().getIndexSettings().getSettings()))) { + throw new IllegalArgumentException("Two phase final windowSize out of score with value " + curWindowSize + "."); + } + QueryRescorer.QueryRescoreContext rescoreContext = new 
QueryRescorer.QueryRescoreContext(curWindowSize); + rescoreContext.setQuery(twoPhaseQuery); + rescoreContext.setRescoreQueryWeight(getOriginQueryWeightAfterRescore(searchContext.rescore())); + searchContext.addRescore(rescoreContext); + } - private static float populateQueryWeightsMapAndGetWindowSizeExpansion( - final Query query, - Map query2Weight, - float weight, - float windoSizeExpansion - ) { + private static float populateQueryWeightsMapAndGetWindowSizeExpansion(final Query query, Map query2Weight, float weight, float windoSizeExpansion) { if (query instanceof BoostQuery) { BoostQuery boostQuery = (BoostQuery) query; weight *= boostQuery.getBoost(); @@ -53,53 +73,28 @@ private static float populateQueryWeightsMapAndGetWindowSizeExpansion( } else if (query instanceof NeuralSparseQuery) { query2Weight.put(((NeuralSparseQuery) query).getLowScoreTokenQuery(), weight); ((NeuralSparseQuery) query).setCurrentQueryToHighScoreTokenQuery(); - windoSizeExpansion = max(windoSizeExpansion, ((NeuralSparseQuery) query).getRescoreWindowSizeExpansion()); + windoSizeExpansion = max( + windoSizeExpansion, + ((NeuralSparseQuery) query).getRescoreWindowSizeExpansion() + ); } // ToDo Support for other compound query. return windoSizeExpansion; } private static float getOriginQueryWeightAfterRescore(List rescoreContextList) { - return rescoreContextList.stream() - .filter(ctx -> ctx instanceof QueryRescorer.QueryRescoreContext) + return rescoreContextList.stream(). 
+ filter(ctx -> ctx instanceof QueryRescorer.QueryRescoreContext) .map(ctx -> ((QueryRescorer.QueryRescoreContext) ctx).queryWeight()) .reduce(1.0f, (a, b) -> a * b); } private static Query getNestedTwoPhaseQuery(Map query2weight) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); - query2weight.forEach((query, weight) -> { builder.add(new BoostQuery(query, weight), BooleanClause.Occur.SHOULD); }); + query2weight.forEach((query, weight) -> { + builder.add(new BoostQuery(query, weight), BooleanClause.Occur.SHOULD); + }); return builder.build(); } - /** - * - * @param query The whole query include neuralSparseQuery to executed. - * @param searchContext The searchContext with this query. - */ - public static void addRescoreContextFromNeuralSparseSparseQuery(final Query query, SearchContext searchContext) { - Map query2weight = new HashMap<>(); - float windowSizeExpansion = populateQueryWeightsMapAndGetWindowSizeExpansion(query, query2weight, 1.0f, 1.0f); - Query twoPhaseQuery; - if (query2weight.isEmpty()) { - return; - } else if (query2weight.size() == 1) { - Map.Entry entry = query2weight.entrySet().stream().findFirst().get(); - twoPhaseQuery = new BoostQuery(entry.getKey(), entry.getValue()); - } else { - twoPhaseQuery = getNestedTwoPhaseQuery(query2weight); - } - int curWindowSize = (int) (searchContext.size() * windowSizeExpansion); - if (curWindowSize < 0 - || curWindowSize > min( - NeuralSparseTwoPhaseParameters.MAX_WINDOW_SIZE, - MAX_RESCORE_WINDOW_SETTING.get(searchContext.getQueryShardContext().getIndexSettings().getSettings()) - )) { - throw new IllegalArgumentException("Two phase final windowSize out of score with value " + curWindowSize + "."); - } - QueryRescorer.QueryRescoreContext rescoreContext = new QueryRescorer.QueryRescoreContext(curWindowSize); - rescoreContext.setQuery(twoPhaseQuery); - rescoreContext.setRescoreQueryWeight(getOriginQueryWeightAfterRescore(searchContext.rescore())); - searchContext.addRescore(rescoreContext); - } } diff 
--git a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java index 1d103b01d..c04fdc59a 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java @@ -925,20 +925,6 @@ private void setUpClusterService(Version version) { NeuralSearchClusterUtil.instance().initialize(clusterService); } - @SneakyThrows - public void testBuildFeatureFieldQueryFormTokens() { - NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) - .queryText(QUERY_TEXT) - .modelId(MODEL_ID) - .queryTokensSupplier(QUERY_TOKENS_SUPPLIER); - BooleanQuery booleanQuery = sparseEncodingQueryBuilder.buildFeatureFieldQueryFromTokens( - sparseEncodingQueryBuilder.queryTokensSupplier().get(), - FIELD_NAME - ); - assertNotNull(booleanQuery); - assertSame(booleanQuery.clauses().size(), 2); - } - @SneakyThrows public void testTokenDividedByScores_whenDefaultSettings() { Map map = new HashMap<>();