Skip to content

Commit

Permalink
Add some java documentation.
Browse files Browse the repository at this point in the history
Signed-off-by: conggguan <[email protected]>
  • Loading branch information
conggguan committed Apr 22, 2024
1 parent df0c9fc commit 3e51226
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ private Map<String, Float> getFilteredScoreTokens(boolean aboveThreshold, float
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}

public BooleanQuery buildFeatureFieldQueryFromTokens(Map<String, Float> tokens, String fieldName) {
private BooleanQuery buildFeatureFieldQueryFromTokens(Map<String, Float> tokens, String fieldName) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (Map.Entry<String, Float> entry : tokens.entrySet()) {
builder.add(FeatureField.newLinearQuery(fieldName, entry.getKey(), entry.getValue()), BooleanClause.Occur.SHOULD);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
import java.util.Locale;
import java.util.Objects;

/**
* Represents the parameters for the neural_sparse two-phase process.
* This class encapsulates settings related to window size expansion, pruning ratio, and whether the two-phase search is enabled.
* It includes mechanisms to update settings from the cluster dynamically.
*/
@Getter
@Setter
@Accessors(chain = true, fluent = true)
Expand All @@ -52,6 +57,12 @@ public class NeuralSparseTwoPhaseParameters implements Writeable {
private Float pruning_ratio;
private Boolean enabled;

/**
* Initializes the default two-phase settings when a cluster starts, and registers
* consumers so the defaults are updated dynamically from cluster settings.
*
* @param clusterService The OpenSearch clusterService.
* @param settings The environment settings used for initialization.
*/
public static void initialize(ClusterService clusterService, Settings settings) {
DEFAULT_ENABLED = NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_ENABLED.get(settings);
DEFAULT_WINDOW_SIZE_EXPANSION = NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_WINDOW_SIZE_EXPANSION.get(settings);
Expand All @@ -60,23 +71,15 @@ public static void initialize(ClusterService clusterService, Settings settings)
clusterService.getClusterSettings()
.addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_ENABLED, it -> DEFAULT_ENABLED = it);
clusterService.getClusterSettings()
.addSettingsUpdateConsumer(
NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_WINDOW_SIZE_EXPANSION,
it -> DEFAULT_WINDOW_SIZE_EXPANSION = it
);
.addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_WINDOW_SIZE_EXPANSION, it -> DEFAULT_WINDOW_SIZE_EXPANSION = it);
clusterService.getClusterSettings()
.addSettingsUpdateConsumer(
NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_PRUNING_RATIO,
it -> DEFAULT_PRUNING_RATIO = it
);
.addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_PRUNING_RATIO, it -> DEFAULT_PRUNING_RATIO = it);
clusterService.getClusterSettings()
.addSettingsUpdateConsumer(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_MAX_WINDOW_SIZE, it -> MAX_WINDOW_SIZE = it);
}

public static NeuralSparseTwoPhaseParameters getDefaultSettings() {
return new NeuralSparseTwoPhaseParameters().window_size_expansion(DEFAULT_WINDOW_SIZE_EXPANSION)
.pruning_ratio(DEFAULT_PRUNING_RATIO)
.enabled(DEFAULT_ENABLED);
return new NeuralSparseTwoPhaseParameters().window_size_expansion(DEFAULT_WINDOW_SIZE_EXPANSION).pruning_ratio(DEFAULT_PRUNING_RATIO).enabled(DEFAULT_ENABLED);
}

/**
Expand All @@ -98,6 +101,13 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(enabled);
}

/**
* Builds the content of this object into an XContentBuilder, typically for JSON serialization.
*
* @param builder The builder to fill.
* @return the given XContentBuilder with object content added.
* @throws IOException if building the content fails.
*/
public XContentBuilder doXContent(XContentBuilder builder) throws IOException {
builder.startObject(NAME.getPreferredName());
builder.field(WINDOW_SIZE_EXPANSION.getPreferredName(), window_size_expansion);
Expand All @@ -107,6 +117,13 @@ public XContentBuilder doXContent(XContentBuilder builder) throws IOException {
return builder;
}

/**
* Parses a NeuralSparseTwoPhaseParameters object from XContent (typically JSON).
*
* @param parser the XContentParser to extract data from.
* @return a new instance of NeuralSparseTwoPhaseParameters initialized from the parser.
* @throws IOException if parsing fails.
*/
public static NeuralSparseTwoPhaseParameters parseFromXContent(XContentParser parser) throws IOException {
XContentParser.Token token;
String currentFieldName = "";
Expand Down Expand Up @@ -157,13 +174,24 @@ public int hashcode() {
return builder.toHashCode();
}

/**
* Checks if the two-phase search feature is enabled based on the given parameters.
*
* @param neuralSparseTwoPhaseParameters The parameters to check.
* @return true if enabled, false otherwise.
*/
public static boolean isEnabled(NeuralSparseTwoPhaseParameters neuralSparseTwoPhaseParameters) {
if (Objects.isNull(neuralSparseTwoPhaseParameters)) {
return false;
}
return neuralSparseTwoPhaseParameters.enabled();
}

/**
* A flag to determine if this feature are support.
*
* @return True if cluster are on support, false if it doesn't.
*/
public static boolean isClusterOnOrAfterMinReqVersionForTwoPhaseSearchSupport() {
return NeuralSearchClusterUtil.instance().getClusterMinVersion().onOrAfter(MINIMAL_SUPPORTED_VERSION_TWO_PHASE_SEARCH);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,33 @@
* Include adding the second phase query to searchContext and set the currentQuery to highScoreTokenQuery.
*/
public class NeuralSparseTwoPhaseUtil {
/**
* @param query The whole query include neuralSparseQuery to executed.
* @param searchContext The searchContext with this query.
*/
public static void addRescoreContextFromNeuralSparseSparseQuery(final Query query, SearchContext searchContext) {
Map<Query, Float> query2weight = new HashMap<>();
float windowSizeExpansion = populateQueryWeightsMapAndGetWindowSizeExpansion(query, query2weight, 1.0f, 1.0f);
Query twoPhaseQuery;
if (query2weight.isEmpty()) {
return;
} else if (query2weight.size() == 1) {
Map.Entry<Query, Float> entry = query2weight.entrySet().stream().findFirst().get();
twoPhaseQuery = new BoostQuery(entry.getKey(), entry.getValue());
} else {
twoPhaseQuery = getNestedTwoPhaseQuery(query2weight);
}
int curWindowSize = (int) (searchContext.size() * windowSizeExpansion);
if (curWindowSize < 0 || curWindowSize > min(NeuralSparseTwoPhaseParameters.MAX_WINDOW_SIZE, MAX_RESCORE_WINDOW_SETTING.get(searchContext.getQueryShardContext().getIndexSettings().getSettings()))) {
throw new IllegalArgumentException("Two phase final windowSize out of score with value " + curWindowSize + ".");
}
QueryRescorer.QueryRescoreContext rescoreContext = new QueryRescorer.QueryRescoreContext(curWindowSize);
rescoreContext.setQuery(twoPhaseQuery);
rescoreContext.setRescoreQueryWeight(getOriginQueryWeightAfterRescore(searchContext.rescore()));
searchContext.addRescore(rescoreContext);
}

private static float populateQueryWeightsMapAndGetWindowSizeExpansion(
final Query query,
Map<Query, Float> query2Weight,
float weight,
float windoSizeExpansion
) {
private static float populateQueryWeightsMapAndGetWindowSizeExpansion(final Query query, Map<Query, Float> query2Weight, float weight, float windoSizeExpansion) {
if (query instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) query;
weight *= boostQuery.getBoost();
Expand All @@ -53,53 +73,28 @@ private static float populateQueryWeightsMapAndGetWindowSizeExpansion(
} else if (query instanceof NeuralSparseQuery) {
query2Weight.put(((NeuralSparseQuery) query).getLowScoreTokenQuery(), weight);
((NeuralSparseQuery) query).setCurrentQueryToHighScoreTokenQuery();
windoSizeExpansion = max(windoSizeExpansion, ((NeuralSparseQuery) query).getRescoreWindowSizeExpansion());
windoSizeExpansion = max(
windoSizeExpansion,
((NeuralSparseQuery) query).getRescoreWindowSizeExpansion()
);
}
// ToDo Support for other compound query.
return windoSizeExpansion;
}

private static float getOriginQueryWeightAfterRescore(List<RescoreContext> rescoreContextList) {
return rescoreContextList.stream()
.filter(ctx -> ctx instanceof QueryRescorer.QueryRescoreContext)
return rescoreContextList.stream().
filter(ctx -> ctx instanceof QueryRescorer.QueryRescoreContext)
.map(ctx -> ((QueryRescorer.QueryRescoreContext) ctx).queryWeight())
.reduce(1.0f, (a, b) -> a * b);
}

private static Query getNestedTwoPhaseQuery(Map<Query, Float> query2weight) {
BooleanQuery.Builder builder = new BooleanQuery.Builder();
query2weight.forEach((query, weight) -> { builder.add(new BoostQuery(query, weight), BooleanClause.Occur.SHOULD); });
query2weight.forEach((query, weight) -> {
builder.add(new BoostQuery(query, weight), BooleanClause.Occur.SHOULD);
});
return builder.build();
}

/**
*
* @param query The whole query include neuralSparseQuery to executed.
* @param searchContext The searchContext with this query.
*/
public static void addRescoreContextFromNeuralSparseSparseQuery(final Query query, SearchContext searchContext) {
Map<Query, Float> query2weight = new HashMap<>();
float windowSizeExpansion = populateQueryWeightsMapAndGetWindowSizeExpansion(query, query2weight, 1.0f, 1.0f);
Query twoPhaseQuery;
if (query2weight.isEmpty()) {
return;
} else if (query2weight.size() == 1) {
Map.Entry<Query, Float> entry = query2weight.entrySet().stream().findFirst().get();
twoPhaseQuery = new BoostQuery(entry.getKey(), entry.getValue());
} else {
twoPhaseQuery = getNestedTwoPhaseQuery(query2weight);
}
int curWindowSize = (int) (searchContext.size() * windowSizeExpansion);
if (curWindowSize < 0
|| curWindowSize > min(
NeuralSparseTwoPhaseParameters.MAX_WINDOW_SIZE,
MAX_RESCORE_WINDOW_SETTING.get(searchContext.getQueryShardContext().getIndexSettings().getSettings())
)) {
throw new IllegalArgumentException("Two phase final windowSize out of score with value " + curWindowSize + ".");
}
QueryRescorer.QueryRescoreContext rescoreContext = new QueryRescorer.QueryRescoreContext(curWindowSize);
rescoreContext.setQuery(twoPhaseQuery);
rescoreContext.setRescoreQueryWeight(getOriginQueryWeightAfterRescore(searchContext.rescore()));
searchContext.addRescore(rescoreContext);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -925,20 +925,6 @@ private void setUpClusterService(Version version) {
NeuralSearchClusterUtil.instance().initialize(clusterService);
}

@SneakyThrows
public void testBuildFeatureFieldQueryFormTokens() {
NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME)
.queryText(QUERY_TEXT)
.modelId(MODEL_ID)
.queryTokensSupplier(QUERY_TOKENS_SUPPLIER);
BooleanQuery booleanQuery = sparseEncodingQueryBuilder.buildFeatureFieldQueryFromTokens(
sparseEncodingQueryBuilder.queryTokensSupplier().get(),
FIELD_NAME
);
assertNotNull(booleanQuery);
assertSame(booleanQuery.clauses().size(), 2);
}

@SneakyThrows
public void testTokenDividedByScores_whenDefaultSettings() {
Map<String, Float> map = new HashMap<>();
Expand Down

0 comments on commit 3e51226

Please sign in to comment.