diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aabbbf75f00c..eeeb752eabdd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,6 +91,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix remote shards balance ([#15335](https://github.com/opensearch-project/OpenSearch/pull/15335)) - Always use `constant_score` query for `match_only_text` field ([#16964](https://github.com/opensearch-project/OpenSearch/pull/16964)) - Fix Shallow copy snapshot failures on closed index ([#16868](https://github.com/opensearch-project/OpenSearch/pull/16868)) +- The `phone-search` analyzer no longer emits the international calling code as a token ([#16993](https://github.com/opensearch-project/OpenSearch/pull/16993)) ### Security diff --git a/plugins/analysis-phonenumber/src/main/java/org/opensearch/analysis/phone/PhoneNumberTermTokenizer.java b/plugins/analysis-phonenumber/src/main/java/org/opensearch/analysis/phone/PhoneNumberTermTokenizer.java index 6b95594204eb4..90f1c38bd133b 100644 --- a/plugins/analysis-phonenumber/src/main/java/org/opensearch/analysis/phone/PhoneNumberTermTokenizer.java +++ b/plugins/analysis-phonenumber/src/main/java/org/opensearch/analysis/phone/PhoneNumberTermTokenizer.java @@ -128,8 +128,11 @@ private Set getTokens() throws IOException { countryCode = Optional.of(String.valueOf(numberProto.getCountryCode())); input = String.valueOf(numberProto.getNationalNumber()); - // Add Country code, extension, and the number as tokens - tokens.add(countryCode.get()); + if (addNgrams) { + // Consider the country code as an ngram - it makes no sense in the search analyzer as it'd match all values with the same country code + tokens.add(countryCode.get()); + } + // Add extension, and the number as tokens tokens.add(countryCode.get() + input); if (!Strings.isEmpty(numberProto.getExtension())) { tokens.add(numberProto.getExtension()); diff --git 
a/plugins/analysis-phonenumber/src/test/java/org/opensearch/analysis/phone/PhoneNumberAnalyzerTests.java b/plugins/analysis-phonenumber/src/test/java/org/opensearch/analysis/phone/PhoneNumberAnalyzerTests.java index 332f6d21f47d6..1dc2f32e84921 100644 --- a/plugins/analysis-phonenumber/src/test/java/org/opensearch/analysis/phone/PhoneNumberAnalyzerTests.java +++ b/plugins/analysis-phonenumber/src/test/java/org/opensearch/analysis/phone/PhoneNumberAnalyzerTests.java @@ -90,7 +90,7 @@ public void testEuropeDetailledSearch() throws IOException { assertTokensAreInAnyOrder( phoneSearchAnalyzer, "tel:+441344840400", - Arrays.asList("tel:+441344840400", "tel:", "441344840400", "44", "1344840400") + Arrays.asList("tel:+441344840400", "tel:", "441344840400", "1344840400") ); } @@ -189,21 +189,21 @@ public void testLocalNumberWithCH() throws IOException { } public void testSearchInternationalPrefixWithZZ() throws IOException { - assertTokensInclude(phoneSearchAnalyzer, "+41583161010", Arrays.asList("41", "41583161010", "583161010")); + assertTokensInclude(phoneSearchAnalyzer, "+41583161010", Arrays.asList("41583161010", "583161010")); } public void testSearchInternationalPrefixWithCH() throws IOException { - assertTokensInclude(phoneSearchCHAnalyzer, "+41583161010", Arrays.asList("41", "41583161010", "583161010")); + assertTokensInclude(phoneSearchCHAnalyzer, "+41583161010", Arrays.asList("41583161010", "583161010")); } public void testSearchNationalPrefixWithCH() throws IOException { // + is equivalent to 00 in Switzerland - assertTokensInclude(phoneSearchCHAnalyzer, "0041583161010", Arrays.asList("41", "41583161010", "583161010")); + assertTokensInclude(phoneSearchCHAnalyzer, "0041583161010", Arrays.asList("41583161010", "583161010")); } public void testSearchLocalNumberWithCH() throws IOException { // when omitting the international prefix swiss numbers must start with '0' - assertTokensInclude(phoneSearchCHAnalyzer, "0583161010", Arrays.asList("41", "41583161010", 
"583161010")); + assertTokensInclude(phoneSearchCHAnalyzer, "0583161010", Arrays.asList("41583161010", "583161010")); } /** diff --git a/plugins/analysis-phonenumber/src/yamlRestTest/resources/rest-api-spec/test/analysis-phone/20_search.yml b/plugins/analysis-phonenumber/src/yamlRestTest/resources/rest-api-spec/test/analysis-phone/20_search.yml index 0bd7d2c371bfc..91560f3960e53 100644 --- a/plugins/analysis-phonenumber/src/yamlRestTest/resources/rest-api-spec/test/analysis-phone/20_search.yml +++ b/plugins/analysis-phonenumber/src/yamlRestTest/resources/rest-api-spec/test/analysis-phone/20_search.yml @@ -32,9 +32,20 @@ index: test id: 1 body: { "phone": "+41 58 316 10 10", "phone-ch": "058 316 10 10" } + - do: + index: + index: test + id: 2 + body: { "phone": "+41 58 316 99 99", "phone-ch": "058 316 99 99" } + - do: + index: + index: test + id: 3 + body: { "phone": "+1-888-280-4331", "phone-ch": "+1-888-280-4331" } - do: indices.refresh: {} + # international format in document & search will always work - do: search: rest_total_hits_as_int: true @@ -45,6 +56,7 @@ "phone": "+41583161010" - match: { hits.total: 1 } + # correct national format & international format in search will always work - do: search: rest_total_hits_as_int: true @@ -54,3 +66,58 @@ match: "phone-ch": "+41583161010" - match: { hits.total: 1 } + + # national format without country specified won't work + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match: + "phone": "0583161010" + - match: { hits.total: 0 } + + # correct national format with country specified in document & search will always work + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match: + "phone-ch": "0583161010" + - match: { hits.total: 1 } + + # international format in document & search will always work + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match: + "phone": "+1 888 280 4331" + - match: { hits.total: 1 } + + # 
international format in document & search will always work + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match: + "phone-ch": "+1 888 280 4331" + - match: { hits.total: 1 } + + # national format in search won't work if no country is specified + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match: + "phone": "888 280 4331" + - match: { hits.total: 0 }