diff --git a/icu4j/main/core/src/main/java/com/ibm/icu/text/UnicodeSet.java b/icu4j/main/core/src/main/java/com/ibm/icu/text/UnicodeSet.java index cc97bbc840b9..adb038adef11 100644 --- a/icu4j/main/core/src/main/java/com/ibm/icu/text/UnicodeSet.java +++ b/icu4j/main/core/src/main/java/com/ibm/icu/text/UnicodeSet.java @@ -17,7 +17,12 @@ import java.util.Iterator; import java.util.NoSuchElementException; import java.util.SortedSet; +import java.util.Spliterator; import java.util.TreeSet; +import java.util.function.IntConsumer; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; import com.ibm.icu.impl.BMPSet; import com.ibm.icu.impl.CharacterPropertiesImpl; @@ -278,14 +283,25 @@ * * * - *
To iterate over contents of UnicodeSet, the following are available: - *
To iterate over contents of {@code UnicodeSet}, the following are available: + *
The iterator and stream methods work as expected in current idiomatic Java usage.
+ * The {@link UnicodeSetIterator} cannot be used in for loops, and it is not very Java-idiomatic, because it is old.
+ * But it might be faster in certain use cases. We recommend that you measure in performance-sensitive code.
+ *
*
To replace, count elements, or delete spans, see {@link com.ibm.icu.text.UnicodeSetSpanner UnicodeSetSpanner}. * * @author Alan Liu @@ -5127,5 +5143,179 @@ public static void setDefaultXSymbolTable(XSymbolTable xSymbolTable) { CharacterPropertiesImpl.clear(); XSYMBOL_TABLE = xSymbolTable; } + + /** + * Returns a {@link Stream} of {@link EntryRange} values from this {@code UnicodeSet}. + * + *
Warning: The {@link EntryRange} instance is the same each time; the contents are just reset. + * + *
Warning: To iterate over the full contents, you have to also iterate over the strings. + * + *
Warning: For speed, {@code UnicodeSet} iteration does not check for concurrent modification. Warning: To iterate over the full contents, you have to also iterate over the strings.
+ *
+ * Warning: For speed, {@code UnicodeSet} iteration does not check for concurrent modification. Warning: To iterate over the full contents, you have to also iterate over the strings.
+ *
+ * Warning: For speed, {@code UnicodeSet} iteration does not check for concurrent modification. Warning: To iterate over the full contents, you have to also iterate over the strings.
+ *
+ * Warning: For speed, {@code UnicodeSet} iteration does not check for concurrent modification. Returns an {@link Iterable} over all the code points, "expanding" the ranges
+ * by iterating on all code points between the {@link EntryRange#codepoint} and
+ * {@link EntryRange#codepointEnd}.
+ *
+ * Warning: This is a convenience method, but comes with a performance penalty
+ * because it boxes {@code int} into {@code Integer}. Warning: To iterate over the full contents, you have to also iterate over the strings.
+ *
+ * Warning: For speed, {@code UnicodeSet} iteration does not check for concurrent modification.
+ * Do not alter the {@code UnicodeSet} while iterating.
+ *
+ * @return a {@link Stream} of {@link EntryRange}
+ */
+ public Stream
+ * Do not alter the {@code UnicodeSet} while iterating.
+ *
+ * @return a {@link Stream} of {@code String}
+ */
+ public Stream
+ * Do not alter the {@code UnicodeSet} while iterating.
+ *
+ * @return an {@link IntStream} of Unicode code point values
+ */
+ public IntStream codePointStream() { // primitive int stream, so code points are not boxed into Integer
+ return StreamSupport.intStream(new CodePointsSpliterator(this), false); // second argument false = sequential (non-parallel) stream
+ }
+
+ /**
+ * Returns a stream of {@code String} values from this {@code UnicodeSet}.
+ *
+ *
+ * Do not alter the {@code UnicodeSet} while iterating.
+ *
+ * @return a {@link Stream} of {@code String}
+ */
+ public Stream
+ * For faster iteration use the {@link #codePointStream()} or {@link #charAt(int)}.
+ *
+ *
+ * Do not alter the {@code UnicodeSet} while iterating.
+ *
+ * @return an {@link Iterable} over all the code points
+ */
+ public Iterable