From 515d0a7393fd353a191f255384e7a3c98eee39c3 Mon Sep 17 00:00:00 2001 From: Mihai Nita Date: Mon, 9 Dec 2024 14:09:27 -0800 Subject: [PATCH] ICU-22773 Faster generation (4x) with multithreading --- .../org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java | 1 + .../icu/tool/cldrtoicu/Cldr2IcuCliOptions.java | 10 ++++++++++ .../icu/tool/cldrtoicu/IcuConverterConfig.java | 13 +++++++++++++ .../unicode/icu/tool/cldrtoicu/LdmlConverter.java | 14 +++++++++----- .../icu/tool/cldrtoicu/LdmlConverterConfig.java | 5 +++++ .../icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java | 4 ++++ 6 files changed, 42 insertions(+), 5 deletions(-) diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java index 6a7618847e8c..b63b062d0e6f 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java @@ -23,6 +23,7 @@ private void convert() { convert.setLocaleIdFilter(options.localeIdFilter); convert.setIncludePseudoLocales(options.includePseudoLocales); convert.setEmitReport(options.emitReport); + convert.setParallel(options.parallel); convert.init(); convert.execute(); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java index 4edf5cdfefd0..14a4ded59873 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java @@ -144,6 +144,11 @@ class Cldr2IcuCliOptions { private static final String XML_CONFIG_DEFAULT = "${icuDir}/tools/cldr/cldr-to-icu/config.xml"; String xmlConfig; + private static final String PARALLEL = "parallel"; + private static final String PARALLEL_DESC = "Run the generation in parallel (multithreaded), to make it faster."; + private static final String PARALLEL_DEFAULT = "false"; + boolean parallel; + // These must be kept in sync with getOptions(). private static final Options options = new Options() .addOption(Option.builder() @@ -254,6 +259,10 @@ class Cldr2IcuCliOptions { .argName("path") .desc(descWithDefault(XML_CONFIG_DESC, XML_CONFIG_DEFAULT)) .build()) + .addOption(Option.builder() + .longOpt(PARALLEL) + .desc(descWithDefault(PARALLEL_DESC, PARALLEL_DEFAULT)) + .build()) ; void processArgs(String[] args) { @@ -288,6 +297,7 @@ void processArgs(String[] args) { emitReport = cli.hasOption(EMIT_REPORT); forceDelete = cli.hasOption(FORCE_DELETE); xmlConfig = cli.getOptionValue(XML_CONFIG, expandFolders(XML_CONFIG_DEFAULT)); + parallel = cli.hasOption(PARALLEL); if (cli.hasOption(OUTPUT_TYPES_LIST)) { OutputType[] outTypesToSort = OutputType.values(); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuConverterConfig.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuConverterConfig.java index 0887597193ba..8b57e10918c5 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuConverterConfig.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuConverterConfig.java @@ -47,6 +47,7 @@ public static final class Builder { private Optional cldrVersion = Optional.empty(); private CldrDraftStatus minimumDraftStatus = CldrDraftStatus.CONTRIBUTED; private boolean emitReport = false; + private boolean parallel = false; private final SetMultimap localeIdsMap = TreeMultimap.create(); private final Table forcedAliases = TreeBasedTable.create(); private final Table forcedParents = TreeBasedTable.create(); @@ -111,6 +112,11 @@ public Builder setEmitReport(boolean emitReport) { return this; } + public Builder setParallel(boolean parallel) { + this.parallel = parallel; + return this; + } + public Builder addLocaleIds(IcuLocaleDir dir, Iterable localeIds) { localeIdsMap.putAll(dir, localeIds); return this; @@ -138,6 +144,7 @@ public LdmlConverterConfig build() { private final IcuVersionInfo versionInfo; private final CldrDraftStatus minimumDraftStatus; private final boolean emitReport; + private final boolean parallel; private final ImmutableSet allLocaleIds; private final ImmutableSetMultimap localeIdsMap; private final ImmutableTable forcedAliases; @@ -161,6 +168,7 @@ private IcuConverterConfig(Builder builder) { builder.cldrVersion.orElse(CldrDataSupplier.getCldrVersionString())); this.minimumDraftStatus = checkNotNull(builder.minimumDraftStatus); this.emitReport = builder.emitReport; + this.parallel = builder.parallel; // getAllLocaleIds() returns the union of all the specified IDs in the map. this.allLocaleIds = ImmutableSet.copyOf(builder.localeIdsMap.values()); this.localeIdsMap = ImmutableSetMultimap.copyOf(builder.localeIdsMap); @@ -202,6 +210,11 @@ public boolean emitReport() { return emitReport; } + @Override + public boolean parallel() { + return parallel; + } + @Override public ImmutableMap getForcedAliases(IcuLocaleDir dir) { return forcedAliases.row(dir); diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java index 6373f9a4bbad..f013284b81f7 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java @@ -288,22 +288,26 @@ private void processLdml() { Path baseDir = config.getOutputDir(); System.out.println("processing standard ldml files"); - for (String id : config.getAllLocaleIds()) { + Stream localeStream = config.getAllLocaleIds().stream(); + if (config.parallel()) { + localeStream = localeStream.parallel(); + } + localeStream.forEach(id -> { // Skip "target" IDs that are aliases (they are handled later). if (!availableIds.contains(id)) { - continue; + return; } // TODO: Remove the following skip when ICU-20997 is fixed if (id.contains("VALENCIA") || id.contains("TARASK")) { System.out.println("(skipping " + id + " until ICU-20997 is fixed)"); - continue; + return; } // Now that former CLDR see locales are in common, there are some language // variants that are not at a high enough coverage level to pick up. // TODO need a better way of handling this. if (id.contains("POLYTON")) { System.out.println("(skipping " + id + ", insufficient coverage level)"); - continue; + return; } IcuData icuData = new IcuData(id, true); @@ -365,7 +369,7 @@ private void processLdml() { writtenLocaleIds.put(dir, id); } } - } + }); System.out.println("processing alias ldml files"); for (IcuLocaleDir dir : splitDirs) { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverterConfig.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverterConfig.java index 72241b620b97..f3ef6190748d 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverterConfig.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverterConfig.java @@ -148,4 +148,9 @@ public String getCldrVersion() { * Whether to emit a summary report for debug purposes after conversion is complete. */ boolean emitReport(); + + /** + * Whether to generate data in parallel (using multithreading). + */ + boolean parallel(); } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java index ac2ac2b57ca4..0c5c37a86322 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java @@ -136,6 +136,10 @@ public void setEmitReport(boolean emit) { config.setEmitReport(emit); } + public void setParallel(boolean parallel) { + config.setParallel(parallel); + } + public static final class LocaleIds extends Task { private ImmutableSet ids;