From 2fa8a0908c02d8adf8ecff4c69820743f4683936 Mon Sep 17 00:00:00 2001 From: Mihai Nita Date: Mon, 9 Dec 2024 19:26:40 +0000 Subject: [PATCH] ICU-22773 Migrate the CLDR conversion tool to Maven --- .github/adaboost.json | 2 +- .github/lstm_for_th_my.json | 2 +- docs/processes/cldr-icu.md | 216 ++++---- docs/processes/release/tasks/versions.md | 19 +- icu4c/source/data/unidata/changes.txt | 2 + tools/cldr/.gitignore | 13 +- tools/cldr/build.xml | 21 +- tools/cldr/cldr-to-icu/.classpath | 31 -- tools/cldr/cldr-to-icu/.project | 23 - .../.settings/org.eclipse.jdt.core.prefs | 5 - .../.settings/org.eclipse.jdt.ui.prefs | 5 - .../.settings/org.eclipse.m2e.core.prefs | 4 - tools/cldr/cldr-to-icu/README.md | 161 ++++-- tools/cldr/cldr-to-icu/README.txt | 11 - tools/cldr/cldr-to-icu/build-icu-data.xml | 472 ------------------ tools/cldr/cldr-to-icu/config.xml | 295 +++++++++++ tools/cldr/cldr-to-icu/pom.xml | 109 ++-- .../unicode/icu/tool/cldrtoicu/Cldr2Icu.java | 71 +++ .../tool/cldrtoicu/Cldr2IcuCliOptions.java | 401 +++++++++++++++ .../icu/tool/cldrtoicu/IcuDataDumper.java | 1 - .../ant/CleanOutputDirectoryTask.java | 99 +++- .../cldrtoicu/ant/ConvertIcuDataTask.java | 163 ++++-- .../tool/cldrtoicu/ant/GenerateCodeTask.java | 9 - .../unicode/icu/tool/cldrtoicu/ant/Task.java | 25 + .../src/main/resources/ldml2icu_header.txt | 2 +- .../tool/cldrtoicu/SupplementalDataTest.java | 7 +- .../ant/CleanOutputDirectoryTaskTest.java | 2 +- .../LocaleDistanceMapperTest.java | 28 +- .../cldrtoicu/mapper/Bcp47MapperTest.java | 4 +- tools/cldr/lib/README.txt | 101 ---- tools/cldr/lib/install-cldr-jars.sh | 102 ---- tools/cldr/lib/pom.xml | 53 -- 32 files changed, 1346 insertions(+), 1113 deletions(-) delete mode 100644 tools/cldr/cldr-to-icu/.classpath delete mode 100644 tools/cldr/cldr-to-icu/.project delete mode 100644 tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.core.prefs delete mode 100644 tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.ui.prefs delete mode 100644 
tools/cldr/cldr-to-icu/.settings/org.eclipse.m2e.core.prefs delete mode 100644 tools/cldr/cldr-to-icu/README.txt delete mode 100644 tools/cldr/cldr-to-icu/build-icu-data.xml create mode 100644 tools/cldr/cldr-to-icu/config.xml create mode 100644 tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java create mode 100644 tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java create mode 100644 tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/Task.java delete mode 100644 tools/cldr/lib/README.txt delete mode 100755 tools/cldr/lib/install-cldr-jars.sh delete mode 100644 tools/cldr/lib/pom.xml diff --git a/.github/adaboost.json b/.github/adaboost.json index 639fd6a99da9..33913ba8659d 100644 --- a/.github/adaboost.json +++ b/.github/adaboost.json @@ -1,6 +1,6 @@ // © 2022 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html -// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml +// Generated using tools/cldr/cldr-to-icu/ // // Include Japanese adaboost model. { diff --git a/.github/lstm_for_th_my.json b/.github/lstm_for_th_my.json index b4aeb24d0ed8..6c7e3e743dab 100644 --- a/.github/lstm_for_th_my.json +++ b/.github/lstm_for_th_my.json @@ -1,6 +1,6 @@ // © 2021 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html -// Generated using tools/cldr/cldr-to-icu/build-icu-data.xml +// Generated using tools/cldr/cldr-to-icu/ // // Include Burmese and Thai lstm models. { diff --git a/docs/processes/cldr-icu.md b/docs/processes/cldr-icu.md index b179262bcf8d..0f9031d9f8d0 100644 --- a/docs/processes/cldr-icu.md +++ b/docs/processes/cldr-icu.md @@ -27,8 +27,8 @@ All Rights Reserved. # Intro and setup These instructions describe how to regenerate ICU4C locale and linguistic data from CLDR, -and then how to convert that ICU4 data for ICU4J (data jars and maven resources). 
-They apply to CLDR 44 / ICU 74 and later. +and then how to convert that ICU4C data for ICU4J (data jars and maven resources). +They apply to CLDR 47 / ICU 77 and later. To use these instructions just for generating ICU4J data from ICU4C, you only need to use steps 1, 8, and 12 in the Process section. @@ -37,22 +37,26 @@ The full process requires local copies of * CLDR (the source of most of the data, and some Java tools) * The complete ICU source tree, including: - * tools: includes the LdmlConverter build tool and associated config files - * icu4c: the target for converted CLDR data, and source for ICU4J data; includes tests for the converted data - * icu4j: the target for updated data jars; includes tests for the converted data + * `tools`: includes the `LdmlConverter` build tool and associated config files + * `icu4c`: the target for converted CLDR data, and source for ICU4J data; includes tests for the converted data + * `icu4j`: the target for updated data jars; includes tests for the converted data For an official CLDR data integration into ICU, these should be clean, freshly checked-out. For released CLDR sources, an alternative to checking out sources -for a given version is downloading the zipped sources for the common (core.zip) -and tools (tools.zip) directory subtrees from the Data column in +for a given version is downloading the zipped sources for the common (`core.zip`) +and tools (`tools.zip`) directory subtrees from the Data column in [CLDR Releases/Downloads](https://cldr.unicode.org/index/downloads) -Besides a standard JDK, the process also requires [ant](https://ant.apache.org) and +Besides a standard JDK 11+, the process also requires [ant](https://ant.apache.org) and [maven](https://maven.apache.org) plus the xml-apis.jar from the [Apache xalan package](https://xalan.apache.org/xalan-j/downloads.html) _(Is this -latter requirement still true?)_. You will also need to have performed the +latter requirement still true?)_. 
+ +If you do CLDR development you can configure maven as documented at [CLDR Maven setup](http://cldr.unicode.org/development/maven) (non-Eclipse version). +But for the CLDR to ICU data conversion, or for regular ICU development this is not needed. + Notes: * Enough things can (and will) fail in this process that it is best to @@ -65,12 +69,12 @@ Notes: files are used in addition to the CLDR files as inputs to the CLDR data build process for ICU): * The primary file to edit for adding/removing locales and/or collation and - rbnf data is
- `$TOOLS_ROOT/cldr/cldr-to-icu/build-icu-data.xml`. + `rbnf` data is \ + `$ICU_DIR/tools/cldr/cldr-to-icu/config.xml`. * There are some files in `icu4c/source/data/xml/` that may need editing for - certain additions. This is especially true for brkitr additions; however there - are rbnf files there that add some rules. The collation files there mainly - hook up the UCA collation rules in `icu4c/data/unidata/UCARules.txt` to the + certain additions. This is especially true for `brkitr` additions; however there + are `rbnf` files there that add some rules. The collation files there mainly + hook up the UCA collation rules in `icu4c/source/data/unidata/UCARules.txt` to the collation data. To process these files, certain CLDR dtds are copied over to ICU. @@ -88,14 +92,14 @@ considerations: # CLDR prerequisites for BRS integrations The following tasks should be done in the CLDR repo before beginning a CLDR-ICU -integration that ss part of the BRS process; handle each of these using a separate +integration that is part of the BRS process; handle each of these using a separate ticket and a separate PR: 1. Generate updated CLDR test data (which is copied to ICU), using the process in [Generating CLDR testData](https://docs.google.com/document/d/1-RC99npKcSSwUoYGkSzxaKOe76gYRkWhGdFzCdIBCu4/edit#heading=h.2rum9c6hrr4w) -2. Run CLDRModify with no options with no options and then with -fP. The webpage - for CLDRModify is currently being converted to markdown, a reference to it will +2. Run `CLDRModify` with no options and then with `-fP`. The web page + for `CLDRModify` is currently being converted to markdown, a reference to it will be added when that process is complete. # Environment variables @@ -120,61 +124,61 @@ There are several environment variables that need to be defined. * `CLDR_TMP_DIR`: Parent of temporary CLDR production data. Defaults to `$CLDR_DIR/../cldr-aux` (sibling to `CLDR_DIR`). 
- - > **NOTE:** As of CLDR 36 and 37, the GenerateProductionData tool no longer + + > **NOTE:** As of CLDR 36 and 37, the `GenerateProductionData` tool no longer generates data by default into `$CLDR_TMP_DIR/production`; instead it generates data into `$CLDR_DIR/../cldr-staging/production` (though there is a command-line option to override this). However the rest of the build still assumes that the generated data is in `$CLDR_TMP_DIR/production`. So `CLDR_TMP_DIR` must be defined to be `CLDR_DIR/../cldr-staging`. - + 3. ICU-related variables - * `ICU4C_DIR`: Path to root of ICU4C sources, below which is the source dir. + * `ICU_DIR`: Path to root of ICU directory, below which are (e.g.) the + `icu4c`, `icu4j`, and `tools` directories. - * `ICU4J_ROOT`: Path to root of ICU4J sources, below which is the main dir. + * `ICU4C_DIR`: Path to root of ICU4C sources, below which is the `source` dir. + + * `ICU4J_ROOT`: Path to root of ICU4J sources, below which is the `main` dir. - * `TOOLS_ROOT`: Path to root of ICU tools directory, below which are (e.g.) the - cldr and unicodetools dirs. - # Process - + ## 1 Environment variables 1a. Java, ant, and maven variables, adjust for your system -``` +```sh export JAVA_HOME=/usr/libexec/java_home export ANT_OPTS="-Xmx8192m" export MAVEN_ARGS="--no-transfer-progress" ``` 1b. CLDR variables, adjust for your setup; with cygwin it might be e.g. -``` +```sh CLDR_DIR=`cygpath -wp /build/cldr` ``` Note that for cldr-staging we do not use personal forks, we commit directly. -``` +```sh export CLDR_DIR=$HOME/cldr-myfork export CLDR_TMP_DIR=$HOME/cldr-staging export CLDR_DATA_DIR=$HOME/cldr-staging/production ``` 1c. ICU variables -``` +```sh export ICU4C_DIR=$HOME/icu-myfork/icu4c export ICU4J_ROOT=$HOME/icu-myfork/icu4j export TOOLS_ROOT=$HOME/icu-myfork/tools ``` 1d. Directory for logs/notes (create if does not exist) -``` +```sh export NOTES=...(some directory)... mkdir -p $NOTES ``` 1e. 
The name of the icu data directory for Java (for example `icudt74b`) -``` +```sh export ICU_DATA_VER=icudt(version)b ``` @@ -182,10 +186,10 @@ export ICU_DATA_VER=icudt(version)b 2a. Configure ICU4C, build and test without new data first, to verify that there are no pre-existing errors, and to build some tools needed for later -steps. Here `` is the runConfigureICU code for the platform you +steps. Here `` is the `runConfigureICU` code for the platform you are building on, e.g. Linux, macOS, Cygwin. (optionally build with debug enabled) -``` +```sh cd $ICU4C_DIR/source ./runConfigureICU [--enable-debug] make clean @@ -195,7 +199,7 @@ make check 2>&1 | tee $NOTES/icu4c-oldData-makeCheck.txt 2b. Now with ICU4J, build and test without new data first, to verify that there are no pre-existing errors (or at least to have the pre-existing errors as a base for comparison): -``` +```sh cd $ICU4J_ROOT mvn clean mvn verify 2>&1 | tee $NOTES/icu4j-oldData-mvnCheck.txt @@ -210,31 +214,33 @@ cp -p $CLDR_DIR/common/dtd/ldmlICU.dtd $ICU4C_DIR/source/data/dtd/cldr/common/dt ``` 3b. Update the cldr-icu tooling to use the latest tagged version of ICU -``` -open $TOOLS_ROOT/cldr/cldr-to-icu/pom.xml +```sh +open $ICU_DIR/tools/cldr/cldr-to-icu/pom.xml ``` (search for `icu4j-for-cldr` and update to the latest tagged version per instructions) 3c. Update the build for any new icu version, added locales, etc. +```sh +# ICU version +open $ICU_DIR/tools/cldr/cldr-to-icu/pom.xml +# Locales and other configuration changes +open $ICU_DIR/tools/cldr/cldr-to-icu/config.xml ``` -open $TOOLS_ROOT/cldr/cldr-to-icu/build-icu-data.xml -``` -(update icuVersion, icuDataVersion if necessary; update lists of locales to include if necessary) +(update `icuVersion`, `icuDataVersion` if necessary; update lists of locales to include if necessary) 3d. 
If there are new data types or variants in CLDR, you may need to update the -files that specify mapping of CLDR data to ICU rseources: -``` -open $TOOLS_ROOT/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt -open $TOOLS_ROOT/cldr/cldr-to-icu/src/main/resources/ldml2icu_supplemental.txt +files that specify mapping of CLDR data to ICU resources: +```sh +open $ICU_DIR/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_locale.txt +open $ICU_DIR/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_supplemental.txt ``` ## 4 Build and install CLDR jar -See `$TOOLS_ROOT/cldr/lib/README.txt` for more information on the CLDR -jar and the `install-cldr-jars.sh` script. -``` -cd $TOOLS_ROOT/cldr -ant install-cldr-libs +See `$ICU_DIR/tools/cldr/cldr-to-icu/README.md` for more information on the CLDR jar. +```sh +cd "$CLDR_DIR" +mvn clean install -pl :cldr-all,:cldr-code -DskipTests -DskipITs ``` ## 5 Generate CLDR production data and convert for ICU @@ -247,14 +253,15 @@ This process uses ant with ICU4C's `data/build.xml` (usually `$CLDR_TMP_DIR/production`), required if any CLDR data has changed. * Running `ant setup` is not required, but it will print useful errors to debug issues with your path when it fails. -``` + +```sh cd $ICU4C_DIR/source/data ant cleanprod ant setup ant proddata 2>&1 | tee $NOTES/cldr-newData-proddataLog.txt ``` -> Note, for CLDR development, at this point tests are sometimes run on the +> Note, for CLDR development, at this point tests are sometimes run on the production data, see [BRS: Run tests on production data](https://cldr.unicode.org/development/cldr-big-red-switch/brs-run-tests-on-production-data) @@ -262,26 +269,27 @@ ant proddata 2>&1 | tee $NOTES/cldr-newData-proddataLog.txt These include .txt files and .py files. These new files will replace whatever was already present in the ICU4C sources. This process uses the `LdmlConverter` in -`$TOOLS_ROOT/cldr/cldr-to-icu/`; see `$TOOLS_ROOT/cldr/cldr-to-icu/README.txt`. 
+`$ICU_DIR/tools/cldr/cldr-to-icu/`; see `$ICU_DIR/tools/cldr/cldr-to-icu/README.md`. * This process will take several minutes, during most of which there will be no log output (so do not assume nothing is happening). Keep a log so you can investigate anything that looks suspicious. -* Note that `ant clean` should _not_ be run before this. The `build-icu-data.xml` process +* The conversion tool will automatically run its own "clean" step to delete files it cannot determine to be ones that it would generate, except for pasts listed in `` elements such as `coll/de__PHONEBOOK.txt`, `coll/de_.txt`, etc. -* Before running ant to regenerate the data, make any necessary changes to the - build-icu-data.xml file, such as adding new locales etc. -``` -cd $TOOLS_ROOT/cldr/cldr-to-icu -ant -f build-icu-data.xml -DcldrDataDir="$CLDR_TMP_DIR/production" | tee $NOTES/cldr-newData-builddataLog.txt +* Before running the tool to regenerate the data, make any necessary changes to the + `config.xml` file, such as adding new locales etc. + +```sh +cd $ICU_DIR/tools/cldr/cldr-to-icu +java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar --cldrDataDir="$CLDR_TMP_DIR/production" | tee $NOTES/cldr-newData-builddataLog.txt ``` 5c. Update the CLDR testData files needed by ICU4C/J tests, ensuring they are representative of the newest CLDR data. -``` -cd $TOOLS_ROOT/cldr +```sh +cd $ICU_DIR/tools/cldr ant copy-cldr-testdata ``` @@ -289,7 +297,7 @@ ant copy-cldr-testdata (This step has been subsumed into 5c above) 5e. For now, manually re-add the `lstm` entries in `data/brkitr/root.txt` -``` +```sh open $ICU4C_DIR/source/data/brkitr/root.txt ``` Paste the following block after the dictionaries block and before the final closing '}': @@ -302,20 +310,20 @@ Paste the following block after the dictionaries block and before the final clos 5f. Update hard-coded lists in ICU -ICU4 has some hard-coded lists of locale-related codes that may need updating. 
Ideally these should +ICU has some hard-coded lists of locale-related codes that may need updating. Ideally these should be replaced by data converted from CLDR ([ICU-22839](https://unicode-org.atlassian.net/browse/ICU-22839)). In the meantime these need to be updated manually. | code type | icu4c/source library file(s) | icu4c/source test file(s) | | -------------------------------------------------------------------------------------------- | ------------------------------------------- | ------------------------------------------- | -| language
(at least all language codes in ICU locales or CLDR attributeValueValidity.xml) | common/uloc.cpp: LANGUAGES[], LANGUAGES_3[] | test/testdata/structLocale.txt: Languages | -| region
(at least all region codes in ICU locales or CLDR attributeValueValidity.xml) | common/uloc.cpp: COUNTRIES[], COUNTRIES_3[] | test/testdata/structLocale.txt: Countries | -| currency (see note below)
(at least everything in CLDR supplementalData.xml currencyData) | common/ucurr.cpp: gCurrencyList[]] | test/testdata/structLocale.txt: Currencies,CurrencyPlurals
test/cintltst/currtest.c:TestEnumList() | -| timezone | (not currently aware of hard-coded list) | test/testdata/structLocale.txt: zoneStrings | +| language
(at least all language codes in ICU locales or CLDR `attributeValueValidity.xml`) | `common/uloc.cpp`: `LANGUAGES[], LANGUAGES_3[]` | `test/testdata/structLocale.txt`: Languages | +| region
(at least all region codes in ICU locales or CLDR `attributeValueValidity.xml`) | `common/uloc.cpp`: `COUNTRIES[], COUNTRIES_3[]` | `test/testdata/structLocale.txt`: Countries | +| currency (see note below)
(at least everything in CLDR `supplementalData.xml` `currencyData`) | `common/ucurr.cpp`: `gCurrencyList[]` | `test/testdata/structLocale.txt`: `Currencies`,`CurrencyPlurals`
`test/cintltst/currtest.c`:`TestEnumList()` | +| timezone | (not currently aware of hard-coded list) | `test/testdata/structLocale.txt`: `zoneStrings` | Note: currency code lists are also in other code lists along with measurement units, but these are re-generated using the procedure in -[Updating MeasureUnit with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html) +[Updating `MeasureUnit` with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html) (also mentioned in step 14 below). ## 6 Check the results @@ -323,7 +331,7 @@ but these are re-generated using the procedure in Check which data files have modifications, which have been added or removed (if there are no changes, you may not need to proceed further). Make sure the list seems reasonable. You may want to save logs, and possibly examine them... -``` +```sh cd $ICU4C_DIR/.. git status git status > $NOTES/gitStatusDelta-data.txt @@ -332,7 +340,7 @@ open $NOTES/gitDiffDelta-data.txt ``` 6a. You may also want to check which files were modified in CLDR production data: -``` +```sh cd $CLDR_TMP_DIR git status git status > $NOTES/gitStatusDelta-staging.txt @@ -342,25 +350,25 @@ git diff > $NOTES/gitDiffDelta-staging.txt ## 7 Fix data generation errors Look for evident errors in the list of file changes, or in the file diffs. -Fixing them may entail modifying CLDR source data or `TOOLS_ROOT` config files or +Fixing them may entail modifying CLDR source data or `$ICU_DIR/tools/cldr/cldr-to-icu` config files or tooling. ## 8 Rebuild ICU4C with new data, run tests 8a. Re-run configure and make clean, necessary to handle any files added or deleted: -``` +```sh cd $ICU4C_DIR/source ./runConfigureICU [--enable-debug] make clean ``` 8b. 
Do the rebuild, keeping a log as before: -``` +```sh make check 2>&1 | tee $NOTES/icu4c-newData-makeCheck.txt ``` To re-run a specific test if necessary when fixing bugs; for example: -``` +```sh cd test/intltest DYLD_LIBRARY_PATH=../../lib:../../stubdata:../../tools/ctestfw:$DYLD_LIBRARY_PATH ./intltest -e -G format/NumberTest/NumberPermutationTest cd ../.. @@ -380,7 +388,8 @@ ticket under which you are performing the integration, if you have one), fix the and regenerate from step 4. If the data is OK , other sources of failure can include: -* Problems with the CLDR-ICU conversion process (pehaps some locale data is not getting + +* Problems with the CLDR-ICU conversion process (perhaps some locale data is not getting converted properly; go back to step 3, adjust and repeat from there. * Problems with ICU library code that may not be using new resources properly. Fix and repeat from step 8. @@ -390,9 +399,9 @@ If the data is OK , other sources of failure can include: you will need to update `icu4c/test/testdata/structLocale.txt` (otherwise `/tsutil/cldrtest/TestLocaleStructure` may fail). -## 10 Running ICU4C tests in exhaustive mode. +## 10 Running ICU4C tests in exhaustive mode -Exhautive tests should always be run for a CLDR-ICU integration PR before it is merged. +Exhaustive tests should always be run for a CLDR-ICU integration PR before it is merged. Once you have a PR, you can do this for both C and J as part of the pre-merge CI tests by manually running a workflow (the exhaustive tests are not run automatically on every PR). See [Continuous Integration / Exhaustive Tests](../userguide/dev/ci.md#exhaustive-tests). 
@@ -400,7 +409,7 @@ See [Continuous Integration / Exhaustive Tests](../userguide/dev/ci.md#exhaustiv The following instructions run the ICU4C exhaustive tests locally (which you may want to do before even committing changes, or which may be necessary to diagnose failures in the CI tests): -``` +```sh cd $ICU4C_DIR/source export INTLTEST_OPTS="-e" export CINTLTST_OPTS="-e" @@ -415,13 +424,13 @@ appropriate, and repeating from step 4 or 8 as appropriate. ## 12 Transfer the ICU4C data to ICU4J 12a. You need to reconfigure ICU4C to include the unicore data. -``` +```sh cd $ICU4C_DIR/source ICU_DATA_BUILDTOOL_OPTS=--include_uni_core_data ./runConfigureICU ``` 12b. Rebuild the data with the new config setting, then create the ICU4J data jar. -``` +```sh cd $ICU4C_DIR/source/data make clean make -j -l2.5 @@ -429,13 +438,13 @@ make icu4j-data-install ``` 12c. Create the test data jar -``` +```sh cd $ICU4C_DIR/source/test/testdata make icu4j-data-install ``` 12d. Update the extracted {main, test} data files in the Maven build -``` +```sh cd $ICU4J_ROOT ./extract-data-files.sh ``` @@ -443,7 +452,7 @@ cd $ICU4J_ROOT ## 13 Rebuild ICU4J with new data, run tests 13a. Run the tests using the maven build -``` +```sh cd $ICU4J_ROOT mvn clean mvn install 2>&1 | tee $NOTES/icu4j-newData-mvnCheck.txt @@ -451,26 +460,29 @@ mvn install 2>&1 | tee $NOTES/icu4j-newData-mvnCheck.txt It is possible to re-run a specific test class or method if necessary when fixing bugs. -For example (using artifactId, full class name, test all methods): -``` +For example (using `artifactId`, full class name, test all methods): +```sh mvn install -pl :core -Dtest=com.ibm.icu.dev.test.util.LocaleBuilderTest ``` or (example of using module path, class name, one method): -``` +```sh mvn install -pl main/common_tests -Dtest=MeasureUnitTest#TestGreek ``` -13b. Optionally run the tests in exhautive mode +13b. 
Optionally run the tests in exhaustive mode -Optionally run before committing changes, or run to diagnose failures from -running exhastive CI tests in the PR using `/azp run CI-Exhaustive`: -``` +Optionally run exhaustive tests locally before committing changes: +```sh cd $ICU4J_ROOT mvn install -DICU.exhaustive=10 2>&1 | tee $NOTES/icu4j-newData-mvnCheckEx.txt ``` +Exhaustive tests in CI can be triggered by running the "Exhaustive Tests for ICU" +action from the GitHub web UI. +See [Continuous Integration / Exhaustive Tests](../userguide/dev/ci.md#exhaustive-tests). + Running a specific test is the same as above: -``` +```sh mvn install --pl :core -DICU.exhaustive=10 -Dtest=ExhaustiveNumberTest ``` @@ -482,7 +494,7 @@ step 4, as appropriate, until there are no more failures in ICU4C or ICU4J. Note that certain data changes and related test failures may require the rebuilding of other kinds of data and/or code. For example: -### Updating MeasureUnit code and tests +### Updating `MeasureUnit` code and tests If you see a failure such as ``` @@ -490,7 +502,7 @@ MeasureUnitTest testCLDRUnitAvailability Failure (MeasureUnitTest.java:3410) : U ``` then you will need to update the C and J library and test code for new measurement units, see the procedure at -[Updating MeasureUnit with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html) +[Updating `MeasureUnit` with new CLDR data](https://unicode-org.github.io/icu/processes/release/tasks/updating-measure-unit.html) ### Updating plurals test data @@ -503,12 +515,12 @@ To address these requires updating the LOCALE_SNAPSHOT data in ``` $ICU4J_ROOT/main/common_tests/src/test/java/com/ibm/icu/dev/test/format/PluralRulesTest.java ``` -by modifying the TestLocales() test there to run `generateLOCALE_SNAPSHOT()` and +by modifying the `TestLocales()` test there to run `generateLOCALE_SNAPSHOT()` and then copying in the updated data. 
## 15 Check the ICU file changes and commit -``` +```sh cd $ICU4C_DIR/source make clean cd $ICU4J_ROOT @@ -528,13 +540,13 @@ git push origin ICU-nnnnn-branchname (Only for an official integration from CLDR git repositories) 16a. Check cldr-staging changes, and commit -``` +```sh cd $CLDR_TMP_DIR git status ``` Then `git add` or `git rm` files as necessary. Record the changes, commit and push. -``` +```sh git status > $NOTES/gitStatusDelta-production-afterAdd.txt git commit -m 'CLDR-nnnnn production data corresponding to CLDR release-nn-stage' git push origin main @@ -545,8 +557,8 @@ git push origin main (There may be other cldr-staging changes unrelated to production data, such as charts or spec; we want to include them in the tag, so pull first, but log to see what the -chnages are first) -``` +changes are first) +```sh cd $CLDR_TMP_DIR git pull git log @@ -559,7 +571,7 @@ git push --tags We need to tag the main cldr repository. If $CLDR_DIR represents that repository, this is easy: -``` +```sh cd $CLDR_DIR git tag -a "release-nn-stage" -m "CLDR-nnnnn: tag CLDR release-nn-stage" git push --tags @@ -567,7 +579,7 @@ git push --tags However if $CLDR_DIR represents your personal fork or a branch from it, you need to figure out what commit hash yo have integrated, and tag that hash in the main repo. -``` +```sh cd $CLDR_DIR git log ``` @@ -575,7 +587,7 @@ Note the latest commit hash hhhhhhhh... Then switch to the main repo, update it, and tag the appropriate hash (making sure it is in that repo!): -``` +```sh cd $HOME/cldr git pull git log @@ -583,7 +595,7 @@ git tag -a "release-nn-stage" -m "CLDR-nnnnn: tag CLDR release-nn-stage" hhhhhhh git push --tags ``` -## 18 Pubish the cldr tags in github +## 18 Publish the cldr tags in github You should publish the cldr and cldr-staging tags in github. 
diff --git a/docs/processes/release/tasks/versions.md b/docs/processes/release/tasks/versions.md index f64fc28c94bb..5425302e4f56 100644 --- a/docs/processes/release/tasks/versions.md +++ b/docs/processes/release/tasks/versions.md @@ -53,6 +53,13 @@ need to be correspondingly updated. See below for more files to be updated and s [icu4c/source/data/misc/icuver.txt](https://github.com/unicode-org/icu/blob/main/icu4c/source/data/misc/icuver.txt) needs to be updated with the correct version number for ICU and its data. +#### Since ICU 77 + +The tool takes the `icuVersion` and `icuDataVersion` from the official ICU APIs +(from the icu4j listed as a dependency of the tool, usually the one you just built from the `icu4j` folder). + +If you need different values, you can specify them as command line parameters (`--icuVersion` and `--icuDataVersion`). + #### Since ICU 68 In @@ -212,8 +219,18 @@ The command requires a version number string that follows the typical Java / Mav 6. cldr-to-icu build tool has a dependency on the icu4j packages which needs to be updated in [`tools/cldr/cldr-to-icu/pom.xml`](https://github.com/unicode-org/icu/blob/main/tools/cldr/cldr-to-icu/pom.xml). Please update it to match the version that was updated in `icu4j/pom.xml` in the steps above. - `74.0.1-SNAPSHOT` +```xml +<version>74.0.1-SNAPSHOT</version> +``` +Since ICU 77 this moved to a property: +```xml +<icu4j.version>77.0.1-SNAPSHOT</icu4j.version> +``` +Which can easily be set from the command line: +```sh +mvn versions:set-property -Dproperty=icu4j.version -DnewVersion=77.1 -f $ICU_DIR/tools/cldr/cldr-to-icu +``` #### Until ICU 73 (inclusive) diff --git a/icu4c/source/data/unidata/changes.txt b/icu4c/source/data/unidata/changes.txt index 4ceb4278af25..2f8ed7ad8e13 100644 --- a/icu4c/source/data/unidata/changes.txt +++ b/icu4c/source/data/unidata/changes.txt @@ -290,6 +290,8 @@ copying that version number into the $ICU_SRC/.bazeliskrc config file. 
- run Unicode Tools GenerateUnihanCollators & GenerateUnihanCollatorFiles, check CLDR diffs, copy to CLDR, test CLDR, ... as documented there - generate ICU zh collation data + WARNING: outdated, don't do this, follow the tools/cldr/cldr-to-icu/README.md file! + --- Old text from here: instructions inspired by https://github.com/unicode-org/icu/blob/main/tools/cldr/cldr-to-icu/README.txt and https://github.com/unicode-org/icu/blob/main/icu4c/source/data/cldr-icu-readme.txt diff --git a/tools/cldr/.gitignore b/tools/cldr/.gitignore index ac11dec3002c..5a3fa6f6edc4 100644 --- a/tools/cldr/.gitignore +++ b/tools/cldr/.gitignore @@ -1,9 +1,4 @@ -# Exclude the Maven local repository but keep the lib directory and the top-level readme, scripts and build config. -/lib/** -!/lib/README.txt -!/lib/install-cldr-jars.sh -!/lib/pom.xml - -# Ignore the default Maven target directory. -/cldr-to-icu/target - +# Eclipse IDE generated files +.classpath +.project +.settings/ diff --git a/tools/cldr/build.xml b/tools/cldr/build.xml index 53f815d91e31..b1e1287e4bad 100644 --- a/tools/cldr/build.xml +++ b/tools/cldr/build.xml @@ -3,7 +3,7 @@ - - - - - - - - - - - - - - - diff --git a/tools/cldr/cldr-to-icu/.classpath b/tools/cldr/cldr-to-icu/.classpath deleted file mode 100644 index 6d7587a819e6..000000000000 --- a/tools/cldr/cldr-to-icu/.classpath +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tools/cldr/cldr-to-icu/.project b/tools/cldr/cldr-to-icu/.project deleted file mode 100644 index d3bf0a17a78c..000000000000 --- a/tools/cldr/cldr-to-icu/.project +++ /dev/null @@ -1,23 +0,0 @@ - - - cldr-to-icu - - - - - - org.eclipse.jdt.core.javabuilder - - - - - org.eclipse.m2e.core.maven2Builder - - - - - - org.eclipse.jdt.core.javanature - org.eclipse.m2e.core.maven2Nature - - diff --git a/tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.core.prefs b/tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.core.prefs deleted file mode 
100644 index 714351aec195..000000000000 --- a/tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,5 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 -org.eclipse.jdt.core.compiler.compliance=1.8 -org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning -org.eclipse.jdt.core.compiler.source=1.8 diff --git a/tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.ui.prefs b/tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.ui.prefs deleted file mode 100644 index c6293a5e4184..000000000000 --- a/tools/cldr/cldr-to-icu/.settings/org.eclipse.jdt.ui.prefs +++ /dev/null @@ -1,5 +0,0 @@ -eclipse.preferences.version=1 -org.eclipse.jdt.ui.ignorelowercasenames=true -org.eclipse.jdt.ui.importorder=java;javax;org;com; -org.eclipse.jdt.ui.ondemandthreshold=9999 -org.eclipse.jdt.ui.staticondemandthreshold=9999 diff --git a/tools/cldr/cldr-to-icu/.settings/org.eclipse.m2e.core.prefs b/tools/cldr/cldr-to-icu/.settings/org.eclipse.m2e.core.prefs deleted file mode 100644 index f897a7f1cb23..000000000000 --- a/tools/cldr/cldr-to-icu/.settings/org.eclipse.m2e.core.prefs +++ /dev/null @@ -1,4 +0,0 @@ -activeProfiles= -eclipse.preferences.version=1 -resolveWorkspaceProjects=true -version=1 diff --git a/tools/cldr/cldr-to-icu/README.md b/tools/cldr/cldr-to-icu/README.md index 15d1f4d2f75e..3b1f20a9fc30 100644 --- a/tools/cldr/cldr-to-icu/README.md +++ b/tools/cldr/cldr-to-icu/README.md @@ -6,32 +6,56 @@ License & terms of use: http://www.unicode.org/copyright.html # Basic instructions for running the LdmlConverter via Maven > Note: While this document provides useful background information about the - LdmlConverter, the actual complete process for integrating CLDR data to ICU + `LdmlConverter`, the actual complete process for integrating CLDR data to ICU is described in the document `../../../docs/processes/cldr-icu.md` which is best viewed as [CLDR-ICU 
integration](https://unicode-org.github.io/icu/processes/cldr-icu.html) +## TLDR + +* Define the `ICU_DIR`, `CLDR_DIR`, and `CLDR_DATA_DIR` environment variables, or (see below) +* Check / update versions +* Build ICU4J: + ```sh + cd "$ICU_DIR" + mvn clean install -f icu4j -DskipTests -DskipITs + ``` +* Build the `cldr-code` library from the `cldr` repo: + ```sh + cd "$CLDR_DIR" + mvn clean install -pl :cldr-all,:cldr-code -DskipTests -DskipITs + ``` +* Build the conversion tool: + ```sh + cd "$ICU_DIR/tools/cldr/cldr-to-icu/" + mvn clean package -DskipTests -DskipITs + ``` +* Run the conversion tool: + ```sh + java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar + ``` + ## Requirements * A CLDR release for supplying CLDR data and the CLDR API. +* JDK 11+ * The Maven build tool -* The Ant build tool (using JDK 11+) ## Important directories | Directory | Description | |-----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `TOOLS_ROOT` | Path to root of ICU tools directory, below which are (e.g.) the `cldr/` and `unicodetools/` directories. | +| `ICU_DIR` | Path to root of ICU directory, below which are (e.g.) the `icu4c/`, `icu4j/` and `tools/` directories. | | `CLDR_DIR` | This is the path to the to root of standard CLDR sources, below which are the `common/` and `tools/` directories. | | `CLDR_DATA_DIR` | The top-level directory for the CLDR production data (typically the "production" directory in the staging repository). 
Usually generated locally or obtained from: https://github.com/unicode-org/cldr-staging/tree/main/production | In Posix systems, it's best to set these as exported shell variables, and any following instructions assume they have been set accordingly: -``` -$ export TOOLS_ROOT=/path/to/icu/tools -$ export CLDR_DIR=/path/to/cldr -$ export CLDR_DATA_DIR=/path/to/cldr-staging/production +```sh +export ICU_DIR=/path/to/icu +export CLDR_DIR=/path/to/cldr +export CLDR_DATA_DIR=/path/to/cldr-staging/production ``` Note that you should not attempt to use data from the CLDR project directory @@ -40,65 +64,132 @@ relies on a pre-processing step, and the CLDR data must come from the separate "staging" repository (i.e. https://github.com/unicode-org/cldr-staging) or be pre-processed locally into a different directory. +:point_right: **Note**: the 3 folders can also be overridden: + +* with Java properties (e.g. `-DCLDR_DIR=/foo/bar`) +* from the command line when invoking the tool (the `icuDir`, `cldrDir`, and `cldrDataDir` options) ## Initial Setup -This project relies on the Maven build tool for managing dependencies and uses -Ant for configuration purposes, so both will need to be installed. On a Debian +This project relies on the Maven build tool for managing dependencies, so it will need to be installed. On a Debian based system, this should be as simple as: +```sh +sudo apt-get install maven +``` + +## Check / update versions + +### Real versions + +**ICU version (`real_icu_ver`):** +```sh +mvn help:evaluate -Dexpression=project.version -q -DforceStdout -f $ICU_DIR/icu4j ``` -$ sudo apt-get install maven ant + +**CLDR Library version (`real_cldr_ver`):** +```sh +mvn help:evaluate -Dexpression=project.version -q -DforceStdout -f $CLDR_DIR/tools ``` -You must also install an additional CLDR JAR file the local Maven repository at -`$TOOLS_ROOT/cldr/lib` (see the `README.txt` in that directory for more -information).
+### Dependency versions +**ICU version used by the cldr conversion tool:** \ +⚠️ **Warning:** Must be the same as `real_icu_ver` +```sh +mvn help:evaluate -Dexpression=icu4j.version -q -DforceStdout -f $ICU_DIR/tools/cldr/cldr-to-icu ``` -$ cd "$TOOLS_ROOT/cldr/lib" -$ ./install-cldr-jars.sh "$CLDR_DIR" + +**CLDR library version used by the cldr conversion tool:** \ +⚠️ **Warning:** Must be the same as `real_cldr_ver` +```sh +mvn help:evaluate -Dexpression=cldr-code.version -q -DforceStdout -f $ICU_DIR/tools/cldr/cldr-to-icu ``` -## Generating all ICU data and source code +**ICU version used by the cldr library:** \ +⚠️ **Warning:** Must be the same as `real_icu_ver` +```sh +mvn help:evaluate -Dexpression=icu4j.version -q -DforceStdout -f $CLDR_DIR/tools +``` +### TLDR (Quick update versions without checking) + +```sh +# Get real versions +real_icu_ver=`mvn help:evaluate -Dexpression=project.version -q -DforceStdout -f $ICU_DIR/icu4j` +echo $real_icu_ver +real_cldr_ver=`mvn help:evaluate -Dexpression=project.version -q -DforceStdout -f $CLDR_DIR/tools` +echo $real_cldr_ver +# Set dependency versions +mvn versions:set-property -Dproperty=icu4j.version -DnewVersion=$real_icu_ver -f $ICU_DIR/tools/cldr/cldr-to-icu +mvn versions:set-property -Dproperty=cldr-code.version -DnewVersion=$real_cldr_ver -f $ICU_DIR/tools/cldr/cldr-to-icu +mvn versions:set-property -Dproperty=icu4j.version -DnewVersion=$real_icu_ver -f $CLDR_DIR/tools ``` -$ cd "$TOOLS_ROOT/cldr/cldr-to-icu" -$ ant -f build-icu-data.xml + +## Build everything + +You must also build and install an additional CLDR library in the local Maven repository. + +Since that depends on ICU4J, you need to build and install that first.
+ +Lastly, build the conversion tool + +```sh +# Build ICU4J +cd "$ICU_DIR" +mvn clean install -f icu4j -DskipTests -DskipITs +# Build the CLDR library +cd "$CLDR_DIR" +mvn clean install -pl :cldr-all,:cldr-code -DskipTests -DskipITs +# Build the conversion tool +cd "$ICU_DIR/tools/cldr/cldr-to-icu/" +mvn clean package -DskipTests -DskipITs ``` +## Generating all ICU data and source code + +Run the conversion tool: +```sh +cd "$ICU_DIR/tools/cldr/cldr-to-icu/" +java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar +``` + +You can run it with `--help` for all the options supported. + ## Other Examples * Outputting a subset of the supplemental data into a specified directory: - ``` - $ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DoutputTypes=plurals,dayPeriods -DdontGenCode=true + ```sh + java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar --outDir=/tmp/cldr --outputTypes=plurals,dayPeriods --dontGenCode=true ``` Note: Output types can be listed with mixedCase, lower_underscore or UPPER_UNDERSCORE. Pass `-DoutputTypes=help` to see the full list. * Outputting only a subset of locale IDs (and all the supplemental data): - ``` - $ ant -f build-icu-data.xml -DoutDir=/tmp/cldr -DlocaleIdFilter='(zh|yue).*' -DdontGenCode=true + ```sh + java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar --outDir=/tmp/cldr --outputTypes=plurals,dayPeriods --dontGenCode=true + + java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar --outDir=/tmp/cldr --localeIdFilter='(zh|yue).*' --dontGenCode=true ``` * Overriding the default CLDR version string (which normally matches the CLDR library code): - ``` - $ ant -f build-icu-data.xml -DcldrVersion="36.1" + ```sh + java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar --cldrVersion="36.1" ``` ### Using `alt="ascii"` CLDR alternate values from the CLDR XML CLDR provides alternate values in addition to the default values for locale data. 
-For example, some locales have time formats using U+202F NARROW NO-BREAK SPACE (NNBSP) between the hours/minutes/seconds and the day periods. +For example, some locales have time formats using U+202F NARROW NO-BREAK SPACE (`NNBSP`) between the hours/minutes/seconds and the day periods. In order to provide the equivalent time formats that use the ASCII space U+0020 SPACE, the alternate values have the extra attribute `alt="ascii"`. Follw these steps to generate ICU data using the ASCII versions of locale data: -1. First, edit the `build-icu-data.xml` file where it mentions `ALTERNATE VALUES` +1. First, edit the `config.xml` file where it mentions `ALTERNATE VALUES` with the correctly annotated source path, target path, and locales list as follows: @@ -150,10 +241,10 @@ as follows: + source="//ldml/dates/calendars/calendar[@type='generic']/dateTimeFormats/availableFormats/dateFormatItem[@id='hms'][@alt='ascii']"/> ``` -1. Then run the generator: +1. Then run the generator: - ``` - $ ant -f build-icu-data.xml + ```sh + java -jar target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar ``` ## Config syntax details @@ -167,19 +258,19 @@ the following excerpt of the DTD schema indicates that there is a default value ``` -See `build-icu-data.xml` for documentation of all options and additional customization. +See `config.xml` for documentation of all options and additional customization. +## Running unit tests (CURRENTLY FAILING) -## Running unit tests - -``` -$ mvn test -DCLDR_DIR="$CLDR_DATA_DIR" +```sh +mvn test -DCLDR_DIR="$CLDR_DATA_DIR" ``` - ## Importing and running from an IDE This project should be easy to import into an IDE which supports Maven development, such as IntelliJ or Eclipse. It uses a local Maven repository directory for the unpublished CLDR libraries (which are included in the project), but otherwise gets all dependencies via Maven's public repositories. 
+ +But before importing and running it you still need to build the ICU4J and the CLDR library (see above). diff --git a/tools/cldr/cldr-to-icu/README.txt b/tools/cldr/cldr-to-icu/README.txt deleted file mode 100644 index d675c1d078be..000000000000 --- a/tools/cldr/cldr-to-icu/README.txt +++ /dev/null @@ -1,11 +0,0 @@ -********************************************************************* -*** © 2019 and later: Unicode, Inc. and others. *** -*** License & terms of use: http://www.unicode.org/copyright.html *** -********************************************************************* - -The instructions for the LdmlConverter tool (a.k.a. CLDR-to-ICU converter) have -moved to README.md in this directory. - -Please read README.md, or better yet, view the rendered form of its Markdown -contents online at Github -(ex: https://github.com/unicode-org/icu/tree/main/tools/cldr/cldr-to-icu) diff --git a/tools/cldr/cldr-to-icu/build-icu-data.xml b/tools/cldr/cldr-to-icu/build-icu-data.xml deleted file mode 100644 index 69bf7f577cb0..000000000000 --- a/tools/cldr/cldr-to-icu/build-icu-data.xml +++ /dev/null @@ -1,472 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // A - af, agq, ak, am, ar, ars, as, asa, ast, az, az_AZ, az_Cyrl - - // B - bas, be, bem, bez, bg, bgc, bho, blo, bm, bn, bo, br, brx, bs, bs_BA, bs_Cyrl - - // C - ca, ccp, ce, ceb, cgg, chr, ckb, cs, csw, cv, cy - - // D - da, dav, de, dje, doi, dsb, dua, dyo, dz - - // E - ebu, ee, el, en, en_NH, en_RH, eo, es, et, eu, ewo - - // F - fa, ff, ff_Adlm, ff_CM, ff_GN, ff_MR, ff_SN, fi, fil, fo, fr, fur, fy - - // G - ga, gaa, gd, gl, gsw, gu, guz, gv - - // H - ha, haw, he, hi, hi_Latn, hr, hsb, hu, hy - - // I - ia, id, ie, ig, ii, in, in_ID, is, it, iw, iw_IL - - // J - ja, jgo, jmc, jv - - // K - ka, kab, kam, kde, kea, kgp, 
khq, ki, kk, kkj, kl, kln, km, kn, ko, kok, kok_Latn, ks - ks_Deva, ks_IN, ksb, ksf, ksh, ku, kw, kxv, kxv_Deva, kxv_IN, kxv_Orya, kxv_Telu, ky - - // L - lag, lb, lg, lij, lkt, lmo, ln, lo, lrc, lt, lu, luo, luy, lv - - // M - mai, mas, mer, mfe, mg, mgh, mgo, mi, mk, ml, mn, mni, mni_IN, mo, mr, ms - mt, mua, my, mzn - - // N - naq, nb, nd, nds, ne, nl, nmg, nn, nnh, no, no_NO, no_NO_NY, nqo, nso, nus, nyn - - // O - oc, om, or, os - - // P - pa, pa_Arab, pa_IN, pa_PK, pcm, pl, prg, ps, pt - - // Q - qu - - // R - raj, rm, rn, ro, rof, ru, rw, rwk - - // S - sa, sah, saq, sat, sat_IN, sbp, sc, sd, sd_Deva, sd_IN, sd_PK, se, seh, ses, sg, sh, sh_BA, sh_CS, sh_YU - shi, shi_Latn, shi_MA, si, sk, sl, smn, sn, so, sq, sr, sr_BA, sr_CS, sr_Cyrl_CS, sr_Cyrl_YU, sr_Latn - sr_Latn_CS, sr_Latn_YU, sr_ME, sr_RS, sr_XK, sr_YU, st, su, su_ID, sv, sw, syr, szl - - // T - ta, te, teo, tg, th, ti, tk, tl, tl_PH, tn, to, tok, tr, tt, twq, tzm - - // U - ug, uk, ur, uz, uz_AF, uz_Arab, uz_Cyrl, uz_UZ - - // V - vai, vai_LR, vai_Latn, vec, vi, vmw, vun - - // W - wae, wo - - // X - xh, xnr, xog - - // Y - yav, yi, yo, yrl, yue, yue_CN, yue_HK, yue_Hans - - // Z - za, zgh, zh, zh_CN, zh_HK, zh_Hant, zh_MO, zh_SG, zh_TW, zu - - - - - - - - - - - - - - - - - - - root, - - // A-B - af, am, ars, ar, as, az, be, bg, bn, bo, br, bs_Cyrl, bs, - - // C-F - ca, ceb, chr, cs, cy, da, de_AT, de, dsb, dz, ee, el, en, - en_US_POSIX, en_US, eo, es, et, fa_AF, fa, ff_Adlm, ff, fil, fi, fo, fr_CA, fr, fy, - - // G-J - ga, gl, gu, ha, haw, he, hi, hr, hsb, hu, hy, - id_ID, id, ig, in, in_ID, is, it, iw_IL, iw, ja, - - // K-P - ka, kk, kl, km, kn, kok, ko, ku, ky, lb, lij, lkt, ln, lo, lt, lv, - mk, ml, mn, mo, mr, ms, mt, my, nb, nb_NO, ne, nl, nn, no, no_NO, nso, - om, or, pa_IN, pa, pa_Guru, pl, ps, pt, - - // R-T - ro, ru, sa, se, sh_BA, sh_CS, sh, sh_YU, si, sk, sl, smn, sq, - sr_BA, sr_Cyrl_ME, sr_Latn, sr_ME, sr_RS, sr, st, sv, sw, - ta, te, th, tk, tn, to, tr, - - // U-Z - ug, uk, ur, uz, 
vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans - yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu - - - - - - - - - - root, - - // A-E - af, ak, am, ars, ar, az, be, bg, bs, ca, ccp, chr, cs, cy, - da, de_CH, de, ee, el, en_001, en_IN, en, eo, es_419, es_DO, - es_GT, es_HN, es_MX, es_NI, es_PA, es_PR, es_SV, es, es_US, et, - - // F-P - fa_AF, fa, ff, fil, fi, fo, fr_BE, fr_CH, fr, ga, he, hi, hr, - hu, hy, id, in, is, it, iw, ja, ka, kk, kl, km, ko, ky, lb, - lo, lrc, lt, lv, mk, ms, mt, my, nb, ne, nl, nn, no, pl, pt_PT, pt, - - // Q-Z - qu, ro, ru, se, sh, sk, sl, sq, sr_Latn, sr, su, sv, sw, ta, th, tr, - uk, vec, vi, yue_Hans, yue, zh_Hant_HK, zh_Hant, zh_HK, zh_MO, zh_TW, zh - - - - - - root, - de, el, en, en_US_POSIX, en_US, es, fi, fr, it, ja, ko, pt, ru, sv, zh_Hant, zh - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tools/cldr/cldr-to-icu/config.xml b/tools/cldr/cldr-to-icu/config.xml new file mode 100644 index 000000000000..e5be7e6b03dc --- /dev/null +++ b/tools/cldr/cldr-to-icu/config.xml @@ -0,0 +1,295 @@ + + + + + + + + // A + af, agq, ak, am, ar, ars, as, asa, ast, az, az_AZ, az_Cyrl + + // B + bas, be, bem, bez, bg, bgc, bho, blo, bm, bn, bo, br, brx, bs, bs_BA, bs_Cyrl + + // C + ca, ccp, ce, ceb, cgg, chr, ckb, cs, csw, cv, cy + + // D + da, dav, de, dje, doi, dsb, dua, dyo, dz + + // E + ebu, ee, el, en, en_NH, en_RH, eo, es, et, eu, ewo + + // F + fa, ff, ff_Adlm, ff_CM, ff_GN, ff_MR, ff_SN, fi, fil, fo, fr, fur, fy + + // G + ga, gaa, gd, gl, gsw, gu, guz, gv + + // H + ha, haw, he, hi, hi_Latn, hr, hsb, hu, hy + + // I + ia, id, ie, ig, ii, in, in_ID, is, it, iw, iw_IL + + // J + ja, jgo, jmc, jv + + // K + ka, kab, kam, kde, kea, kgp, khq, ki, kk, kkj, kl, kln, km, kn, ko, kok, kok_Latn, ks + ks_Deva, ks_IN, ksb, ksf, ksh, ku, kw, kxv, kxv_Deva, kxv_IN, kxv_Orya, kxv_Telu, ky + + // L + 
lag, lb, lg, lij, lkt, lmo, ln, lo, lrc, lt, lu, luo, luy, lv + + // M + mai, mas, mer, mfe, mg, mgh, mgo, mi, mk, ml, mn, mni, mni_IN, mo, mr, ms + mt, mua, my, mzn + + // N + naq, nb, nd, nds, ne, nl, nmg, nn, nnh, no, no_NO, no_NO_NY, nqo, nso, nus, nyn + + // O + oc, om, or, os + + // P + pa, pa_Arab, pa_IN, pa_PK, pcm, pl, prg, ps, pt + + // Q + qu + + // R + raj, rm, rn, ro, rof, ru, rw, rwk + + // S + sa, sah, saq, sat, sat_IN, sbp, sc, sd, sd_Deva, sd_IN, sd_PK, se, seh, ses, sg, sh, sh_BA, sh_CS, sh_YU + shi, shi_Latn, shi_MA, si, sk, sl, smn, sn, so, sq, sr, sr_BA, sr_CS, sr_Cyrl_CS, sr_Cyrl_YU, sr_Latn + sr_Latn_CS, sr_Latn_YU, sr_ME, sr_RS, sr_XK, sr_YU, st, su, su_ID, sv, sw, syr, szl + + // T + ta, te, teo, tg, th, ti, tk, tl, tl_PH, tn, to, tok, tr, tt, twq, tzm + + // U + ug, uk, ur, uz, uz_AF, uz_Arab, uz_Cyrl, uz_UZ + + // V + vai, vai_LR, vai_Latn, vec, vi, vmw, vun + + // W + wae, wo + + // X + xh, xnr, xog + + // Y + yav, yi, yo, yrl, yue, yue_CN, yue_HK, yue_Hans + + // Z + za, zgh, zh, zh_CN, zh_HK, zh_Hant, zh_MO, zh_SG, zh_TW, zu + + + + + + + + + + + + + + + + + + root, + + // A-B + af, am, ars, ar, as, az, be, bg, bn, bo, br, bs_Cyrl, bs, + + // C-F + ca, ceb, chr, cs, cy, da, de_AT, de, dsb, dz, ee, el, en, + en_US_POSIX, en_US, eo, es, et, fa_AF, fa, ff_Adlm, ff, fil, fi, fo, fr_CA, fr, fy, + + // G-J + ga, gl, gu, ha, haw, he, hi, hr, hsb, hu, hy, + id_ID, id, ig, in, in_ID, is, it, iw_IL, iw, ja, + + // K-P + ka, kk, kl, km, kn, kok, ko, ku, ky, lb, lij, lkt, ln, lo, lt, lv, + mk, ml, mn, mo, mr, ms, mt, my, nb, nb_NO, ne, nl, nn, no, no_NO, nso, + om, or, pa_IN, pa, pa_Guru, pl, ps, pt, + + // R-T + ro, ru, sa, se, sh_BA, sh_CS, sh, sh_YU, si, sk, sl, smn, sq, + sr_BA, sr_Cyrl_ME, sr_Latn, sr_ME, sr_RS, sr, st, sv, sw, + ta, te, th, tk, tn, to, tr, + + // U-Z + ug, uk, ur, uz, vi, wae, wo, xh, yi, yo, yue_CN, yue_Hans_CN, yue_Hans + yue_Hant, yue, zh_CN, zh_Hans, zh_Hant, zh_HK, zh_MO, zh_SG, zh_TW, zh, zu + + + + + + + + + + root, + 
+ // A-E + af, ak, am, ars, ar, az, be, bg, bs, ca, ccp, chr, cs, cy, + da, de_CH, de, ee, el, en_001, en_IN, en, eo, es_419, es_DO, + es_GT, es_HN, es_MX, es_NI, es_PA, es_PR, es_SV, es, es_US, et, + + // F-P + fa_AF, fa, ff, fil, fi, fo, fr_BE, fr_CH, fr, ga, he, hi, hr, + hu, hy, id, in, is, it, iw, ja, ka, kk, kl, km, ko, ky, lb, + lo, lrc, lt, lv, mk, ms, mt, my, nb, ne, nl, nn, no, pl, pt_PT, pt, + + // Q-Z + qu, ro, ru, se, sh, sk, sl, sq, sr_Latn, sr, su, sv, sw, ta, th, tr, + uk, vec, vi, yue_Hans, yue, zh_Hant_HK, zh_Hant, zh_HK, zh_MO, zh_TW, zh + + + + + + root, + de, el, en, en_US_POSIX, en_US, es, fi, fr, it, ja, ko, pt, ru, sv, zh_Hant, zh + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/cldr/cldr-to-icu/pom.xml b/tools/cldr/cldr-to-icu/pom.xml index c35af50b0615..2fb61ec8b4fa 100644 --- a/tools/cldr/cldr-to-icu/pom.xml +++ b/tools/cldr/cldr-to-icu/pom.xml @@ -9,71 +9,60 @@ 4.0.0 - - org.unicode.icu - cldr-lib - 1.0 - ../lib - + org.unicode.icu + cldr-to-icu + 1.0-SNAPSHOT UTF-8 + + 11 + 11 + + 76.1 + 47.0-SNAPSHOT + 32.1.1-jre + 1.4.4 + 1.9.0 - cldr-to-icu - 1.0-SNAPSHOT org.apache.maven.plugins maven-compiler-plugin - 3.5.1 - - 8 - 8 - - - - org.codehaus.mojo - exec-maven-plugin - 1.6.0 + 3.13.0 - - org.unicode.icu.tool.cldrtoicu.LdmlConverter - - - - ICU_DIR - ${project.basedir}/../../.. 
- - + ${maven.compiler.source} + ${maven.compiler.target} org.apache.maven.plugins maven-assembly-plugin - 3.1.1 + 3.7.1 compile single - - - - - org.unicode.icu.tool.cldrtoicu.LdmlConverter - - - - - jar-with-dependencies - - + + + + + org.unicode.icu.tool.cldrtoicu.Cldr2Icu + + + + + jar-with-dependencies + + @@ -83,11 +72,16 @@ com.ibm.icu icu4j - 76.1 - + ${icu4j.version} + + + + org.unicode.cldr + cldr-code + ${cldr-code.version} - org.apache.ant - ant - 1.10.11 + commons-cli + commons-cli + ${commons-cli.version} com.google.truth truth - 1.0 - test - - - com.google.truth.extensions - truth-java8-extension - 1.0 + ${truth.version} test - - - githubcldr - GitHub unicode-org/icu Apache Maven Packages - https://maven.pkg.github.com/unicode-org/icu - - diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java new file mode 100644 index 000000000000..6a7618847e8c --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2Icu.java @@ -0,0 +1,71 @@ +// © 2024 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import org.unicode.icu.tool.cldrtoicu.ant.CleanOutputDirectoryTask; +import org.unicode.icu.tool.cldrtoicu.ant.ConvertIcuDataTask; +import org.unicode.icu.tool.cldrtoicu.ant.GenerateCodeTask; + +public class Cldr2Icu { + private final Cldr2IcuCliOptions options = new Cldr2IcuCliOptions(); + + private void convert() { + ConvertIcuDataTask convert = ConvertIcuDataTask.fromXml(options.xmlConfig); + + convert.setCldrDir(options.cldrDataDir); + convert.setOutputDir(options.outDir); + convert.setSpecialsDir(options.specialsDir); + convert.setOutputTypes(options.outputTypes); + convert.setIcuVersion(options.icuVersion); + convert.setIcuDataVersion(options.icuDataVersion); + convert.setCldrVersion(options.cldrVersion); + convert.setMinimalDraftStatus(options.minDraftStatus); + convert.setLocaleIdFilter(options.localeIdFilter); + convert.setIncludePseudoLocales(options.includePseudoLocales); + convert.setEmitReport(options.emitReport); + + convert.init(); + convert.execute(); + } + + private void generateCode(String action) { + GenerateCodeTask generateCode = new GenerateCodeTask(); + + generateCode.setCldrDir(options.cldrDataDir); + generateCode.setCOutDir(options.genCCodeDir); + generateCode.setJavaOutDir(options.genJavaCodeDir); + generateCode.setAction(action); + + generateCode.init(); + generateCode.execute(); + } + + private void outputDirectories() { + CleanOutputDirectoryTask clean = CleanOutputDirectoryTask.fromXml(options.xmlConfig); + + clean.setRoot(options.outDir); + clean.setForceDelete(options.forceDelete); + + clean.init(); + clean.execute(); + } + + private void clean() { + outputDirectories(); + generateCode("clean"); + } + + private void generate() { + convert(); + if (!options.dontGenCode) { + generateCode(null); + } + } + + public static void main(String[] args) { + Cldr2Icu self = new Cldr2Icu(); + self.options.processArgs(args); + 
self.clean(); + self.generate(); + } +} diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java new file mode 100644 index 000000000000..4edf5cdfefd0 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/Cldr2IcuCliOptions.java @@ -0,0 +1,401 @@ +// © 2024 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu; + +import java.io.File; +import java.util.Arrays; +import java.util.StringJoiner; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.unicode.icu.tool.cldrtoicu.LdmlConverter.OutputType; + +import com.ibm.icu.util.VersionInfo; + +class Cldr2IcuCliOptions { + private static final String HELP = "help"; + private static final String HELP_DESC = "this text"; + + private static final String ICU_DIR = "icuDir"; + private static final String ICU_DIR_DESC = "Path top level ICU directory" + + " (containing `.git`, `icu4c`, `icu4j`, `tools` directories)"; + private static final String ICU_DIR_DEFAULT = "${environ.ICU_DIR}"; + String icuDir; + + private static final String CLDR_DIR = "cldrDir"; + private static final String CLDR_DIR_DESC = "This is the path to the to root of standard CLDR sources," + + " (containing `common` and `tools` directories)."; + private static final String CLDR_DIR_DEFAULT = "${environ.CLDR_DIR}"; + String cldrDir; + + private static final String CLDR_DATA_DIR = "cldrDataDir"; + private static final String CLDR_DATA_DIR_DESC = "The top-level directory for the CLDR production data" + + " (typically the `production` directory in the staging repository)." 
+ + " Usually generated locally or obtained from https://github.com/unicode-org/cldr-staging/tree/main/production"; + private static final String CLDR_DATA_DIR_DEFAULT = "${environ.CLDR_DATA_DIR}"; + String cldrDataDir; + + private static final String OUT_DIR = "outDir"; + final private static String OUT_DIR_DESC = "The output directory into which to write the converted ICU data. By default" + + " this will overwrite (without deletion) the ICU data files in this ICU release," + + " so it is recommended that for testing, it be set to another value."; + final private static String OUT_DIR_DEFAULT = "${icuDir}/icu4c/source/data"; + String outDir; + + private static final String GEN_C_CODE_DIR = "genCCodeDir"; + private static final String GEN_C_CODE_DIR_DESC = "The output directory into which to write generated C/C++ code." + + " By default this will overwrite (without deletion) the generated C/C++ files in this ICU release," + + " so it is recommended that for testing, it be set to another value."; + private static final String GEN_C_CODE_DIR_DEFAULT = "${icuDir}/icu4c/source"; + String genCCodeDir; + + private static final String GEN_JAVA_CODE_DIR = "genJavaCodeDir"; + private static final String GEN_JAVA_CODE_DIR_DESC = "The output directory into which to write generated Java code." 
+ + " By default this will overwrite (without deletion) the generated Java files in this ICU release," + + " so it is recommended that for testing, it be set to another value."; + private static final String GEN_JAVA_CODE_DIR_DEFAULT = "${icuDir}/icu4j/main/core"; + String genJavaCodeDir; + + private static final String DONT_GEN_CODE = "dontGenCode"; + private static final String DONT_GEN_CODE_DESC = "Set this to true to prevent the generation of" + + " ICU source files"; + private static final String DONT_GEN_CODE_DEFAULT = "false"; + boolean dontGenCode; + + private static final String SPECIALS_DIR = "specialsDir"; + private static final String SPECIALS_DIR_DESC = "The directory in which the additional ICU XML data is stored."; + private static final String SPECIALS_DIR_DEFAULT = "${icuDir}/icu4c/source/data/xml"; + String specialsDir; + + private static final String ICU_VERSION = "icuVersion"; + private static final String ICU_VERSION_DESC = "Default value for ICU version (`icuver.txt`)." + + " Update this for each release."; + private static final String ICU_VERSION_DEFAULT = VersionInfo.ICU_VERSION.toString(); + String icuVersion; + + private static final String ICU_DATA_VERSION = "icuDataVersion"; + private static final String ICU_DATA_VERSION_DESC = "Default value for ICU data version (`icuver.txt`)." + + " Update this for each release."; + private static final String ICU_DATA_VERSION_DEFAULT = VersionInfo.ICU_DATA_VERSION.toString(); + String icuDataVersion; + + private static final String CLDR_VERSION = "cldrVersion"; + private static final String CLDR_VERSION_DESC = "An override for the CLDR version string (`icuver.txt` and others)." 
+ + " This will be extracted from the CLDR library used for building the data if not set here."; + private static final String CLDR_VERSION_DEFAULT = ""; + String cldrVersion; + + private static final String MIN_DRAFT_STATUS = "minDraftStatus"; + private static final String MIN_DRAFT_STATUS_DESC = "The minimum draft status for CLDR data to be used in the conversion." + + " See CldrDraftStatus for more details."; + private static final String MIN_DRAFT_STATUS_DEFAULT = "CONTRIBUTED"; + String minDraftStatus; + + private static final String LOCALE_ID_FILTER = "localeIdFilter"; + private static final String LOCALE_ID_FILTER_DESC = "A regular expression to match the locale IDs to be generated" + + " (useful for debugging specific regions). This is applied after locale ID specifications" + + " have been expanded into full locale IDs, so the value `en` will NOT match `en_GB` or `en_001` etc."; + private static final String LOCALE_ID_FILTER_DEFAULT = ""; + String localeIdFilter; + + private static final String INCLUDE_PSEUDO_LOCALES = "includePseudoLocales"; + private static final String INCLUDE_PSEUDO_LOCALES_DESC = "Whether to synthetically generate \"pseudo locale\" data" + + " (`en_XA` and `ar_XB`)."; + private static final String INCLUDE_PSEUDO_LOCALES_DEFAULT = "false"; + boolean includePseudoLocales; + + private static final String EMIT_REPORT = "emitReport"; + private static final String EMIT_REPORT_DESC = "Whether to emit a debug report containing some possibly" + + " useful information after the conversion has finished."; + private static final String EMIT_REPORT_DEFAULT = "false"; + boolean emitReport; + + private static final String OUTPUT_TYPES = "outputTypes"; + private static final String OUTPUT_TYPES_DESC = "List of output \"types\" to be generated (e.g. 
`rbnf,plurals,locales`);" + + " an empty list means \"build everything\".\n" + + "Note that the grouping of types is based on the legacy converter behaviour and" + + " is not always directly associated with an output directory (e.g. \"locales\") produces locale data" + + " for `curr/`, `lang/`, `main/`, `region/`, `unit/`, `zone/` but NOT `coll/`, `brkitr/` or `rbnf/`).\n" + // It would be nice to initialize this from OutputType, but to do that we need to read an XML file, + // so we need to know what the cldrDir folder is. But we only know that AFTER we parse the command line. + + "Use outputTypesList to get a list of currently know values."; + private static final String OUTPUT_TYPES_DEFAULT = ""; + String outputTypes; + + private static final String OUTPUT_TYPES_LIST = "outputTypesList"; + private static final String OUTPUT_TYPES_LIST_DESC = "Show the complete list of knonw output types and exit."; + private static final String OUTPUT_TYPES_LIST_DEFAULT = "false"; + + private static final String FORCE_DELETE = "forceDelete"; + private static final String FORCE_DELETE_DESC = "Override to force the 'clean' task to delete files it cannot" + + " determine to be auto-generated by this tool. This is useful if the file header changes since" + + " the heading is what's used to recognize auto-generated files."; + private static final String FORCE_DELETE_DEFAULT = "false"; + boolean forceDelete; + + private static final String XML_CONFIG = "xmlConfig"; + private static final String XML_CONFIG_DESC = "Override to force the 'clean' task to delete files it cannot" + + " determine to be auto-generated by this tool. This is useful if the file header changes since" + + " the heading is what's used to recognize auto-generated files."; + private static final String XML_CONFIG_DEFAULT = "${icuDir}/tools/cldr/cldr-to-icu/config.xml"; + String xmlConfig; + + // These must be kept in sync with getOptions(). 
+ private static final Options options = new Options() + .addOption(Option.builder() + .longOpt(HELP) + .desc(HELP_DESC) + .build()) + .addOption(Option.builder() + .longOpt(ICU_DIR) + .hasArg() + .argName("path") + .desc(descWithDefault(ICU_DIR_DESC, ICU_DIR_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(CLDR_DIR) + .hasArg() + .argName("path") + .desc(descWithDefault(CLDR_DIR_DESC, CLDR_DIR_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(CLDR_DATA_DIR) + .hasArg() + .argName("path") + .desc(descWithDefault(CLDR_DATA_DIR_DESC, CLDR_DATA_DIR_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(OUT_DIR) + .hasArg() + .argName("path") + .desc(descWithDefault(OUT_DIR_DESC, OUT_DIR_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(GEN_C_CODE_DIR) + .hasArg() + .argName("path") + .desc(descWithDefault(GEN_C_CODE_DIR_DESC, GEN_C_CODE_DIR_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(GEN_JAVA_CODE_DIR) + .hasArg() + .argName("path") + .desc(descWithDefault(GEN_JAVA_CODE_DIR_DESC, GEN_JAVA_CODE_DIR_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(DONT_GEN_CODE) + .desc(descWithDefault(DONT_GEN_CODE_DESC, DONT_GEN_CODE_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(SPECIALS_DIR) + .hasArg() + .argName("path") + .desc(descWithDefault(SPECIALS_DIR_DESC, SPECIALS_DIR_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(OUTPUT_TYPES) + .hasArg() + .argName("out_types") + .desc(descWithDefault(OUTPUT_TYPES_DESC, OUTPUT_TYPES_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(OUTPUT_TYPES_LIST) + .desc(descWithDefault(OUTPUT_TYPES_LIST_DESC, OUTPUT_TYPES_LIST_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(ICU_VERSION) + .hasArg() + .argName("version") + .desc(descWithDefault(ICU_VERSION_DESC, ICU_VERSION_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(ICU_DATA_VERSION) + .hasArg() + .argName("version") + 
.desc(descWithDefault(ICU_DATA_VERSION_DESC, ICU_DATA_VERSION_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(CLDR_VERSION) + .hasArg() + .argName("version") + .desc(descWithDefault(CLDR_VERSION_DESC, CLDR_VERSION_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(MIN_DRAFT_STATUS) + .hasArg() + .argName("draft_status") + .desc(descWithDefault(MIN_DRAFT_STATUS_DESC, MIN_DRAFT_STATUS_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(LOCALE_ID_FILTER) + .hasArg() + .argName("locale_list") + .desc(descWithDefault(LOCALE_ID_FILTER_DESC, LOCALE_ID_FILTER_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(INCLUDE_PSEUDO_LOCALES) + .desc(descWithDefault(INCLUDE_PSEUDO_LOCALES_DESC, INCLUDE_PSEUDO_LOCALES_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(EMIT_REPORT) + .desc(descWithDefault(EMIT_REPORT_DESC, EMIT_REPORT_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(FORCE_DELETE) + .desc(descWithDefault(FORCE_DELETE_DESC, FORCE_DELETE_DEFAULT)) + .build()) + .addOption(Option.builder() + .longOpt(XML_CONFIG) + .hasArg() + .argName("path") + .desc(descWithDefault(XML_CONFIG_DESC, XML_CONFIG_DEFAULT)) + .build()) + ; + + void processArgs(String[] args) { + CommandLine cli = null; + try{ + CommandLineParser parser = new DefaultParser(); + cli = parser.parse(options, args); + } catch (Exception e){ + cli = CommandLine.builder().build(); + showUsageAndExit(); + } + if (cli.hasOption(HELP)) { + showUsageAndExit(); + } + + icuDir = cli.getOptionValue(ICU_DIR, icuDir); + cldrDir = cli.getOptionValue(CLDR_DIR, cldrDir); + cldrDataDir = cli.getOptionValue(CLDR_DATA_DIR, cldrDataDir); + + outDir = cli.getOptionValue(OUT_DIR, expandFolders(OUT_DIR_DEFAULT)); + genCCodeDir = cli.getOptionValue(GEN_C_CODE_DIR, expandFolders(GEN_C_CODE_DIR_DEFAULT)); + genJavaCodeDir = cli.getOptionValue(GEN_JAVA_CODE_DIR, expandFolders(GEN_JAVA_CODE_DIR_DEFAULT)); + dontGenCode = cli.hasOption(DONT_GEN_CODE); + specialsDir 
= cli.getOptionValue(SPECIALS_DIR, expandFolders(SPECIALS_DIR_DEFAULT)); + outputTypes = cli.getOptionValue(OUTPUT_TYPES, ""); // empty means all + icuVersion = cli.getOptionValue(ICU_VERSION, ICU_VERSION_DEFAULT); + icuDataVersion = cli.getOptionValue(ICU_DATA_VERSION, ICU_DATA_VERSION_DEFAULT); + cldrVersion = cli.getOptionValue(CLDR_VERSION, CLDR_VERSION_DEFAULT); + minDraftStatus = cli.getOptionValue(MIN_DRAFT_STATUS, MIN_DRAFT_STATUS_DEFAULT); + localeIdFilter = cli.getOptionValue(LOCALE_ID_FILTER, LOCALE_ID_FILTER_DEFAULT); + includePseudoLocales = cli.hasOption(INCLUDE_PSEUDO_LOCALES); + emitReport = cli.hasOption(EMIT_REPORT); + forceDelete = cli.hasOption(FORCE_DELETE); + xmlConfig = cli.getOptionValue(XML_CONFIG, expandFolders(XML_CONFIG_DEFAULT)); + + if (cli.hasOption(OUTPUT_TYPES_LIST)) { + OutputType[] outTypesToSort = OutputType.values(); + Arrays.sort(outTypesToSort, (o1, o2) -> o1.name().compareTo(o2.name())); + StringJoiner strOutType = new StringJoiner(", "); + for (OutputType ot : outTypesToSort) { + strOutType.add(ot.name()); + } + System.out.println("Known output types: " + strOutType); + System.exit(2); + } + } + + private static String descWithDefault(String description, String defaultValue) { + if (defaultValue != null) { + return description + "\nDefaults to: \"" + defaultValue + "\""; + } else { + return description; + } + } + + private void showUsageAndExit() { + String thisClassName = Cldr2Icu.class.getCanonicalName(); + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp( + /*width*/ 120, + /*cmdLineSyntax*/ thisClassName + " [OPTIONS]\n", + /*header*/ "\n" + + "This program is used to convert CLDR xml files to ICU ResourceBundle txt files.\n" + + "Options:", + options, + /*footer*/ "\nExample: " + thisClassName + " --outDir /tmp/debug --localeIdFilter=fr"); + System.exit(-1); + } + + Cldr2IcuCliOptions() { + // This will initialize icuDir, cldrDir, and cldrDataDir from environment variables + validateEnvironment(); + 
} + + String expandFolders(String str) { + return str + .replace("${icuDir}", icuDir) + .replace("${cldrDir}", cldrDir) + .replace("${cldrDataDir}", cldrDataDir); + } + + // For certain things we want to check both the environment, and Java properties + // (passed with -Dkey=value) + // The property takes precedence. + private static String getEnvironOrProperty(String key) { + String result = System.getProperty(key); + if (result == null) { + result = System.getenv(key); + } + return result; + } + + // Check that the environment variables point to the proper `icu` / `cldr` / `cldr-staging` folders + private void validateEnvironment() { + icuDir = getEnvironOrProperty("ICU_DIR"); + cldrDir = getEnvironOrProperty("CLDR_DIR"); + cldrDataDir = getEnvironOrProperty("CLDR_DATA_DIR"); + + String icuMessage = "Set the ICU_DIR environment variable to the top level ICU directory (containing `.git`, `icu4c`, `icu4j`, `tools` directories)"; + String cldrMessage = "Set the CLDR_DIR environment variable to the top level CLDR directory (containing `common` and `tools` directories)"; + String cldrDataMessage = "Set the CLDR_DATA_DIR environment variable to the top level CLDR production data directory (typically the `production` directory in the staging repository)\n" + + "Usually generated locally or obtained from: https://github.com/unicode-org/cldr-staging/tree/main/production"; + if (icuDir == null) { + System.err.println(icuMessage); + System.exit(1); + } + if (cldrDir == null) { + System.err.println(cldrMessage); + System.exit(1); + } + if (cldrDataDir == null) { + System.err.println(cldrDataMessage); + System.exit(1); + } + + if (!new File(icuDir).isDirectory() + || ! new File(icuDir, "icu4c").isDirectory() + || ! new File(icuDir, "icu4j").isDirectory() + || ! new File(icuDir, "tools/cldr/cldr-to-icu").isDirectory() + || ! 
new File(icuDir, "tools/cldr/cldr-to-icu/pom.xml").isFile()) { + System.err.println("The `" + icuDir + "` directory does not look like a valid icu root."); + System.err.println(icuMessage); + System.exit(1); + } + if (!new File(cldrDir).isDirectory() + || ! new File(cldrDir, "tools/cldr-code").isDirectory() + || ! new File(cldrDir, "tools/cldr-code/pom.xml").isFile()) { + System.err.println("The `" + cldrDir + "` directory does not look like a valid cldr root."); + System.err.println(cldrMessage); + System.exit(1); + } + if (!new File(cldrDataDir).isDirectory() + || ! new File(cldrDataDir, "common/supplemental").isDirectory() + || ! new File(cldrDataDir, "common/main").isDirectory() + || ! new File(cldrDataDir, "common/main/en.xml").isFile()) { + System.err.println("The `" + cldrDataDir + "` directory does not look like a valid cldr-staging/ root."); + System.err.println(cldrDataMessage); + System.exit(1); + } + + // The cldr-code library checks for CLDR_DIR in the Java properties. + // So if we got cldrDir from the environment or the command line, we update the property.
+ System.setProperty("CLDR_DIR", cldrDir); + } +} diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java index 5df53606ba39..90d1b317f49b 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/IcuDataDumper.java @@ -179,7 +179,6 @@ void processLine(String line) { LineMatch match = LineType.match(line, inBlockComment); checkState(match.getType().isValidTransitionFrom(lastType), "invalid state transition: %s --//-> %s", lastType, match.getType()); - boolean isEndOfWrappedValue = false; switch (match.getType()) { case COMMENT: if (name != null) { diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java index 01e781387d28..76061b1b3480 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java @@ -11,6 +11,7 @@ import static java.util.stream.Collectors.partitioningBy; import java.io.BufferedReader; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -28,9 +29,14 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import org.apache.tools.ant.BuildException; -import org.apache.tools.ant.Task; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; + import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import com.google.common.base.CharMatcher; import 
com.google.common.collect.ImmutableList; @@ -38,7 +44,6 @@ import com.google.common.collect.Iterables; import com.google.common.io.CharStreams; -// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed. public final class CleanOutputDirectoryTask extends Task { private static final ImmutableSet ALLOWED_DIRECTORIES = Stream @@ -58,8 +63,7 @@ public final class CleanOutputDirectoryTask extends Task { // header without it (since that's the old behaviour). // Once there's been an ICU release with this line included in the headers of all data // files, we can remove the fallback and just test for this line and nothing else. - private static final String WAS_GENERATED_LABEL = - "Generated using tools/cldr/cldr-to-icu/build-icu-data.xml"; + private static final String WAS_GENERATED_LABEL = "Generated using tools/cldr/cldr-to-icu/"; // The number of header lines to check before giving up if we don't find the generated // label. @@ -84,9 +88,8 @@ public CleanOutputDirectoryTask() { public static final class Retain extends Task { private Path path = null; - // Don't use "Path" for the argument type because that always makes an absolute path (e.g. - // relative to the working directory for the Ant task). We want relative paths. - @SuppressWarnings("unused") + // Don't use "Path" for the argument type because that always makes an absolute path + // (e.g. relative to the working directory). We want relative paths. 
public void setPath(String path) { Path p = Paths.get(path).normalize(); checkBuild(!p.isAbsolute() && !p.startsWith(".."), "invalid path: %s", path); @@ -103,14 +106,12 @@ public static final class Dir extends Task { private String name; private final Set retained = new HashSet<>(); - @SuppressWarnings("unused") public void setName(String name) { checkBuild(ALLOWED_DIRECTORIES.contains(name), "unknown directory name '%s'; allowed values: %s", name, ALLOWED_DIRECTORIES); this.name = name; } - @SuppressWarnings("unused") public void addConfiguredRetain(Retain retain) { retained.add(retain.path); } @@ -121,18 +122,15 @@ public void init() throws BuildException { } } - @SuppressWarnings("unused") public void setRoot(String root) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. this.root = Paths.get(root); } - @SuppressWarnings("unused") public void setForceDelete(boolean forceDelete) { this.forceDelete = forceDelete; } - @SuppressWarnings("unused") public void addConfiguredDir(Dir dir) { outputDirs.add(dir); } @@ -255,7 +253,7 @@ static boolean wasFileAutoGenerated(BufferedReader fileReader, ImmutableList= headerLines.size() - 1; @@ -340,4 +338,77 @@ private static ImmutableList readLinesFromResource(String name) { throw new RuntimeException("cannot read resource: " + name, e); } } + + private static Retain getRetain(Element elem) { + if (!"retain".equals(elem.getTagName())) { + return null; + } + String path = elem.getAttribute("path"); + Retain retain = new Retain(); + retain.setPath(path); + return retain; + } + + private static Dir getDirectory(Element element) { + if (!"dir".equals(element.getTagName())) { + return null; + } + String name = element.getAttribute("name"); + Dir dir = new Dir(); + dir.setName(name); + Node node = element.getFirstChild(); + while (node != null) { + if (node.getNodeType() == Node.ELEMENT_NODE) { + Element childElement = (Element) node; + switch (childElement.getTagName()) { + case
"retain": + Retain retain = getRetain(childElement); + dir.addConfiguredRetain(retain); + break; + default: + } + } + node = node.getNextSibling(); + } + return dir; + } + + public static CleanOutputDirectoryTask fromXml(String fileName) { + try { + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + Document doc = builder.parse(new File(fileName)); + Element root = doc.getDocumentElement(); + if (!"config".equals(root.getTagName())) { + System.err.println("The root of the config file should be <config>"); + return null; + } + + NodeList outputDirectories = root.getElementsByTagName("outputDirectories"); + if (outputDirectories.getLength() != 1) { + System.err.println("Exactly one <outputDirectories> element allowed and required"); + return null; + } + CleanOutputDirectoryTask cleaner = new CleanOutputDirectoryTask(); + Node node = outputDirectories.item(0).getFirstChild(); + while (node != null) { + if (node instanceof Element) { + Element childElement = (Element) node; + String nodeName = childElement.getTagName(); + switch (nodeName) { + case "dir": + Dir dir = getDirectory(childElement); + cleaner.addConfiguredDir(dir); + break; + default: + break; + } + } + node = node.getNextSibling(); + } + return cleaner; + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java index 48eeea9edfab..ac2ac2b57ca4 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/ConvertIcuDataTask.java @@ -15,6 +15,7 @@ import static java.util.stream.Collectors.joining; import static org.unicode.cldr.api.CldrPath.parseDistinguishingPath; +import java.io.File; import java.nio.file.Path; import java.nio.file.Paths; import
java.util.ArrayList; @@ -25,8 +26,9 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; -import org.apache.tools.ant.BuildException; -import org.apache.tools.ant.Task; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; + import org.unicode.cldr.api.CldrDataSupplier; import org.unicode.cldr.api.CldrDraftStatus; import org.unicode.cldr.api.CldrPath; @@ -38,6 +40,10 @@ import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir; import org.unicode.icu.tool.cldrtoicu.PseudoLocales; import org.unicode.icu.tool.cldrtoicu.SupplementalData; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import com.google.common.base.Ascii; import com.google.common.base.CaseFormat; @@ -53,10 +59,9 @@ import com.google.common.collect.Sets; import com.google.common.collect.Table.Cell; -// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed. public final class ConvertIcuDataTask extends Task { private static final Splitter LIST_SPLITTER = - Splitter.on(CharMatcher.anyOf(",\n")).trimResults(whitespace()).omitEmptyStrings(); + Splitter.on(CharMatcher.anyOf(",\n")).trimResults(whitespace()).omitEmptyStrings(); private static final CharMatcher DIGIT_OR_UNDERSCORE = inRange('0', '9').or(is('_')); private static final CharMatcher UPPER_UNDERSCORE = inRange('A', 'Z').or(DIGIT_OR_UNDERSCORE); @@ -77,39 +82,32 @@ public final class ConvertIcuDataTask extends Task { private boolean includePseudoLocales = false; private Predicate idFilter = id -> true; - @SuppressWarnings("unused") public void setOutputDir(String path) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. 
config.setOutputDir(Paths.get(path)); } - @SuppressWarnings("unused") public void setCldrDir(String path) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. this.cldrPath = checkNotNull(Paths.get(path)); } - @SuppressWarnings("unused") public void setIcuVersion(String icuVersion) { config.setIcuVersion(icuVersion); } - @SuppressWarnings("unused") public void setIcuDataVersion(String icuDataVersion) { config.setIcuDataVersion(icuDataVersion); } - @SuppressWarnings("unused") public void setCldrVersion(String cldrVersion) { config.setCldrVersion(cldrVersion); } - @SuppressWarnings("unused") public void setMinimalDraftStatus(String status) { minimumDraftStatus = resolve(CldrDraftStatus.class, status); } - @SuppressWarnings("unused") public void setOutputTypes(String types) { ImmutableList typeList = LIST_SPLITTER @@ -121,23 +119,19 @@ public void setOutputTypes(String types) { } } - @SuppressWarnings("unused") public void setSpecialsDir(String path) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. 
config.setSpecialsDir(Paths.get(path)); } - @SuppressWarnings("unused") public void setIncludePseudoLocales(boolean includePseudoLocales) { this.includePseudoLocales = includePseudoLocales; } - @SuppressWarnings("unused") public void setLocaleIdFilter(String idFilterRegex) { this.idFilter = Pattern.compile(idFilterRegex).asPredicate(); } - @SuppressWarnings("unused") public void setEmitReport(boolean emit) { config.setEmitReport(emit); } @@ -145,7 +139,6 @@ public void setEmitReport(boolean emit) { public static final class LocaleIds extends Task { private ImmutableSet ids; - @SuppressWarnings("unused") public void addText(String localeIds) { this.ids = parseLocaleIds(localeIds); } @@ -162,22 +155,18 @@ public static final class Directory extends Task { private final List forcedAliases = new ArrayList<>(); private LocaleIds localeIds = null; - @SuppressWarnings("unused") public void setDir(String directory) { this.dir = resolve(IcuLocaleDir.class, directory); } - @SuppressWarnings("unused") public void setInheritLanguageSubtag(String localeIds) { this.inheritLanguageSubtag = parseLocaleIds(localeIds); } - @SuppressWarnings("unused") public void addConfiguredForcedAlias(ForcedAlias alias) { forcedAliases.add(alias); } - @SuppressWarnings("unused") public void addConfiguredLocaleIds(LocaleIds localeIds) { checkBuild(this.localeIds == null, "Cannot add more that one element for : %s", dir); @@ -195,12 +184,10 @@ public static final class ForcedAlias extends Task { private String source = ""; private String target = ""; - @SuppressWarnings("unused") public void setSource(String source) { this.source = whitespace().trimFrom(source); } - @SuppressWarnings("unused") public void setTarget(String target) { this.target = whitespace().trimFrom(target); } @@ -217,17 +204,14 @@ public static final class AltPath extends Task { private String target = ""; private ImmutableSet localeIds = ImmutableSet.of(); - @SuppressWarnings("unused") public void setTarget(String target) { 
this.target = target.replace('\'', '"'); } - @SuppressWarnings("unused") public void setSource(String source) { this.source = source.replace('\'', '"'); } - @SuppressWarnings("unused") public void setLocales(String localeIds) { this.localeIds = parseLocaleIds(localeIds); } @@ -239,13 +223,11 @@ public void init() throws BuildException { } } - @SuppressWarnings("unused") public void addConfiguredLocaleIds(LocaleIds localeIds) { checkBuild(this.localeIds == null, "Cannot add more that one element"); this.localeIds = localeIds; } - @SuppressWarnings("unused") public void addConfiguredDirectory(Directory filter) { checkState(!perDirectoryIds.containsKey(filter.dir), "directory %s specified twice", filter.dir); @@ -289,14 +271,12 @@ public void addConfiguredDirectory(Directory filter) { } // Aliases on the outside are applied to all directories. - @SuppressWarnings("unused") public void addConfiguredForcedAlias(ForcedAlias alias) { for (IcuLocaleDir dir : IcuLocaleDir.values()) { config.addForcedAlias(dir, alias.source, alias.target); } } - @SuppressWarnings("unused") public void addConfiguredAltPath(AltPath altPath) { // Don't convert to CldrPath here (it triggers a bunch of CLDR data loading for the DTDs). 
// Wait until the "execute()" method since in future we expect to use the configured CLDR @@ -304,7 +284,6 @@ public void addConfiguredAltPath(AltPath altPath) { altPaths.add(altPath); } - @SuppressWarnings("unused") public void execute() throws BuildException { // Spin up CLDRConfig outside of other inner loops, to // avoid static init problems seen in CLDR-14636 @@ -408,4 +387,128 @@ private static > T resolve(Class enumClass, String name) { "invalid enumeration name " + name + "; expected one of; " + validNames); } } + + private static AltPath getAltPath(Element elem) { + if (!"altPath".equals(elem.getTagName())) { + return null; + } + String source = elem.getAttribute("source"); + String target = elem.getAttribute("target"); + String locales = elem.getAttribute("locales"); + AltPath ap = new AltPath(); + ap.setSource(source); + ap.setTarget(target); + ap.setLocales(locales); + ap.init(); + return ap; + } + + private static ForcedAlias getForcedAlias(Element elem) { + if (!"forcedAlias".equals(elem.getTagName())) { + return null; + } + String source = elem.getAttribute("source"); + String target = elem.getAttribute("target"); + ForcedAlias fa = new ForcedAlias(); + fa.setSource(source); + fa.setTarget(target); + fa.init(); + return fa; + } + + private static LocaleIds getLocaleIds(Element elem) { + if (!"localeIds".equals(elem.getTagName())) { + return null; + } + LocaleIds localeIds = new LocaleIds(); + String strLocaleIds = elem.getTextContent(); + localeIds.addText(strLocaleIds); + localeIds.init(); + return localeIds; + } + + private static Directory getDirectory(Element element) { + if (!"directory".equals(element.getTagName())) { + return null; + } + String dir = element.getAttribute("dir"); + String inheritLanguageSubtag = element.getAttribute("inheritLanguageSubtag"); + Directory directory = new Directory(); + directory.setDir(dir); + directory.setInheritLanguageSubtag(inheritLanguageSubtag); + Node node = element.getFirstChild(); + while (node != null) 
{ + if (node.getNodeType() == Node.ELEMENT_NODE) { + Element childElement = (Element) node; + switch (childElement.getTagName()) { + case "localeIds": + LocaleIds localeIds = getLocaleIds(childElement); + directory.addConfiguredLocaleIds(localeIds); + break; + case "forcedAlias": + ForcedAlias fa = getForcedAlias(childElement); + directory.addConfiguredForcedAlias(fa); + break; + default: + } + } + node = node.getNextSibling(); + } + if (directory.localeIds == null) { + directory.addConfiguredLocaleIds(new LocaleIds()); + } + directory.init(); + return directory; + } + + public static ConvertIcuDataTask fromXml(String fileName) { + try { + DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); + Document doc = builder.parse(new File(fileName)); + Element root = doc.getDocumentElement(); + if (!"config".equals(root.getTagName())) { + System.err.println("The root of the config file should be <config>"); + return null; + } + + NodeList convertNodes = root.getElementsByTagName("convert"); + if (convertNodes.getLength() != 1) { + System.err.println("Exactly one <convert> element allowed and required"); + return null; + } + ConvertIcuDataTask converter = new ConvertIcuDataTask(); + Node node = convertNodes.item(0).getFirstChild(); + while (node != null) { + if (node instanceof Element) { + Element childElement = (Element) node; + String nodeName = childElement.getTagName(); + switch (nodeName) { + case "localeIds": + LocaleIds localeIds = getLocaleIds(childElement); + converter.addConfiguredLocaleIds(localeIds); + break; + case "directory": + Directory directory = getDirectory(childElement); + converter.addConfiguredDirectory(directory); + break; + case "forcedAlias": + ForcedAlias fa = getForcedAlias(childElement); + converter.addConfiguredForcedAlias(fa); + break; + case "altPath": + AltPath altPath = getAltPath(childElement); + converter.addConfiguredAltPath(altPath); + break; + default: + break; + } + } + node = node.getNextSibling(); + } + return
converter; + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/GenerateCodeTask.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/GenerateCodeTask.java index 9ab24778af99..004137f5f447 100644 --- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/GenerateCodeTask.java +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/GenerateCodeTask.java @@ -12,12 +12,9 @@ import java.nio.file.Path; import java.nio.file.Paths; -import org.apache.tools.ant.BuildException; -import org.apache.tools.ant.Task; import org.unicode.icu.tool.cldrtoicu.CodeGenerator; import org.unicode.icu.tool.cldrtoicu.generator.ResourceFallbackCodeGenerator; -// Note: Auto-magical Ant methods are listed as "unused" by IDEs, unless the warning is suppressed. public final class GenerateCodeTask extends Task { private Path cldrPath; private Path cOutDir; @@ -40,31 +37,26 @@ public GeneratedFileDef(String cRelativePath, String javaRelativePath, CodeGener new GeneratedFileDef("common/localefallback_data.h", "src/main/java/com/ibm/icu/impl/LocaleFallbackData.java", new ResourceFallbackCodeGenerator()), }; - @SuppressWarnings("unused") public void setCldrDir(String path) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. this.cldrPath = checkNotNull(Paths.get(path)); } - @SuppressWarnings("unused") public void setCOutDir(String path) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. this.cOutDir = Paths.get(path); } - @SuppressWarnings("unused") public void setJavaOutDir(String path) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. 
this.javaOutDir = Paths.get(path); } - @SuppressWarnings("unused") public void setAction(String action) { // Use String here since on some systems Ant doesn't support automatically converting Path instances. this.action = action; } - @SuppressWarnings("unused") public void execute() throws BuildException { for (GeneratedFileDef task : generatedFileDefs) { Path cOutPath = cOutDir.resolve(task.cRelativePath); @@ -91,5 +83,4 @@ public void execute() throws BuildException { } } } - } diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/Task.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/Task.java new file mode 100644 index 000000000000..049a939b2c83 --- /dev/null +++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/Task.java @@ -0,0 +1,25 @@ +// © 2024 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +package org.unicode.icu.tool.cldrtoicu.ant; + +public class Task { + public static class BuildException extends RuntimeException { + private static final long serialVersionUID = 2430911677116799373L; + + public BuildException(String message, Throwable cause) { + super(message, cause); + } + + public BuildException(String message) { + super(message); + } + } + + void log(String format) { + System.out.println(format); + } + + public void execute() throws BuildException {} + + public void init() throws BuildException {} +} diff --git a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt index 09025850a4f1..efef06f32d2c 100644 --- a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt +++ b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt @@ -1,3 +1,3 @@ © 2016 and later: Unicode, Inc. and others. 
License & terms of use: http://www.unicode.org/copyright.html -Generated using tools/cldr/cldr-to-icu/build-icu-data.xml +Generated using tools/cldr/cldr-to-icu/ diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java index ca15ad4fe5e6..f90a5d54ac80 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/SupplementalDataTest.java @@ -4,7 +4,6 @@ import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.Truth.assertWithMessage; -import static com.google.common.truth.Truth8.assertThat; import static org.unicode.cldr.api.CldrValue.parseValue; import java.nio.file.Path; @@ -38,7 +37,11 @@ public class SupplementalDataTest { @BeforeClass public static void loadRegressionData() { - Path cldrRoot = Paths.get(System.getProperty("CLDR_DIR")); + String cldrDir = System.getProperty("CLDR_DIR"); + if (cldrDir == null) { + cldrDir = System.getenv("CLDR_DIR"); + } + Path cldrRoot = Paths.get(cldrDir); regressionData = SupplementalData.create(CldrDataSupplier.forCldrFilesIn(cldrRoot)); likelySubtags = new LikelySubtags(); } diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTaskTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTaskTest.java index 08962fcb4c05..881da25d0078 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTaskTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTaskTest.java @@ -18,7 +18,7 @@ public class CleanOutputDirectoryTaskTest { // Not using the original field since we want this test to fail if this changes unexpectedly. 
private static final String WAS_GENERATED_LABEL = - "Generated using tools/cldr/cldr-to-icu/build-icu-data.xml"; + "Generated using tools/cldr/cldr-to-icu/"; // Commented version of the label for test data. private static final String WAS_GENERATED_LINE = "// " + WAS_GENERATED_LABEL; diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/localedistance/LocaleDistanceMapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/localedistance/LocaleDistanceMapperTest.java index 4e1fafe334f4..1b94b6e78668 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/localedistance/LocaleDistanceMapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/localedistance/LocaleDistanceMapperTest.java @@ -135,14 +135,15 @@ public void testEndToEnd() { // LSR values come in (language, script, region) tuples. They are the mapped-to // values for the likely subtag mappings, ordered by the DTD order in which the // mapping keys were encountered. - assertThat(icuData).hasValuesFor("likely/lsrs", - "", "", "", - "skip", "script", "", - "zh", "Hans", "CN", - "zh", "Hant", "TW", - "en", "Latn", "US", - "zh", "Hant", "HK", - "zh", "Hant", "MO"); + assertThat(icuData).hasValuesFor("likely/lsrnum:intvector", + "0", // "", "", "" + "1", // "skip", "script", "" + "1232236233", // "zh", "Hans", "CN" + "1254131029", // "zh", "Hant", "TW" + "429941505", // "en", "Latn", "US" + "1247517541", // "zh", "Hant", "HK" + "1249741720" // "zh", "Hant", "MO" + ); // It's a bit easier to see how match keys are grouped against the partitions. ImmutableSetMultimap likelyTrie = @@ -174,11 +175,12 @@ public void testEndToEnd() { // Pairs of expanded paradigm locales (using LSR tuples) in declaration order. // This is just the list from the CLDR data with no processing. 
- assertThat(icuData).hasValuesFor("match/paradigms", - "en", "Latn", "US", - "en", "Latn", "GB", - "es", "Latn", "ES", - "es", "Latn", "419"); + assertThat(icuData).hasValuesFor("match/paradigmnum:intvector", + "429941505", // "en", "Latn", "US" + "420631446", // "en", "Latn", "GB" + "429626712", // "es", "Latn", "ES" + "419470284" // "es", "Latn", "419" + ); // See PartitionInfoTest for a description of the ordering of these strings. assertThat(icuData).hasValuesFor("match/partitions", diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java index d5adde881cda..d65ce49df485 100644 --- a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java +++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/mapper/Bcp47MapperTest.java @@ -28,7 +28,9 @@ public class Bcp47MapperTest { RbPath.of("typeAlias", "timezone:alias"), RbValue.of("/ICUDATA/timezoneTypes/typeAlias/timezone"), RbPath.of("typeMap", "timezone:alias"), - RbValue.of("/ICUDATA/timezoneTypes/typeMap/timezone")); + RbValue.of("/ICUDATA/timezoneTypes/typeMap/timezone"), + RbPath.of("ianaMap", "timezone:alias"), + RbValue.of("/ICUDATA/timezoneTypes/ianaMap/timezone")); @Test public void testSimple() { diff --git a/tools/cldr/lib/README.txt b/tools/cldr/lib/README.txt deleted file mode 100644 index bfca72d31fec..000000000000 --- a/tools/cldr/lib/README.txt +++ /dev/null @@ -1,101 +0,0 @@ -********************************************************************* -*** © 2019 and later: Unicode, Inc. and others. *** -*** License & terms of use: http://www.unicode.org/copyright.html *** -********************************************************************* - -What is this directory and why is it empty? 
-------------------------------------------- - -This is the root of a local Maven repository which needs to be populated before -code which uses the CLDR data API can be executed. - -To do this, you need to have a local copy of the CLDR project configured on your -computer and be able able to build the API jar file and copy an existing utility -jar file. In the examples below it is assumed that $CLDR_ROOT references this -CLDR release. - -Setup ------ - -This project relies on the Maven build tool for managing dependencies and uses -Ant for configuration purposes, so both will need to be installed. On a Debian -based system, this should be as simple as: - -$ sudo apt-get install maven ant - - -Installing the CLDR API jar ---------------------------- - -From this directory: - -$ ./install-cldr-jars.sh "$CLDR_DIR" - - -Manually installing the CLDR API jar ------------------------------------- - -Only follow these remaining steps if the installation script isn't suitable or -doesn't work on your system. - -To regenerate the CLDR API jar you need to build the "jar" target manually -using the Maven pom.xml file in the "tools" directory of the CLDR project: - -$ cd "$CLDR_ROOT/tools" -$ mvn package -DskipTests=true - -This should result in the cldr-code.jar file being built into the cldr-code/target -sub-directory, which can then be installed as a Maven dependency as described above. - - -Updating local Maven repository -------------------------------- - -To update the local Maven repository (e.g. to install the CLDR jar) then from -this directory (lib/) you should run: - -$ mvn install:install-file \ - -Dproject.parent.relativePath="" \ - -DgroupId=org.unicode.cldr \ - -DartifactId=cldr-api \ - -Dversion=0.1-SNAPSHOT \ - -Dpackaging=jar \ - -DgeneratePom=true \ - -DlocalRepositoryPath=. 
\ - -Dfile="$CLDR_ROOT/tools/cldr-code/target/cldr-code.jar" - -And if you have updated one of these libraries then from this directory run: - -$ mvn dependency:purge-local-repository \ - -Dproject.parent.relativePath="" \ - -DmanualIncludes=org.unicode.cldr:cldr-api:jar - -After doing this, you should see something like the following list of files in -this directory: - -README.txt <-- this file -org/unicode/cldr/cldr-api/maven-metadata-local.xml -org/unicode/cldr/cldr-api/0.1-SNAPSHOT/maven-metadata-local.xml -org/unicode/cldr/cldr-api/0.1-SNAPSHOT/cldr-api-0.1-SNAPSHOT.pom -org/unicode/cldr/cldr-api/0.1-SNAPSHOT/cldr-api-0.1-SNAPSHOT.jar - -Finally, if you choose to update the version number of the snapshot, then also -update all the the pom.xml files which reference it (but this is unlikely to be -necessary). - -Troubleshooting ---------------- - -While the Maven system should keep the CLDR JAR up to date, there is a chance -that you may have an out of date JAR installed elsewhere. If you have any -issues with the JAR not being the expected version (e.g. after making changes) -then run the above "purge" step again, from this directory. - -This should re-resolve the current JAR snapshot from the repository in this -directory. Having purged the Maven cache, next time you build a project, you -should see something like: - -[exec] Downloading from : /org/unicode/cldr/cldr-api/0.1-SNAPSHOT/maven-metadata.xml -[exec] [INFO] Building jar: /tools/cldr/cldr-to-icu/target/cldr-to-icu-1.0-SNAPSHOT-jar-with-dependencies.jar - -This shows that it has had to re-fetch the JAR file. diff --git a/tools/cldr/lib/install-cldr-jars.sh b/tools/cldr/lib/install-cldr-jars.sh deleted file mode 100755 index 2ba989e9fa9e..000000000000 --- a/tools/cldr/lib/install-cldr-jars.sh +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/bash -u -# -##################################################################### -### © 2020 and later: Unicode, Inc. and others. 
### -### License & terms of use: http://www.unicode.org/copyright.html ### -##################################################################### -# -# This script will attempt to build and install the necessary CLDR JAR files -# from a given CLDR installation root directory. The JAR files are installed -# according to the manual instructions given in README.txt and lib/README.txt. -# -# The user must have installed both 'ant' and 'maven' in accordance with the -# instructions in README.txt before attempting to run this script. -# -# Usage (from the directory of this script): -# -# ./install-cldr-jars.sh -# -# Note to maintainers: This script cannot be assumed to run on a Unix/Linux -# based system, and while a Posix compliant bash shell is required, any -# assumptions about auxiliary Unix tools should be minimized (e.g. things -# like "dirname" or "tempfile" may not exist). Where bash-only alternatives -# have to be used, they should be clearly documented. - -# Exit with a message for fatal errors. -function die() { - echo "$1" - echo "Exiting..." - exit 1 -} >&2 - -# Runs a given command and captures output to the global log file. -# If a command errors, the user can then view the log file. -function run_with_logging() { - echo >> "${LOG_FILE}" - echo "Running: ${@}" >> "${LOG_FILE}" - echo -- "----------------------------------------------------------------" >> "${LOG_FILE}" - "${@}" >> "${LOG_FILE}" 2>&1 - if (( $? != 0 )) ; then - echo -- "---- Previous command failed ----" >> "${LOG_FILE}" - echo "Error running: ${@}" - read -p "Show log file? " -n 1 -r - echo - if [[ "${REPLY}" =~ ^[Yy]$ ]] ; then - less -RX "${LOG_FILE}" - fi - echo "Log file: ${LOG_FILE}" - exit 1 - fi - echo -- "---- Previous command succeeded ----" >> "${LOG_FILE}" -} - -# First require that we are run from the same directory as the script. -# Can't assume users have "dirname" available so hack it a bit with shell -# substitution (if no directory path was prepended, SCRIPT_DIR==$0). 
-SCRIPT_DIR=${0%/*} -if [[ "$SCRIPT_DIR" != "$0" ]] ; then - cd $SCRIPT_DIR -fi - -# Check for some expected environmental things early. -which ant > /dev/null || die "Cannot find Ant executable 'ant' in the current path." -which mvn > /dev/null || die "Cannot find Maven executable 'mvn' in the current path." - -# Check there's one argument that points at a directory (or a symbolic link to a directory). -(( $# == 1 )) && [[ -d "$1" ]] || die "Usage: ./install-cldr-jars.sh " - -# Set up a log file (and be nice about tidying it up). -# Cannot assume "tempfile" exists so use a timestamp (we expect "date" to exist though). -LOG_FILE="${TMPDIR:-/tmp}/cldr2icu_log_$(date '+%m%d_%H%M%S').txt" -touch $LOG_FILE || die "Cannot create temporary file: ${LOG_FILE}" -echo -- "---- LOG FILE ---- $(date '+%F %T') ----" >> "${LOG_FILE}" - -# Build the cldr-code.jar in the cldr-code/target subdirectory of the CLDR tools directory. -CLDR_TOOLS_DIR="$1/tools" -pushd "${CLDR_TOOLS_DIR}" > /dev/null || die "Cannot change directory to: ${CLDR_TOOLS_DIR}" - -echo "Building CLDR JAR file..." -run_with_logging mvn package -DskipTests=true -[[ -f "cldr-code/target/cldr-code.jar" ]] || die "Error creating cldr-code.jar file" - -popd > /dev/null - -# The -B flag is "batch" mode and won't mess about with escape codes in the log file. -echo "Installing CLDR JAR file..." -run_with_logging mvn -B install:install-file \ - -Dproject.parent.relativePath="" \ - -DgroupId=org.unicode.cldr \ - -DartifactId=cldr-api \ - -Dversion=0.1-SNAPSHOT \ - -Dpackaging=jar \ - -DgeneratePom=true \ - -DlocalRepositoryPath=. \ - -Dfile="${CLDR_TOOLS_DIR}/cldr-code/target/cldr-code.jar" - -echo "Syncing local Maven repository..." -run_with_logging mvn -B dependency:purge-local-repository \ - -Dproject.parent.relativePath="" \ - -DmanualIncludes=org.unicode.cldr:cldr-api:jar - -echo "All done!" 
-echo "Log file: ${LOG_FILE}" diff --git a/tools/cldr/lib/pom.xml b/tools/cldr/lib/pom.xml deleted file mode 100644 index 842e226dc3b1..000000000000 --- a/tools/cldr/lib/pom.xml +++ /dev/null @@ -1,53 +0,0 @@ - - - - 4.0.0 - - - - - pom - - - org.unicode.icu - cldr-lib - 1.0 - - - - - local-maven-repo - file://${project.basedir}/${project.parent.relativePath} - - - - - - - org.unicode.cldr - cldr-api - 0.1-SNAPSHOT - - - -