From 11a31c90dfb1d61eafc903ce7f6b491eba9ecb4e Mon Sep 17 00:00:00 2001
From: Frank Tang
Date: Mon, 20 Nov 2023 17:58:08 -0800
Subject: [PATCH] ICU-22549 Add RuleBasedBreakIterator fuzzer

---
 icu4c/source/test/fuzzer/Makefile.in          |  2 +-
 .../rule_based_break_iterator_fuzzer.cpp      | 39 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 icu4c/source/test/fuzzer/rule_based_break_iterator_fuzzer.cpp

diff --git a/icu4c/source/test/fuzzer/Makefile.in b/icu4c/source/test/fuzzer/Makefile.in
index 28edbcb4963c..5f895a73950d 100644
--- a/icu4c/source/test/fuzzer/Makefile.in
+++ b/icu4c/source/test/fuzzer/Makefile.in
@@ -33,7 +33,7 @@ CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcd
 DEFS += -D'U_TOPSRCDIR="$(top_srcdir)/"' -D'U_TOPBUILDDIR="$(BUILDDIR)"'
 LIBS = $(LIBCTESTFW) $(LIBICUTOOLUTIL) $(LIBICUIO) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
 
-FUZZER_TARGETS = break_iterator_fuzzer calendar_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer date_format_fuzzer list_format_fuzzer locale_fuzzer locale_morph_fuzzer number_format_fuzzer relative_date_time_formatter_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer
+FUZZER_TARGETS = break_iterator_fuzzer calendar_fuzzer collator_compare_fuzzer collator_rulebased_fuzzer converter_fuzzer date_format_fuzzer list_format_fuzzer locale_fuzzer locale_morph_fuzzer number_format_fuzzer relative_date_time_formatter_fuzzer rule_based_break_iterator_fuzzer ucasemap_fuzzer uloc_canonicalize_fuzzer uloc_for_language_tag_fuzzer uloc_get_name_fuzzer uloc_is_right_to_left_fuzzer uloc_open_keywords_fuzzer unicode_string_codepage_create_fuzzer uregex_open_fuzzer
 
 OBJECTS = $(FUZZER_TARGETS:%=%.o)
 OBJECTS += fuzzer_driver.o locale_util.o
diff --git a/icu4c/source/test/fuzzer/rule_based_break_iterator_fuzzer.cpp b/icu4c/source/test/fuzzer/rule_based_break_iterator_fuzzer.cpp
new file mode 100644
index 000000000000..7344ecb7d99e
--- /dev/null
+++ b/icu4c/source/test/fuzzer/rule_based_break_iterator_fuzzer.cpp
@@ -0,0 +1,39 @@
+// © 2023 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+
+#include "fuzzer_utils.h"
+#include "unicode/localpointer.h"
+#include "unicode/locid.h"
+#include "unicode/rbbi.h"
+
+// Global IcuEnvironment instance, created once during static initialization.
+IcuEnvironment* env = new IcuEnvironment();
+
+// libFuzzer entry point: treats the input bytes as UTF-16 rule text and tries
+// to build a RuleBasedBreakIterator from it.
+//
+// data/size: raw fuzzer input. Always returns 0; whatever the constructor
+// writes to status/parse is never inspected.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;
+
+  // Copy the bytes into a char16_t buffer instead of casting `data`;
+  // size/2 rounds down, so an odd trailing byte is dropped.
+  size_t unistr_size = size/2;
+  std::unique_ptr<char16_t[]> fuzzbuff(new char16_t[unistr_size]);
+  std::memcpy(fuzzbuff.get(), data, unistr_size * 2);
+  // `false` = not NUL-terminated. Per the ICU API docs this constructor
+  // aliases the buffer rather than copying it.
+  icu::UnicodeString fuzzstr(false, fuzzbuff.get(), unistr_size);
+
+  // Build the iterator from the fuzzed rules; brk holds the new object.
+  UParseError parse;
+  icu::LocalPointer<icu::RuleBasedBreakIterator> brk(
+      new icu::RuleBasedBreakIterator(fuzzstr, parse, status));
+
+  return 0;
+}