Skip to content

Commit

Permalink
Update collation generator (unicode-org#356)
Browse files Browse the repository at this point in the history
* Better parsing of collationtest.txt data

* Fixing how collation rules are parsed. Removes many errors

* Minor fixes

* Minor fixes

* String spaces in rules

* Fix logic of resetting rule set

* fix utf-8

* Remove encode/decode from test generation. Add characterization options.

* Update verifier/testreport.py

Co-authored-by: Elango Cheran <[email protected]>

---------

Co-authored-by: Elango Cheran <[email protected]>
  • Loading branch information
sven-oly and echeran authored Dec 14, 2024
1 parent 53f1861 commit aeba024
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 41 deletions.
2 changes: 1 addition & 1 deletion testgen/generators/collation_short.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def check_parse_compare(self, line_index, lines):
raw_string2 = is_comparison_match.group(3)
string2 = ''
try:
string2 = raw_string2.encode().decode("unicode_escape")
string2 = raw_string2 # Don't do any unescaping
except Exception as err:
# Catch an error. What should be done here ???
string2_errors.append([line_index, raw_string2, err])
Expand Down
94 changes: 54 additions & 40 deletions verifier/testreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,15 +651,22 @@ def characterize_results_by_options(self, test_list, category):
'strength', 'caseFirst', 'backwards',
'reorder', 'maxVariable',
'source_file'
# TODO!!! Characterize by actual_options keys & values
]
for key in key_list:
if test.get(key, None): # For collation results
value = test[key]
if not isinstance(value, str):
# Make this a string for comparison
value = str(value)
if key not in results:
results[key] = {}
if value not in results[key]:
results[key][value] = set()
results[key][value].add(label)
try:
if value not in results[key]:
results[key][value] = set()
results[key][value].add(label)
except:
pass

ki_key_list = ['known_issue', 'known_issue_id']
for key in ki_key_list:
Expand All @@ -674,27 +681,30 @@ def characterize_results_by_options(self, test_list, category):
# Look at the input_data part of the test result
# TODO: Check the error_detail and error parts, too.
key_list = [
'compare_type',
'error_detail',
'ignorePunctuation',
'language_label',
'languageDisplay',
'locale_label',
'locale',
'options',
'rules',
'test_description',
'unsupported_options',
'style',
'type',
'dateStyle',
'timeStyle,'
'calendar',
'unit',
'count'
]
'compare_type',
'error_detail',
'ignorePunctuation',
'language_label',
'languageDisplay',
'locale_label',
'locale',
'options',
'rules',
'test_description',
'unsupported_options',
'style',
'type',
'dateStyle',
'timeStyle,'
'calendar',
'unit',
'count',
'source_file'
]

self.add_to_results_by_key(label, results, input_data, test, key_list)
if 'actual_options' in test:
self.add_to_results_by_key(label, results, test['actual_options'], test, key_list)

# Special case for input_data / options.
special_key = 'options'
Expand Down Expand Up @@ -748,24 +758,28 @@ def characterize_datetime_tests(self, test_list, results):
def add_to_results_by_key(self, label, results, input_data, test, key_list):
    """Record which test labels use each value of the selected input keys.

    For every key in ``key_list`` that has a truthy value in ``input_data``,
    ``label`` is added to the set stored at ``results[key][value]``.

    Args:
        label: Identifier of the test being characterized.
        results: Dict updated in place, shaped ``{key: {value: set(labels)}}``.
        input_data: Mapping of test inputs/options. Falsy or non-mapping
            values (anything without ``.get``) are ignored.
        test: The full test record. Not used here; kept for interface
            compatibility with existing callers.
        key_list: Names of the keys in ``input_data`` to characterize.

    Returns:
        None. ``results`` is mutated in place.
    """
    # Characterization is best-effort: data that is not mapping-like is
    # skipped explicitly instead of relying on a catch-all exception handler.
    if not input_data or not hasattr(input_data, 'get'):
        return

    for key in key_list:
        value = input_data.get(key, None)
        if not value:
            # Missing and falsy values (None, '', 0, False, empty containers)
            # are not characterized.
            continue

        if key == 'input_list':
            # Record the size of the input list in the same
            # {value: set(labels)} shape used for every other key.
            # NOTE(review): the earlier code created a dict here and then
            # called .add() on it, so nothing was ever recorded -- confirm
            # that report consumers expect this shape for 'input_size'.
            try:
                size_bucket = results.setdefault('input_size', {})
                size_bucket.setdefault(len(value), set()).add(label)
            except (TypeError, AttributeError):
                # value has no length, or label is unhashable: skip the size.
                pass

        if key == 'rules':
            value = 'RULE'  # A special case to avoid over-characterization

        # The per-key bucket is created even when the value turns out to be
        # unhashable, so the key is still present in the results.
        bucket = results.setdefault(key, {})
        try:
            bucket.setdefault(value, set()).add(label)
        except (TypeError, AttributeError):
            # value (e.g. a list or dict) or label is not hashable, or the
            # existing bucket has an unexpected type. This should be skipped.
            pass

def check_simple_text_diffs(self, test_list, category):
results = defaultdict(list)
all_checks = ['insert', 'delete', 'insert_digit', 'insert_space', 'delete_digit',
Expand Down

0 comments on commit aeba024

Please sign in to comment.