Skip to content

Commit

Permalink
Datetime date from CLDR data (unicode-org#329)
Browse files Browse the repository at this point in the history
* Fixes problems creating subdirectories in output

* Using datetime.json from CLDR for test/verify

* Starting to support CLDR-based datetime_fmt test data

* Starting to update datetime generator from CLDR data

* Add timeZone to test options

* Adding known issue checking

* Fix reference to datetime.UTC for Python3.10

* Set DateTime tests failing with dateTimeFormatType as 'KnownIssue'

* Remove unused import

* Actually append the label
  • Loading branch information
sven-oly authored Nov 15, 2024
1 parent 10b91bb commit 2300fe3
Show file tree
Hide file tree
Showing 7 changed files with 112,256 additions and 8 deletions.
1 change: 1 addition & 0 deletions run_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"exec": "cpp",
"test_type": [
"collation_short",
"datetime_fmt",
"lang_names",
"likely_subtags",
"message_fmt2",
Expand Down
1 change: 1 addition & 0 deletions schema/check_generated_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import glob
import json

from jsonschema import Draft7Validator, ValidationError

import logging
import logging.config
Expand Down
2 changes: 1 addition & 1 deletion schema/schema_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

import jsonschema.exceptions
from jsonschema import validate
from jsonschema import validate
from jsonschema import exceptions
from jsonschema import ValidationError

import logging
import logging.config
Expand Down
125 changes: 119 additions & 6 deletions testgen/generators/datetime_fmt.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# -*- coding: utf-8 -*-
from datetime import datetime, timezone

import os
import json
import re
import logging
import math
import subprocess
from generators.base import DataGenerator

Expand All @@ -12,6 +15,110 @@ class DateTimeFmtGenerator(DataGenerator):
# Class-level dicts that tag this generator's test data and verify data with
# the "datetime_fmt" test type.
# NOTE(review): neither attribute is referenced in the visible code; presumably
# they are consumed by the DataGenerator base class -- confirm.
json_test = {"test_type": "datetime_fmt"}
json_verify = {"test_type": "datetime_fmt"}

def generate_datetime_data_from_cldr(self, dt_json_path, run_limit=-1):
    """Create datetime_fmt test and verify files from CLDR-derived JSON.

    The file at ``dt_json_path`` is expected to hold a JSON list. Each entry
    supplies 'input' (an ISO 8601 date/time, optionally followed by a time
    zone identifier in square brackets), 'locale' and 'expected', plus the
    optional keys 'dateLength', 'timeLength', 'calendar' and
    'dateTimeFormatType'. Every selected entry becomes one test case and one
    verification case sharing a zero-padded numeric label. The results are
    handed to ``self.saveJsonFile`` as 'datetime_fmt_test.json' and
    'datetime_fmt_verify.json'.

    Args:
        dt_json_path: path of the CLDR-derived datetime JSON file.
        run_limit: rough upper bound on the number of generated cases. When
            it is not positive, ``self.run_limit`` is used instead. Entries
            are sampled at a fixed stride, so the count may exceed the limit
            slightly.

    Returns:
        Always None, both on success and when the input cannot be parsed or
        an output file cannot be saved (those failures are logged).

    Raises:
        OSError: if ``dt_json_path`` cannot be opened.
        KeyError, ValueError: if an entry lacks a required key or its date
            text is not accepted by ``datetime.fromisoformat``.
    """
    # Get CLDR-derived date time json file and parse it.
    with open(dt_json_path, 'r', encoding="UTF-8") as dt_json_file:
        try:
            json_data = json.load(dt_json_file)
        except json.JSONDecodeError as err:
            logging.error('Cannot parse CLDR datetime data in %s: %s',
                          dt_json_path, err)
            return None

    test_obj = {
        'Test scenario': 'datetime_fmt',
        'test_type': 'datetime_fmt',
        'description': 'date/time format test data generated by Node',
        'icuVersion': self.icu_version,
        'cldrVersion': '??'
    }
    verify_obj = {
        'test_type': 'datetime_fmt',
        'description': 'date/time format test data generated by Node',
        'icuVersion': self.icu_version,
        'cldrVersion': '??'
    }

    test_cases = []
    verify_cases = []

    # Pad labels to the width of the largest index. Counting digits avoids
    # math.log10(0), which raises ValueError for an empty input list.
    desired_width = len(str(max(len(json_data) - 1, 0)))

    # Optionally sample the input: keep every input_increment-th entry.
    limit = run_limit if run_limit is not None and run_limit > 0 else self.run_limit
    input_increment = 1
    if limit > 0:
        # Never let the stride drop to 0 (limit larger than the data size),
        # which would make the modulo below raise ZeroDivisionError.
        input_increment = max(1, len(json_data) // limit)

    for label_num, test_item in enumerate(json_data):
        if label_num % input_increment != 0:
            continue

        label_str = str(label_num).rjust(desired_width, "0")

        # Construct options
        options = {}

        # Split "<ISO date time>[<time zone id>]" into its two parts.
        raw_input = test_item['input']
        start_index = raw_input.find('[')
        end_index = raw_input.find(']')
        if start_index >= 0:
            iso_text = raw_input[:start_index]
            if end_index > start_index:
                options['timeZone'] = raw_input[start_index + 1:end_index]
        else:
            # No bracketed zone: parse the whole string. Slicing with the
            # -1 returned by find() would drop the last character.
            iso_text = raw_input
        raw_time = datetime.fromisoformat(iso_text)

        if raw_time.utcoffset() is None:
            # astimezone() on a naive value would apply the local zone of the
            # machine running the generator, making the output
            # machine-dependent. Treat offset-less input as UTC instead.
            logging.warning('No UTC offset in input %s; assuming UTC', raw_input)
            raw_time = raw_time.replace(tzinfo=timezone.utc)

        # Set the options
        if 'dateLength' in test_item:
            options['dateStyle'] = test_item['dateLength']
        if 'timeLength' in test_item:
            options['timeStyle'] = test_item['timeLength']

        if 'calendar' in test_item:
            options['calendar'] = test_item['calendar']
            # Map the CLDR calendar name to the identifier used in options.
            if options['calendar'] == 'gregorian':
                options['calendar'] = 'gregory'

        # Generate UTC time equivalent and get the offset in seconds
        u_time = raw_time.astimezone(timezone.utc)
        input_string = u_time.isoformat().replace('+00:00', 'Z')
        tz_offset_secs = raw_time.utcoffset().total_seconds()

        new_test = {
            "locale": test_item['locale'],
            "input_string": input_string,
            "options": options,
            'tz_offset_secs': tz_offset_secs,
            'label': label_str,
            'original_input': raw_input
        }
        if 'dateTimeFormatType' in test_item:
            new_test['dateTimeFormatType'] = test_item['dateTimeFormatType']

        new_verify = {
            "label": label_str,
            "verify": test_item['expected']
        }
        test_cases.append(new_test)
        verify_cases.append(new_verify)

    # Save output as: datetime_fmt_test.json and datetime_fmt_verify.json
    test_obj['tests'] = test_cases
    verify_obj['verifications'] = verify_cases
    # Create the hex hash values
    self.generateTestHashValues(test_obj)

    base_path = ''
    dt_test_path = os.path.join(base_path, 'datetime_fmt_test.json')
    dt_verify_path = os.path.join(base_path, 'datetime_fmt_verify.json')

    for out_path, payload in ((dt_test_path, test_obj),
                              (dt_verify_path, verify_obj)):
        try:
            self.saveJsonFile(out_path, payload, indent=2)
        except Exception as err:
            # Report which file failed and stop, as the original did after
            # the first failure. KeyboardInterrupt/SystemExit now propagate.
            logging.error('!!! %s: Failure to save file %s', err, out_path)
            return None
    return None

def process_test_data(self):
# Use Node.js to create the .json files
icu_nvm_versions = {
Expand All @@ -22,11 +129,18 @@ def process_test_data(self):
'icu71': '18.7.0',
}

# Update to check for datetime.json which has been generated from CLDR data.
dt_json_path = os.path.join('.', self.icu_version, 'datetime.json')
if os.path.exists(dt_json_path):
result = self.generate_datetime_data_from_cldr(dt_json_path, self.run_limit)
return result

# OK, there's no CLDR-based JSON data available.
run_list = [
['source ~/.nvm/nvm.sh; nvm install 21.6.0; nvm use 21.6.0 --silent'],
['node generators/datetime_gen.js'],
['mv datetime_fmt*.json icu74']
]
['source ~/.nvm/nvm.sh; nvm install 21.6.0; nvm use 21.6.0 --silent'],
['node generators/datetime_gen.js'],
['mv datetime_fmt*.json icu74']
]

if self.icu_version not in icu_nvm_versions:
logging.warning('Generating datetime data not configured for icu version %s', self.icu_version)
Expand All @@ -39,11 +153,10 @@ def process_test_data(self):
nvm_version, nvm_version, '-run_limit', self.run_limit)

logging.debug('Running this command: %s', generate_command)
result = result = subprocess.run(generate_command, shell=True)
result = subprocess.run(generate_command, shell=True)

# Move results to the right directory
mv_command = 'mv datetime_fmt*.json %s' % self.icu_version
result = subprocess.run(mv_command, shell=True)

return result

Loading

0 comments on commit 2300fe3

Please sign in to comment.