diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 627dd03..b41c764 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: 'pip' @@ -28,11 +28,17 @@ jobs: run: | python -m pip install --upgrade pip pip install .[test] - + + - name: Check Python linting (Ruff) + run: ruff check --output-format=github + + - name: Check Python formatting (Ruff) + run: ruff format --check + - name: Run unit tests run: | pytest - + - name: Run Django integration tests working-directory: ./edtf_django_tests run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ff6df15 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: end-of-file-fixer + exclude: "business-facing/layer" + - id: trailing-whitespace + exclude: "business-facing/layer" + - id: check-yaml + exclude: "business-facing/layer" + - id: check-json + exclude: "business-facing/layer" + + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.4.4 + hooks: + # Run the linter, and enable lint fixes + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format diff --git a/README.md b/README.md index 9456dfa..82a9b7d 100644 --- a/README.md +++ b/README.md @@ -365,3 +365,20 @@ Example usage: Since the `EDTFField` and the `_earliest` and `_latest` field values are set automatically, you may want to make them readonly, or not visible in your model admin. 
+ +## To develop +### Setup +- Clone the repository: `git clone https://github.com/ixc/python-edtf.git` +- Set up a virtual environment: `python3 -m venv venv` +- Install the dependencies: `pip install -r dev-requirements.txt` +- Install pre-commit hooks: `pre-commit install` + +### Running tests +- From `python-edtf`, run the unit tests: `pytest` +- From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration` + +### Linting and formatting +- Check linting: `ruff check --output-format=github --config pyproject.toml` +- Check formatting: `ruff format --check --config pyproject.toml` +- Fix formatting: `ruff format --config pyproject.toml` +- Linting and formatting checks and attempted fixes are also run as pre-commit hooks if you installed them. diff --git a/dev-requirements.txt b/dev-requirements.txt new file mode 100644 index 0000000..1e37df5 --- /dev/null +++ b/dev-requirements.txt @@ -0,0 +1,5 @@ +-r requirements.txt # Include all main requirements +django>=4.2,<5.0 +pytest +ruff +pre-commit diff --git a/edtf/__init__.py b/edtf/__init__.py index 291cccc..7bb2885 100644 --- a/edtf/__init__.py +++ b/edtf/__init__.py @@ -1,6 +1,73 @@ -from edtf.parser.grammar import parse_edtf from edtf.natlang import text_to_edtf -from edtf.parser.parser_classes import * -from edtf.convert import dt_to_struct_time, struct_time_to_date, \ - struct_time_to_datetime, trim_struct_time, struct_time_to_jd, \ - jd_to_struct_time, old_specs_to_new_specs_expression +from edtf.parser import ( + UA, + Consecutives, + Date, + DateAndTime, + EarlierConsecutives, + EDTFObject, + EDTFParseException, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, + Level2Season, + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, + UnspecifiedIntervalSection, + parse_edtf, +) + +from .convert import ( + dt_to_struct_time, 
jd_to_struct_time, + old_specs_to_new_specs_expression, + struct_time_to_date, + struct_time_to_datetime, + struct_time_to_jd, + trim_struct_time, +) + +# public +__all__ = [ + "dt_to_struct_time", + "jd_to_struct_time", + "old_specs_to_new_specs_expression", + "struct_time_to_date", + "struct_time_to_datetime", + "struct_time_to_jd", + "trim_struct_time", + "text_to_edtf", + "parse_edtf", + # parser_exceptions + "EDTFParseException", + # parser_classes + "EDTFObject", + "Date", + "DateAndTime", + "Interval", + "UA", + "UncertainOrApproximate", + "UnspecifiedIntervalSection", + "Unspecified", + "Level1Interval", + "LongYear", + "Season", + "PartialUncertainOrApproximate", + "PartialUnspecified", + "Consecutives", + "EarlierConsecutives", + "LaterConsecutives", + "OneOfASet", + "MultipleDates", + "Level2Interval", + "Level2Season", + "ExponentialYear", +] diff --git a/edtf/appsettings.py b/edtf/appsettings.py index 8904c58..e1bc821 100644 --- a/edtf/appsettings.py +++ b/edtf/appsettings.py @@ -2,15 +2,19 @@ try: from django.core.exceptions import ImproperlyConfigured + try: from django.conf import settings - EDTF = getattr(settings, 'EDTF', {}) + + EDTF = getattr(settings, "EDTF", {}) except ImproperlyConfigured: EDTF = {} except ImportError: EDTF = {} -SEASON_MONTHS_RANGE = EDTF.get('SEASON_MONTHS_RANGE', { +SEASON_MONTHS_RANGE = EDTF.get( + "SEASON_MONTHS_RANGE", + { # season id: [earliest_month, last_month] 21: [3, 5], 22: [6, 8], @@ -20,10 +24,12 @@ # For simplicity, we assume it falls at the end of the year, esp since the # spec says that sort order goes spring > summer > autumn > winter 24: [12, 12], - } + }, ) -SEASON_L2_MONTHS_RANGE = EDTF.get('SEASON_L2_MONTHS_RANGE', { +SEASON_L2_MONTHS_RANGE = EDTF.get( + "SEASON_L2_MONTHS_RANGE", + { # season id: [earliest_month, last_month] 21: [3, 5], 22: [6, 8], @@ -57,28 +63,31 @@ 38: [5, 8], 39: [9, 12], 40: [1, 6], - 41: [7, 12] - } + 41: [7, 12], + }, ) -DAY_FIRST = EDTF.get('DAY_FIRST', False) # Americans! 
+DAY_FIRST = EDTF.get("DAY_FIRST", False) # Americans! -SEASONS = EDTF.get('SEASONS', { - 21: "spring", - 22: "summer", - 23: "autumn", - 24: "winter", -}) -INVERSE_SEASONS = EDTF.get('INVERSE_SEASONS', {v: k for k, v in SEASONS.items()}) +SEASONS = EDTF.get( + "SEASONS", + { + 21: "spring", + 22: "summer", + 23: "autumn", + 24: "winter", + }, +) +INVERSE_SEASONS = EDTF.get("INVERSE_SEASONS", {v: k for k, v in SEASONS.items()}) # also need to interpret `fall` -INVERSE_SEASONS['fall'] = 23 +INVERSE_SEASONS["fall"] = 23 # changing these will break tests -PADDING_DAY_PRECISION = EDTF.get('PADDING_DAY_PRECISION', relativedelta(days=1)) -PADDING_MONTH_PRECISION = EDTF.get('PADDING_MONTH_PRECISION', relativedelta(months=1)) -PADDING_YEAR_PRECISION = EDTF.get('PADDING_YEAR_PRECISION', relativedelta(years=1)) -PADDING_SEASON_PRECISION = EDTF.get('PADDING_SEASON_PRECISION', relativedelta(weeks=12)) -MULTIPLIER_IF_UNCERTAIN = EDTF.get('MULTIPLIER_IF_UNCERTAIN', 1.0) -MULTIPLIER_IF_APPROXIMATE = EDTF.get('MULTIPLIER_IF_APPROXIMATE', 1.0) -MULTIPLIER_IF_BOTH = EDTF.get('MULTIPLIER_IF_BOTH', 2.0) +PADDING_DAY_PRECISION = EDTF.get("PADDING_DAY_PRECISION", relativedelta(days=1)) +PADDING_MONTH_PRECISION = EDTF.get("PADDING_MONTH_PRECISION", relativedelta(months=1)) +PADDING_YEAR_PRECISION = EDTF.get("PADDING_YEAR_PRECISION", relativedelta(years=1)) +PADDING_SEASON_PRECISION = EDTF.get("PADDING_SEASON_PRECISION", relativedelta(weeks=12)) +MULTIPLIER_IF_UNCERTAIN = EDTF.get("MULTIPLIER_IF_UNCERTAIN", 1.0) +MULTIPLIER_IF_APPROXIMATE = EDTF.get("MULTIPLIER_IF_APPROXIMATE", 1.0) +MULTIPLIER_IF_BOTH = EDTF.get("MULTIPLIER_IF_BOTH", 2.0) DELTA_IF_UNKNOWN = EDTF.get("DELTA_IF_UNKNOWN", relativedelta(years=10)) diff --git a/edtf/convert.py b/edtf/convert.py index f8d070f..a294462 100644 --- a/edtf/convert.py +++ b/edtf/convert.py @@ -1,12 +1,12 @@ -from time import struct_time from datetime import date, datetime +from time import struct_time from edtf import jdutil - TIME_EMPTY_TIME = [0, 
0, 0] # tm_hour, tm_min, tm_sec TIME_EMPTY_EXTRAS = [0, 0, -1] # tm_wday, tm_yday, tm_isdst + def old_specs_to_new_specs_expression(expression): expression = expression.replace("unknown", "") expression = expression.replace("open", "..") @@ -32,16 +32,15 @@ def dt_to_struct_time(dt): """ if isinstance(dt, datetime): return struct_time( - [dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second] + - TIME_EMPTY_EXTRAS + [dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second] + + TIME_EMPTY_EXTRAS ) elif isinstance(dt, date): return struct_time( [dt.year, dt.month, dt.day] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS ) else: - raise NotImplementedError( - "Cannot convert %s to `struct_time`" % type(dt)) + raise NotImplementedError(f"Cannot convert {type(dt)} to `struct_time`") def struct_time_to_date(st): @@ -112,12 +111,11 @@ def jd_to_struct_time(jd): # This conversion can return negative values for items we do not want to be # negative: month, day, hour, minute, second. year, month, day, hour, minute, second = _roll_negative_time_fields( - year, month, day, hour, minute, second) - - return struct_time( - [year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS + year, month, day, hour, minute, second ) + return struct_time([year, month, day, hour, minute, second] + TIME_EMPTY_EXTRAS) + def _roll_negative_time_fields(year, month, day, hour, minute, second): """ diff --git a/edtf/fields.py b/edtf/fields.py index bbccbcf..f717592 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -1,53 +1,80 @@ -try: - import cPickle as pickle -except: - import pickle +import pickle -from django.db import models from django.core.exceptions import FieldDoesNotExist +from django.db import models +from django.db.models import signals +from django.db.models.query_utils import DeferredAttribute -from edtf import parse_edtf, EDTFObject -from edtf.natlang import text_to_edtf +from edtf import EDTFObject, parse_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd +from 
edtf.natlang import text_to_edtf DATE_ATTRS = ( - 'lower_strict', - 'upper_strict', - 'lower_fuzzy', - 'upper_fuzzy', + "lower_strict", + "upper_strict", + "lower_fuzzy", + "upper_fuzzy", ) -class EDTFField(models.CharField): +class EDTFFieldDescriptor(DeferredAttribute): + """ + Descriptor for the EDTFField's attribute on the model instance. + This updates the dependent fields each time this value is set. + """ + + def __set__(self, instance, value): + # First set the value we are given + instance.__dict__[self.field.attname] = value + # `update_values` may provide us with a new value to set + edtf = self.field.update_values(instance, value) + if edtf != value: + instance.__dict__[self.field.attname] = edtf + +class EDTFField(models.CharField): def __init__( self, - verbose_name=None, name=None, + verbose_name=None, + name=None, natural_text_field=None, direct_input_field=None, lower_strict_field=None, upper_strict_field=None, lower_fuzzy_field=None, upper_fuzzy_field=None, - **kwargs + **kwargs, ): - kwargs['max_length'] = 2000 - self.natural_text_field, self.direct_input_field, \ - self.lower_strict_field, self.upper_strict_field, \ - self.lower_fuzzy_field, self.upper_fuzzy_field = \ - natural_text_field, direct_input_field, lower_strict_field, \ - upper_strict_field, lower_fuzzy_field, upper_fuzzy_field - super(EDTFField, self).__init__(verbose_name, name, **kwargs) - - description = "A field for storing complex/fuzzy date specifications in EDTF format." + kwargs["max_length"] = 2000 + ( + self.natural_text_field, + self.direct_input_field, + self.lower_strict_field, + self.upper_strict_field, + self.lower_fuzzy_field, + self.upper_fuzzy_field, + ) = ( + natural_text_field, + direct_input_field, + lower_strict_field, + upper_strict_field, + lower_fuzzy_field, + upper_fuzzy_field, + ) + super().__init__(verbose_name, name, **kwargs) + + description = ( + "A field for storing complex/fuzzy date specifications in EDTF format." 
+ ) + descriptor_class = EDTFFieldDescriptor def deconstruct(self): - name, path, args, kwargs = super(EDTFField, self).deconstruct() + name, path, args, kwargs = super().deconstruct() if self.natural_text_field: - kwargs['natural_text_field'] = self.natural_text_field + kwargs["natural_text_field"] = self.natural_text_field for attr in DATE_ATTRS: - field = "%s_field" % attr + field = f"{attr}_field" f = getattr(self, field, None) if f: kwargs[field] = f @@ -62,7 +89,7 @@ def from_db_value(self, value, expression, connection): try: # Try to unpickle if the value was pickled - return pickle.loads(value) + return pickle.loads(value) # noqa S301 except (pickle.PickleError, TypeError): # If it fails because it's not pickled data, try parsing as EDTF return parse_edtf(value, fail_silently=True) @@ -79,41 +106,49 @@ def to_python(self, value): def get_db_prep_save(self, value, connection): if value: return pickle.dumps(value) - return super(EDTFField, self).get_db_prep_save(value, connection) + return super().get_db_prep_save(value, connection) def get_prep_value(self, value): # convert python objects to query values - value = super(EDTFField, self).get_prep_value(value) + value = super().get_prep_value(value) if isinstance(value, EDTFObject): return pickle.dumps(value) return value - def pre_save(self, instance, add): + def update_values(self, instance, *args, **kwargs): """ Updates the EDTF value from either the natural_text_field, which is parsed with text_to_edtf() and is used for display, or falling back to the direct_input_field, which allows directly providing an EDTF string. If one of these provides a valid EDTF object, then set the date values accordingly. 
""" - + # Get existing value to determine if update is needed existing_value = getattr(instance, self.attname, None) - direct_input = getattr(instance, self.direct_input_field, None) - natural_text = getattr(instance, self.natural_text_field, None) + direct_input = getattr(instance, self.direct_input_field, "") + natural_text = getattr(instance, self.natural_text_field, "") # if direct_input is provided and is different from the existing value, update the EDTF field - if direct_input and (existing_value is None or str(existing_value) != direct_input): - edtf = parse_edtf(direct_input, fail_silently=True) # ParseException if invalid; should this be raised? + if direct_input and ( + existing_value is None or str(existing_value) != direct_input + ): + edtf = parse_edtf( + direct_input, fail_silently=True + ) # ParseException if invalid; should this be raised? # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) # set the natural_text (display) field to the direct_input if it is not provided - if natural_text is None: + if natural_text == "": setattr(instance, self.natural_text_field, direct_input) elif natural_text: edtf_string = text_to_edtf(natural_text) - if edtf_string and (existing_value is None or str(existing_value) != edtf_string): - edtf = parse_edtf(edtf_string, fail_silently=True) # potetial ParseException if invalid; should this be raised? + if edtf_string and ( + existing_value is None or str(existing_value) != edtf_string + ): + edtf = parse_edtf( + edtf_string, fail_silently=True + ) # potential ParseException if invalid; should this be raised? else: edtf = existing_value else: 
edtf = existing_value - # Update the actual EDTF field in the model if there is a change - if edtf != existing_value: - setattr(instance, self.attname, edtf) - # Process and update related date fields based on the EDTF object for attr in DATE_ATTRS: field_attr = f"{attr}_field" @@ -144,10 +175,19 @@ def pre_save(self, instance, add): value = struct_time_to_date(value) else: raise NotImplementedError( - u"EDTFField does not support %s as a derived data" - u" field, only FloatField or DateField" - % type(target_field)) + f"EDTFField does not support {type(target_field)} as a derived data" + " field, only FloatField or DateField" + ) setattr(instance, g, value) else: setattr(instance, g, None) return edtf + + def contribute_to_class(self, cls, name, **kwargs): + super().contribute_to_class(cls, name, **kwargs) + # Attach update_values so that dependent fields declared + # after their corresponding edtf field don't stay cleared by + # Model.__init__, see Django bug #11196. + # Only run post-initialization values update on non-abstract models + if not cls._meta.abstract: + signals.post_init.connect(self.update_values, sender=cls) diff --git a/edtf/jdutil.py b/edtf/jdutil.py index 9fabdd1..16cd312 100644 --- a/edtf/jdutil.py +++ b/edtf/jdutil.py @@ -9,14 +9,15 @@ """ -import math import datetime as dt +import math # Note: The Python datetime module assumes an infinitely valid Gregorian calendar. # The Gregorian calendar took effect after 10-15-1582 and the dates 10-05 through # 10-14-1582 never occurred. Python datetime objects will produce incorrect # time deltas if one date is from before 10-15-1582. + def mjd_to_jd(mjd): """ Convert Modified Julian Day to Julian Day. @@ -54,11 +55,11 @@ def jd_to_mjd(jd): return jd - 2400000.5 -def date_to_jd(year,month,day): +def date_to_jd(year, month, day): """ Convert a date to Julian Day. 
- Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', + Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', 4th ed., Duffet-Smith and Zwart, 2011. Parameters @@ -95,20 +96,19 @@ def date_to_jd(year,month,day): # this checks where we are in relation to October 15, 1582, the beginning # of the Gregorian calendar. - if ((year < 1582) or - (year == 1582 and month < 10) or - (year == 1582 and month == 10 and day < 15)): + if ( + (year < 1582) + or (year == 1582 and month < 10) + or (year == 1582 and month == 10 and day < 15) + ): # before start of Gregorian calendar B = 0 else: # after start of Gregorian calendar - A = math.trunc(yearp / 100.) - B = 2 - A + math.trunc(A / 4.) + A = math.trunc(yearp / 100.0) + B = 2 - A + math.trunc(A / 4.0) - if yearp < 0: - C = math.trunc((365.25 * yearp) - 0.75) - else: - C = math.trunc(365.25 * yearp) + C = math.trunc(365.25 * yearp - 0.75) if yearp < 0 else math.trunc(365.25 * yearp) D = math.trunc(30.6001 * (monthp + 1)) @@ -121,7 +121,7 @@ def jd_to_date(jd): """ Convert Julian Day to date. - Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', + Algorithm from 'Practical Astronomy with your Calculator or Spreadsheet', 4th ed., Duffet-Smith and Zwart, 2011. Parameters @@ -151,15 +151,12 @@ def jd_to_date(jd): """ jd = jd + 0.5 - F, I = math.modf(jd) - I = int(I) + F, I = math.modf(jd) # noqa: E741 + I = int(I) # noqa: E741 - A = math.trunc((I - 1867216.25)/36524.25) + A = math.trunc((I - 1867216.25) / 36524.25) - if I > 2299160: - B = I + 1 + A - math.trunc(A / 4.) 
- else: - B = I + B = I + 1 + A - math.trunc(A / 4.0) if I > 2299160 else I C = B + 1524 @@ -171,20 +168,14 @@ def jd_to_date(jd): day = C - E + F - math.trunc(30.6001 * G) - if G < 13.5: - month = G - 1 - else: - month = G - 13 + month = G - 1 if G < 13.5 else G - 13 - if month > 2.5: - year = D - 4716 - else: - year = D - 4715 + year = D - 4716 if month > 2.5 else D - 4715 return year, month, day -def hmsm_to_days(hour=0,min=0,sec=0,micro=0): +def hmsm_to_days(hour=0, min=0, sec=0, micro=0): """ Convert hours, minutes, seconds, and microseconds to fractional days. @@ -213,13 +204,13 @@ def hmsm_to_days(hour=0,min=0,sec=0,micro=0): 0.25 """ - days = sec + (micro / 1.e6) + days = sec + (micro / 1.0e6) - days = min + (days / 60.) + days = min + (days / 60.0) - days = hour + (days / 60.) + days = hour + (days / 60.0) - return days / 24. + return days / 24.0 def days_to_hmsm(days): @@ -257,16 +248,16 @@ def days_to_hmsm(days): (2, 24, 0, 0) """ - hours = days * 24. + hours = days * 24.0 hours, hour = math.modf(hours) - mins = hours * 60. + mins = hours * 60.0 mins, min = math.modf(mins) - secs = mins * 60. 
+ secs = mins * 60.0 secs, sec = math.modf(secs) - micro = round(secs * 1.e6) + micro = round(secs * 1.0e6) return int(hour), int(min), int(sec), int(micro) @@ -286,16 +277,18 @@ def datetime_to_jd(date): Examples -------- - >>> d = datetime.datetime(1985,2,17,6) + >>> d = datetime.datetime(1985,2,17,6) >>> d datetime.datetime(1985, 2, 17, 6, 0) >>> jdutil.datetime_to_jd(d) 2446113.75 """ - days = date.day + hmsm_to_days(date.hour,date.minute,date.second,date.microsecond) + days = date.day + hmsm_to_days( + date.hour, date.minute, date.second, date.microsecond + ) - return date_to_jd(date.year,date.month,days) + return date_to_jd(date.year, date.month, days) def jd_to_datetime(jd): @@ -320,12 +313,12 @@ def jd_to_datetime(jd): """ year, month, day = jd_to_date(jd) - frac_days,day = math.modf(day) + frac_days, day = math.modf(day) day = int(day) - hour,min,sec,micro = days_to_hmsm(frac_days) + hour, min, sec, micro = days_to_hmsm(frac_days) - return datetime(year,month,day,hour,min,sec,micro) + return datetime(year, month, day, hour, min, sec, micro) def timedelta_to_days(td): @@ -350,9 +343,9 @@ def timedelta_to_days(td): 4.5 """ - seconds_in_day = 24. * 3600. + seconds_in_day = 24.0 * 3600.0 - days = td.days + (td.seconds + (td.microseconds * 10.e6)) / seconds_in_day + days = td.days + (td.seconds + (td.microseconds * 10.0e6)) / seconds_in_day return days @@ -372,8 +365,9 @@ class datetime(dt.datetime): datetime.datetime : Parent class. 
""" - def __add__(self,other): - if not isinstance(other,dt.timedelta): + + def __add__(self, other): + if not isinstance(other, dt.timedelta): s = "jdutil.datetime supports '+' only with datetime.timedelta" raise TypeError(s) @@ -383,8 +377,8 @@ def __add__(self,other): return jd_to_datetime(combined) - def __radd__(self,other): - if not isinstance(other,dt.timedelta): + def __radd__(self, other): + if not isinstance(other, dt.timedelta): s = "jdutil.datetime supports '+' only with datetime.timedelta" raise TypeError(s) @@ -394,15 +388,15 @@ def __radd__(self,other): return jd_to_datetime(combined) - def __sub__(self,other): - if isinstance(other,dt.timedelta): + def __sub__(self, other): + if isinstance(other, dt.timedelta): days = timedelta_to_days(other) combined = datetime_to_jd(self) - days return jd_to_datetime(combined) - elif isinstance(other, (datetime,dt.datetime)): + elif isinstance(other, (datetime, dt.datetime)): diff = datetime_to_jd(self) - datetime_to_jd(other) return dt.timedelta(diff) @@ -412,8 +406,8 @@ def __sub__(self,other): s += "datetime.timedelta, jdutil.datetime and datetime.datetime" raise TypeError(s) - def __rsub__(self,other): - if not isinstance(other, (datetime,dt.datetime)): + def __rsub__(self, other): + if not isinstance(other, (datetime, dt.datetime)): s = "jdutil.datetime supports '-' with: " s += "jdutil.datetime and datetime.datetime" raise TypeError(s) diff --git a/edtf/natlang/__init__.py b/edtf/natlang/__init__.py index 325672f..463863c 100644 --- a/edtf/natlang/__init__.py +++ b/edtf/natlang/__init__.py @@ -1 +1,3 @@ from .en import text_to_edtf + +__all__ = ["text_to_edtf"] diff --git a/edtf/natlang/en.py b/edtf/natlang/en.py index 1f46c37..6ecb190 100644 --- a/edtf/natlang/en.py +++ b/edtf/natlang/en.py @@ -1,10 +1,12 @@ """Utilities to derive an EDTF string from an (English) natural language string.""" -from datetime import datetime -from dateutil.parser import parse, ParserError + import re -from edtf import 
appsettings +from datetime import datetime + +from dateutil.parser import ParserError, parse from six.moves import xrange +from edtf import appsettings # two dates where every digit of an ISO date representation is different, # and one is in the past and one is in the future. @@ -20,7 +22,7 @@ # Set of RE rules that will cause us to abort text processing, since we know # the results will be wrong. REJECT_RULES = ( - r'.*dynasty.*', # Don't parse '23rd Dynasty' to 'uuuu-uu-23' + r".*dynasty.*", # Don't parse '23rd Dynasty' to 'uuuu-uu-23' ) @@ -41,7 +43,6 @@ def text_to_edtf(text): # TODO: assemble multiple dates into a {} or [] structure. for split in [",", ";", "or"]: for list_item in t.split(split): - # try parsing as an interval - split by '-' toks = list_item.split("-") if len(toks) == 2: @@ -51,18 +52,23 @@ def text_to_edtf(text): # match looks from the beginning of the string, search # looks anywhere. - if re.match(r'\d\D\b', d2): # 1-digit year partial e.g. 1868-9 - if re.search(r'\b\d\d\d\d$', d1): # TODO: evaluate it and see if it's a year + if re.match(r"\d\D\b", d2): # 1-digit year partial e.g. 1868-9 + if re.search( + r"\b\d\d\d\d$", d1 + ): # TODO: evaluate it and see if it's a year d2 = d1[-4:-1] + d2 - elif re.match(r'\d\d\b', d2): # 2-digit year partial e.g. 1809-10 - if re.search(r'\b\d\d\d\d$', d1): + elif re.match(r"\d\d\b", d2): # 2-digit year partial e.g. 
1809-10 + if re.search(r"\b\d\d\d\d$", d1): d2 = d1[-4:-2] + d2 else: - century_range_match = re.search(r'\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]', "%s-%s" % (d1,d2)) + century_range_match = re.search( + r"\b(\d\d)(th|st|nd|rd|)-(\d\d)(th|st|nd|rd) [cC]", + f"{d1}-{d2}", + ) if century_range_match: g = century_range_match.groups() - d1 = "%sC" % g[0] - d2 = "%sC" % g[2] + d1 = f"{g[0]}C" + d2 = f"{g[2]}C" r1 = text_to_edtf_date(d1) r2 = text_to_edtf_date(d2) @@ -79,7 +85,7 @@ def text_to_edtf(text): else: int_match = re.search(r"(\d\d\d\d)\/(\d\d\d\d)", list_item) if int_match: - return "[%s, %s]" % (int_match.group(1), int_match.group(2)) + return f"[{int_match.group(1)}, {int_match.group(2)}]" result = text_to_edtf_date(list_item) if result: @@ -87,18 +93,17 @@ def text_to_edtf(text): if result: break - is_before = re.findall(r'\bbefore\b', t) - is_before = is_before or re.findall(r'\bearlier\b', t) + is_before = re.findall(r"\bbefore\b", t) + is_before = is_before or re.findall(r"\bearlier\b", t) - is_after = re.findall(r'\bafter\b', t) - is_after = is_after or re.findall(r'\bsince\b', t) - is_after = is_after or re.findall(r'\blater\b', t) + is_after = re.findall(r"\bafter\b", t) + is_after = is_after or re.findall(r"\bsince\b", t) + is_after = is_after or re.findall(r"\blater\b", t) if is_before: result = f"/{result}" # unknown is replaced with null for intervals elif is_after: result = f"{result}/" # unknown is replaced with null for intervals - return result @@ -114,36 +119,34 @@ def text_to_edtf_date(text): return t = text.lower() - result = '' + result = "" for reject_re in REJECT_RULES: if re.match(reject_re, t): return # matches on '1800s'. Needs to happen before is_decade. - could_be_century = re.findall(r'(\d{2}00)s', t) + could_be_century = re.findall(r"(\d{2}00)s", t) # matches on '1800s' and '1910s'. Removes the 's'. # Needs to happen before is_uncertain because e.g. "1860s?" 
- t, is_decade = re.subn(r'(\d{3}0)s', r'\1', t) + t, is_decade = re.subn(r"(\d{3}0)s", r"\1", t) # detect approximation signifiers # a few 'circa' abbreviations just before the year - is_approximate = re.findall(r'\b(ca?\.?) ?\d{4}', t) + is_approximate = re.findall(r"\b(ca?\.?) ?\d{4}", t) # the word 'circa' anywhere - is_approximate = is_approximate or re.findall(r'\bcirca\b', t) + is_approximate = is_approximate or re.findall(r"\bcirca\b", t) # the word 'approx'/'around'/'about' anywhere - is_approximate = is_approximate or \ - re.findall(r'\b(approx|around|about)', t) + is_approximate = is_approximate or re.findall(r"\b(approx|around|about)", t) # a ~ before a year-ish number - is_approximate = is_approximate or re.findall(r'\b~\d{4}', t) + is_approximate = is_approximate or re.findall(r"\b~\d{4}", t) # a ~ at the beginning - is_approximate = is_approximate or re.findall(r'^~', t) + is_approximate = is_approximate or re.findall(r"^~", t) # detect uncertainty signifiers - t, is_uncertain = re.subn(r'(\d{4})\?', r'\1', t) + t, is_uncertain = re.subn(r"(\d{4})\?", r"\1", t) # the words uncertain/maybe/guess anywhere - is_uncertain = is_uncertain or re.findall( - r'\b(uncertain|possibly|maybe|guess)', t) + is_uncertain = is_uncertain or re.findall(r"\b(uncertain|possibly|maybe|guess)", t) # detect century forms is_century = re.findall(CENTURY_RE, t) @@ -159,20 +162,19 @@ def text_to_edtf_date(text): try: is_bc = is_century[0][-1] in ("bc", "bce") if is_bc: - result = "-%s" % result + result = f"-{result}" except IndexError: pass elif is_ce: result = "%04d" % (int(is_ce[0][0])) - is_approximate = is_approximate or \ - re.findall(r'\b(ca?\.?) ?' + CE_RE, t) - is_uncertain = is_uncertain or re.findall(CE_RE + r'\?', t) + is_approximate = is_approximate or re.findall(r"\b(ca?\.?) ?" 
+ CE_RE, t) + is_uncertain = is_uncertain or re.findall(CE_RE + r"\?", t) try: is_bc = is_ce[0][-1] in ("bc", "bce") if is_bc: - result = "-%s" % result + result = f"-{result}" except IndexError: pass @@ -187,7 +189,7 @@ def text_to_edtf_date(text): dayfirst=appsettings.DAY_FIRST, yearfirst=False, fuzzy=True, # force a match, even if it's default date - default=DEFAULT_DATE_1 + default=DEFAULT_DATE_1, ) dt2 = parse( @@ -195,16 +197,15 @@ def text_to_edtf_date(text): dayfirst=appsettings.DAY_FIRST, yearfirst=False, fuzzy=True, # force a match, even if it's default date - default=DEFAULT_DATE_2 + default=DEFAULT_DATE_2, ) - except ParserError as pe: + except ParserError: return - except Exception as e: + except Exception: return - if dt1.date() == DEFAULT_DATE_1.date() and \ - dt2.date() == DEFAULT_DATE_2.date(): + if dt1.date() == DEFAULT_DATE_1.date() and dt2.date() == DEFAULT_DATE_2.date(): # couldn't parse anything - defaults are untouched. return @@ -212,9 +213,9 @@ def text_to_edtf_date(text): date2 = dt2.isoformat()[:10] # guess precision of 'unspecified' characters to use - mentions_year = re.findall(r'\byear\b.+(in|during)\b', t) - mentions_month = re.findall(r'\bmonth\b.+(in|during)\b', t) - mentions_day = re.findall(r'\bday\b.+(in|during)\b', t) + mentions_year = re.findall(r"\byear\b.+(in|during)\b", t) + mentions_month = re.findall(r"\bmonth\b.+(in|during)\b", t) + mentions_day = re.findall(r"\bday\b.+(in|during)\b", t) for i in xrange(len(date1)): # if the given year could be a century (e.g. 
'1800s') then use diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 911fc13..290fead 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -1,8 +1,10 @@ +# ruff: noqa: S101 # Asserts are ok in tests + import pytest + from edtf.natlang.en import text_to_edtf # TODO update the tests and code to test and output the new spec - # where examples are tuples, the second item is the normalised output @pytest.mark.parametrize("input_text,expected_output", [ # Ignoring 'late' for simplicity in these examples @@ -184,4 +186,3 @@ def test_natlang(input_text, expected_output): """ result = text_to_edtf(input_text) assert result == expected_output, f"Failed for input: {input_text} - expected {expected_output}, got {result}" - diff --git a/edtf/parser/__init__.py b/edtf/parser/__init__.py index e5a0e5f..43197d5 100644 --- a/edtf/parser/__init__.py +++ b/edtf/parser/__init__.py @@ -1,2 +1,51 @@ -from edtf.parser.grammar import parse_edtf -from edtf.parser.parser_classes import * +from .edtf_exceptions import EDTFParseException +from .grammar import parse_edtf +from .parser_classes import ( + UA, + Consecutives, + Date, + DateAndTime, + EarlierConsecutives, + EDTFObject, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, + Level2Season, + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, + UnspecifiedIntervalSection, +) + +__all__ = [ + "parse_edtf", + "EDTFParseException", + "EDTFObject", + "Date", + "DateAndTime", + "Interval", + "UA", + "UncertainOrApproximate", + "Unspecified", + "UnspecifiedIntervalSection", + "Level1Interval", + "LongYear", + "Season", + "PartialUncertainOrApproximate", + "PartialUnspecified", + "Consecutives", + "EarlierConsecutives", + "LaterConsecutives", + "OneOfASet", + "MultipleDates", + "Level2Interval", + "Level2Season", + "ExponentialYear", +] diff --git a/edtf/parser/grammar.py 
b/edtf/parser/grammar.py index d69e719..730f47d 100644 --- a/edtf/parser/grammar.py +++ b/edtf/parser/grammar.py @@ -1,26 +1,54 @@ -from pyparsing import Literal as L, ParseException, Opt, Optional, OneOrMore, \ - ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, Group - -# (* ************************** Level 0 *************************** *) -from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ - UncertainOrApproximate, Level1Interval, LongYear, Season, \ - PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ - Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, Level2Season +from pyparsing import ( + Combine, + NotAny, + OneOrMore, + Opt, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + nums, + oneOf, +) +from pyparsing import Literal as L from edtf.parser.edtf_exceptions import EDTFParseException -oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) -oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) -oneThru23 = oneOf(['%.2d' % i for i in range(1, 24)]) -zeroThru23 = oneOf(['%.2d' % i for i in range(0, 24)]) -oneThru29 = oneOf(['%.2d' % i for i in range(1, 30)]) -oneThru30 = oneOf(['%.2d' % i for i in range(1, 31)]) -oneThru31 = oneOf(['%.2d' % i for i in range(1, 32)]) -oneThru59 = oneOf(['%.2d' % i for i in range(1, 60)]) -zeroThru59 = oneOf(['%.2d' % i for i in range(0, 60)]) - -positiveDigit = Word(nums, exact=1, excludeChars='0') +# (* ************************** Level 0 *************************** *) +from edtf.parser.parser_classes import ( + UA, + Consecutives, + Date, + DateAndTime, + EarlierConsecutives, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, + Level2Season, + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, +) + +oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) 
+oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) +oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) +zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) +oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) +oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) +oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) +oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) +zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) + +positiveDigit = Word(nums, exact=1, excludeChars="0") digit = Word(nums, exact=1) second = zeroThru59 @@ -50,13 +78,10 @@ Date.set_parser(date) zoneOffsetHour = oneThru13 -zoneOffset = L("Z") \ - ^ (Regex("[+-]") - + (zoneOffsetHour + Optional(":" + minute) - ^ L("14:00") - ^ ("00:" + oneThru59) - ) - ) +zoneOffset = L("Z") ^ ( + Regex("[+-]") + + (zoneOffsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) +) baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") @@ -100,83 +125,80 @@ # cleanly otherwise the parameter names are overlapped. def f(toks): try: - return {'date': toks[0], 'ua': toks[1]} + return {"date": toks[0], "ua": toks[1]} except IndexError: - return {'date': toks[0], 'ua': None} + return {"date": toks[0], "ua": None} -l1Start = '..' ^ uaDateOrSeason +l1Start = ".." ^ uaDateOrSeason l1Start.addParseAction(f) -l1End = uaDateOrSeason ^ '..' +l1End = uaDateOrSeason ^ ".." 
l1End.addParseAction(f) -level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") \ - ^ l1Start("lower") + "/" + Optional(l1End("upper")) +level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") ^ l1Start( + "lower" +) + "/" + Optional(l1End("upper")) Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine( - digit + digit + (digit ^ 'X') + 'X' -)("year") +yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") -unspecified = yearWithOneOrTwoUnspecifedDigits \ - ^ monthUnspecified \ - ^ dayUnspecified \ +unspecified = ( + yearWithOneOrTwoUnspecifedDigits + ^ monthUnspecified + ^ dayUnspecified ^ dayAndMonthUnspecified +) Unspecified.set_parser(unspecified) # (* *** uncertainOrApproxDate *** *) -uncertainOrApproxDate = date('date') + UASymbol("ua") +uncertainOrApproxDate = date("date") + UASymbol("ua") UncertainOrApproximate.set_parser(uncertainOrApproxDate) -level1Expression = uncertainOrApproxDate \ - ^ unspecified \ - ^ level1Interval \ - ^ longYearSimple \ - ^ season +level1Expression = ( + uncertainOrApproxDate ^ unspecified ^ level1Interval ^ longYearSimple ^ season +) # (* ************************** Level 2 *************************** *) # (* ** Internal Unspecified** *) -digitOrX = Word(nums + 'X', exact=1) +digitOrX = Word(nums + "X", exact=1) # 2-digit day with at least one 'X' present -dayWithX = Combine( - ("X" + digitOrX) - ^ (digitOrX + 'X') -)("day") +dayWithX = Combine(("X" + digitOrX) ^ (digitOrX + "X"))("day") # 2-digit month with at least one 'X' present -monthWithX = Combine( - oneOf("0X 1X") - ^ ("X" + digitOrX) -)("month") +monthWithX = Combine(oneOf("0X 1X") ^ ("X" + digitOrX))("month") # 4-digit year with at least one 'X' present yearWithX = Combine( - ('X' + 
digitOrX + digitOrX + digitOrX) - ^ (digitOrX + 'X' + digitOrX + digitOrX) - ^ (digitOrX + digitOrX + 'X' + digitOrX) - ^ (digitOrX + digitOrX + digitOrX + 'X') + ("X" + digitOrX + digitOrX + digitOrX) + ^ (digitOrX + "X" + digitOrX + digitOrX) + ^ (digitOrX + digitOrX + "X" + digitOrX) + ^ (digitOrX + digitOrX + digitOrX + "X") )("year") -yearMonthWithX = ( - (Combine(year("") ^ yearWithX(""))("year") + "-" + monthWithX) - ^ (yearWithX + "-" + month) +yearMonthWithX = (Combine(year("") ^ yearWithX(""))("year") + "-" + monthWithX) ^ ( + yearWithX + "-" + month ) -monthDayWithX = ( - (Combine(month("") ^ monthWithX(""))("month") + "-" + dayWithX) - ^ (monthWithX + "-" + day) +monthDayWithX = (Combine(month("") ^ monthWithX(""))("month") + "-" + dayWithX) ^ ( + monthWithX + "-" + day ) yearMonthDayWithX = ( - (yearWithX + "-" + Combine(month("") ^ monthWithX(""))("month") + "-" + Combine(day("") ^ dayWithX(""))("day")) + ( + yearWithX + + "-" + + Combine(month("") ^ monthWithX(""))("month") + + "-" + + Combine(day("") ^ dayWithX(""))("day") + ) ^ (year + "-" + monthWithX + "-" + Combine(day("") ^ dayWithX(""))("day")) ^ (year + "-" + month + "-" + dayWithX) ) @@ -188,8 +210,9 @@ def f(toks): # group qualification # qualifier right of a component(date, month, day) applies to all components to the left -group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day \ - ^ year + UASymbol("year_ua") + "-" + month + Opt("-" + day) +group_qual = yearMonth + UASymbol("year_month_ua") + "-" + day ^ year + UASymbol( + "year_ua" +) + "-" + month + Opt("-" + day) # component qualification # qualifier immediate left of a component (date, month, day) applies to that component only @@ -197,17 +220,18 @@ def f(toks): qual_month = month ^ UASymbol("month_ua") + month qual_day = day ^ UASymbol("day_ua") + day -indi_qual = UASymbol("year_ua_b") + year + Opt("-" + qual_month + Opt("-" + qual_day)) \ - ^ qual_year + "-" + UASymbol("month_ua") + month + Opt("-" + qual_day) \ +indi_qual = 
( + UASymbol("year_ua_b") + year + Opt("-" + qual_month + Opt("-" + qual_day)) + ^ qual_year + "-" + UASymbol("month_ua") + month + Opt("-" + qual_day) ^ qual_year + "-" + qual_month + "-" + UASymbol("day_ua") + day +) partialUncertainOrApproximate = group_qual ^ indi_qual PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) -dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified +dateWithInternalUncertainty = partialUncertainOrApproximate ^ partialUnspecified -qualifyingString = Regex(r'\S') # any nonwhitespace char +qualifyingString = Regex(r"\S") # any nonwhitespace char # (* ** SeasonQualified ** *) seasonQualifier = qualifyingString @@ -215,14 +239,25 @@ def f(toks): # (* ** Long Year - Scientific Form ** *) positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) -longYearScientific = "Y" + Combine(Optional("-") + positiveInteger)("base") + "E" + \ - positiveInteger("exponent") + Optional("S" + positiveInteger("precision")) +longYearScientific = ( + "Y" + + Combine(Optional("-") + positiveInteger)("base") + + "E" + + positiveInteger("exponent") + + Optional("S" + positiveInteger("precision")) +) ExponentialYear.set_parser(longYearScientific) # (* ** level2Interval ** *) -level2Interval = (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) +level2Interval = ( + (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) + ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) + ^ ( + dateWithInternalUncertainty("lower") + + "/" + + dateWithInternalUncertainty("upper") + ) +) Level2Interval.set_parser(level2Interval) # (* ** Masked precision ** *) eliminated in latest specs @@ -230,16 +265,20 @@ def f(toks): # MaskedPrecision.set_parser(maskedPrecision) # (* ** Inclusive list and choice list** *) 
-consecutives = (yearMonthDay("lower") + ".." + yearMonthDay("upper")) \ - ^ (yearMonth("lower") + ".." + yearMonth("upper")) \ +consecutives = ( + (yearMonthDay("lower") + ".." + yearMonthDay("upper")) + ^ (yearMonth("lower") + ".." + yearMonth("upper")) ^ (year("lower") + ".." + year("upper")) +) Consecutives.set_parser(consecutives) -listElement = date \ - ^ dateWithInternalUncertainty \ - ^ uncertainOrApproxDate \ - ^ unspecified \ +listElement = ( + date + ^ dateWithInternalUncertainty + ^ uncertainOrApproxDate + ^ unspecified ^ consecutives +) earlier = L("..").addParseAction(f)("lower") + date("upper").addParseAction(f) later = date("lower").addParseAction(f) + L("..").addParseAction(f)("upper") @@ -248,10 +287,12 @@ def f(toks): LaterConsecutives.set_parser(later) -listContent = (earlier + ZeroOrMore("," + listElement)) \ - ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) \ - ^ (listElement + OneOrMore("," + listElement)) \ +listContent = ( + (earlier + ZeroOrMore("," + listElement)) + ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) + ^ (listElement + OneOrMore("," + listElement)) ^ consecutives +) choiceList = "[" + listContent + "]" OneOfASet.set_parser(choiceList) @@ -265,17 +306,21 @@ def f(toks): l2season = year + "-" + seasonL2Number("season") Level2Season.set_parser(l2season) -level2Expression = partialUncertainOrApproximate \ - ^ partialUnspecified \ - ^ choiceList \ - ^ inclusiveList \ - ^ level2Interval \ - ^ longYearScientific \ - ^ l2season \ +level2Expression = ( + partialUncertainOrApproximate + ^ partialUnspecified + ^ choiceList + ^ inclusiveList + ^ level2Interval + ^ longYearScientific + ^ l2season ^ seasonQualified +) # putting it all together -edtfParser = level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +edtfParser = ( + level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +) def parse_edtf(str, parseAll=True, fail_silently=False): 
@@ -285,7 +330,7 @@ def parse_edtf(str, parseAll=True, fail_silently=False): p = edtfParser.parseString(str.strip(), parseAll) if p: return p[0] - except ParseException as e: + except ParseException as err: if fail_silently: return None - raise EDTFParseException(e) + raise EDTFParseException(err) from err diff --git a/edtf/parser/grammar_test.py b/edtf/parser/grammar_test.py index 81b2d5d..c8ff727 100644 --- a/edtf/parser/grammar_test.py +++ b/edtf/parser/grammar_test.py @@ -1,26 +1,52 @@ -from pyparsing import Literal as L, ParseException, Optional, Opt, OneOrMore, \ - ZeroOrMore, oneOf, Regex, Combine, Word, NotAny, nums, FollowedBy - -# (* ************************** Level 0 *************************** *) -from edtf.parser.parser_classes import Date, DateAndTime, Interval, Unspecified, \ - UncertainOrApproximate, Level1Interval, LongYear, Season, \ - PartialUncertainOrApproximate, UA, PartialUnspecified, OneOfASet, \ - Consecutives, EarlierConsecutives, LaterConsecutives, MultipleDates, \ - MaskedPrecision, Level2Interval, ExponentialYear, UnspecifiedIntervalSection# , Testi +from pyparsing import ( + Combine, + NotAny, + OneOrMore, + Optional, + ParseException, + Regex, + Word, + ZeroOrMore, + nums, + oneOf, +) +from pyparsing import Literal as L from edtf.parser.edtf_exceptions import EDTFParseException -oneThru12 = oneOf(['%.2d' % i for i in range(1, 13)]) -oneThru13 = oneOf(['%.2d' % i for i in range(1, 14)]) -oneThru23 = oneOf(['%.2d' % i for i in range(1, 24)]) -zeroThru23 = oneOf(['%.2d' % i for i in range(0, 24)]) -oneThru29 = oneOf(['%.2d' % i for i in range(1, 30)]) -oneThru30 = oneOf(['%.2d' % i for i in range(1, 31)]) -oneThru31 = oneOf(['%.2d' % i for i in range(1, 32)]) -oneThru59 = oneOf(['%.2d' % i for i in range(1, 60)]) -zeroThru59 = oneOf(['%.2d' % i for i in range(0, 60)]) - -positiveDigit = Word(nums, exact=1, excludeChars='0') +# (* ************************** Level 0 *************************** *) +from edtf.parser.parser_classes import ( + 
UA, + Consecutives, + Date, + DateAndTime, + EarlierConsecutives, + ExponentialYear, + Interval, + LaterConsecutives, + Level1Interval, + Level2Interval, # , Testi + LongYear, + MultipleDates, + OneOfASet, + PartialUncertainOrApproximate, + PartialUnspecified, + Season, + UncertainOrApproximate, + Unspecified, +) + +oneThru12 = oneOf(["%.2d" % i for i in range(1, 13)]) +oneThru13 = oneOf(["%.2d" % i for i in range(1, 14)]) +oneThru23 = oneOf(["%.2d" % i for i in range(1, 24)]) +zeroThru23 = oneOf(["%.2d" % i for i in range(0, 24)]) +oneThru29 = oneOf(["%.2d" % i for i in range(1, 30)]) +oneThru30 = oneOf(["%.2d" % i for i in range(1, 31)]) +oneThru31 = oneOf(["%.2d" % i for i in range(1, 32)]) +oneThru59 = oneOf(["%.2d" % i for i in range(1, 60)]) +zeroThru59 = oneOf(["%.2d" % i for i in range(0, 60)]) + +positiveDigit = Word(nums, exact=1, excludeChars="0") digit = Word(nums, exact=1) second = zeroThru59 @@ -50,13 +76,10 @@ Date.set_parser(date) zoneOffsetHour = oneThru13 -zoneOffset = L("Z") \ - ^ (Regex("[+-]") - + (zoneOffsetHour + Optional(":" + minute) - ^ L("14:00") - ^ ("00:" + oneThru59) - ) - ) +zoneOffset = L("Z") ^ ( + Regex("[+-]") + + (zoneOffsetHour + Optional(":" + minute) ^ L("14:00") ^ ("00:" + oneThru59)) +) baseTime = Combine(hour + ":" + minute + ":" + second ^ "24:00:00") @@ -96,92 +119,90 @@ uaDateOrSeason = dateOrSeason + Optional(UASymbol) -#unspecifiedIntervalSec = L('..')('unknownOrOpen') + FollowedBy(L("/") + uaDateOrSeason)('other_section_element') -#Testi.set_parser(unspecifiedIntervalSec) +# unspecifiedIntervalSec = L('..')('unknownOrOpen') + FollowedBy(L("/") + uaDateOrSeason)('other_section_element') +# Testi.set_parser(unspecifiedIntervalSec) + # bit of a kludge here to get the all the relevant tokens into the parse action # cleanly otherwise the parameter names are overlapped. 
def f(toks): try: - return {'date': toks[0], 'ua': toks[1]} + return {"date": toks[0], "ua": toks[1]} except IndexError: - return {'date': toks[0], 'ua': None} + return {"date": toks[0], "ua": None} -l1Start = '..' ^ uaDateOrSeason -#l1Start = unspecifiedIntervalSec ^ uaDateOrSeason +l1Start = ".." ^ uaDateOrSeason +# l1Start = unspecifiedIntervalSec ^ uaDateOrSeason l1Start.addParseAction(f) -l1End = uaDateOrSeason ^ '..' +l1End = uaDateOrSeason ^ ".." l1End.addParseAction(f) -#level1Interval = l1Start("lower") + "/" + l1End("upper") -level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") \ - ^ l1Start("lower") + "/" + Optional(l1End("upper")) +# level1Interval = l1Start("lower") + "/" + l1End("upper") +level1Interval = Optional(l1Start)("lower") + "/" + l1End("upper") ^ l1Start( + "lower" +) + "/" + Optional(l1End("upper")) Level1Interval.set_parser(level1Interval) # (* *** unspecified *** *) -yearWithOneOrTwoUnspecifedDigits = Combine( - digit + digit + (digit ^ 'X') + 'X' -)("year") +yearWithOneOrTwoUnspecifedDigits = Combine(digit + digit + (digit ^ "X") + "X")("year") monthUnspecified = year + "-" + L("XX")("month") dayUnspecified = yearMonth + "-" + L("XX")("day") dayAndMonthUnspecified = year + "-" + L("XX")("month") + "-" + L("XX")("day") -unspecified = yearWithOneOrTwoUnspecifedDigits \ - ^ monthUnspecified \ - ^ dayUnspecified \ +unspecified = ( + yearWithOneOrTwoUnspecifedDigits + ^ monthUnspecified + ^ dayUnspecified ^ dayAndMonthUnspecified +) Unspecified.set_parser(unspecified) # (* *** uncertainOrApproxDate *** *) -uncertainOrApproxDate = date('date') + UASymbol("ua") +uncertainOrApproxDate = date("date") + UASymbol("ua") UncertainOrApproximate.set_parser(uncertainOrApproxDate) -level1Expression = uncertainOrApproxDate \ - ^ unspecified \ - ^ level1Interval \ - ^ longYearSimple \ - ^ season +level1Expression = ( + uncertainOrApproxDate ^ unspecified ^ level1Interval ^ longYearSimple ^ season +) # (* ************************** Level 2 
*************************** *) # (* ** Internal Unspecified** *) -digitOrU = Word(nums + 'X', exact=1) +digitOrU = Word(nums + "X", exact=1) # 2-digit day with at least one 'X' present -dayWithU = Combine( - ("X" + digitOrU) - ^ (digitOrU + 'X') -)("day") +dayWithU = Combine(("X" + digitOrU) ^ (digitOrU + "X"))("day") # 2-digit month with at least one 'X' present -monthWithU = Combine( - oneOf("0X 1X") - ^ ("X" + digitOrU) -)("month") +monthWithU = Combine(oneOf("0X 1X") ^ ("X" + digitOrU))("month") # 4-digit year with at least one 'X' present yearWithU = Combine( - ('X' + digitOrU + digitOrU + digitOrU) - ^ (digitOrU + 'X' + digitOrU + digitOrU) - ^ (digitOrU + digitOrU + 'X' + digitOrU) - ^ (digitOrU + digitOrU + digitOrU + 'X') + ("X" + digitOrU + digitOrU + digitOrU) + ^ (digitOrU + "X" + digitOrU + digitOrU) + ^ (digitOrU + digitOrU + "X" + digitOrU) + ^ (digitOrU + digitOrU + digitOrU + "X") )("year") -yearMonthWithU = ( - (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) - ^ (yearWithU + "-" + month) +yearMonthWithU = (Combine(year("") ^ yearWithU(""))("year") + "-" + monthWithU) ^ ( + yearWithU + "-" + month ) -monthDayWithU = ( - (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) - ^ (monthWithU + "-" + day) +monthDayWithU = (Combine(month("") ^ monthWithU(""))("month") + "-" + dayWithU) ^ ( + monthWithU + "-" + day ) yearMonthDayWithU = ( - (yearWithU + "-" + Combine(month("") ^ monthWithU(""))("month") + "-" + Combine(day("") ^ dayWithU(""))("day")) + ( + yearWithU + + "-" + + Combine(month("") ^ monthWithU(""))("month") + + "-" + + Combine(day("") ^ dayWithU(""))("day") + ) ^ (year + "-" + monthWithU + "-" + Combine(day("") ^ dayWithU(""))("day")) ^ (year + "-" + month + "-" + dayWithU) ) @@ -198,30 +219,52 @@ def f(toks): # second clause below needed Optional() around the "year_ua" UASymbol, for dates # like '(2011)-06-04~' to work. 
-IUABase = \ - (year_with_brackets + UASymbol("year_ua") + "-" + month + Optional("-(" + day + ")" + UASymbol("day_ua"))) \ - ^ (year_with_brackets + Optional(UASymbol)("year_ua") + "-" + monthDay + Optional(UASymbol)("month_day_ua")) \ +IUABase = ( + ( + year_with_brackets + + UASymbol("year_ua") + + "-" + + month + + Optional("-(" + day + ")" + UASymbol("day_ua")) + ) ^ ( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + year_with_brackets + + Optional(UASymbol)("year_ua") + + "-" + + monthDay + + Optional(UASymbol)("month_day_ua") + ) + ^ ( + year_with_brackets + + Optional(UASymbol)("year_ua") + + "-(" + + month + + ")" + + UASymbol("month_ua") + Optional("-(" + day + ")" + UASymbol("day_ua")) - ) \ + ) ^ ( - year_with_brackets + Optional(UASymbol)("year_ua") + "-(" + month + ")" + UASymbol("month_ua") + year_with_brackets + + Optional(UASymbol)("year_ua") + + "-(" + + month + + ")" + + UASymbol("month_ua") + Optional("-" + day) - ) \ - ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) \ - ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) \ - ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) \ - ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) \ + ) + ^ (yearMonth + UASymbol("year_month_ua") + "-(" + day + ")" + UASymbol("day_ua")) + ^ (yearMonth + UASymbol("year_month_ua") + "-" + day) + ^ (yearMonth + "-(" + day + ")" + UASymbol("day_ua")) + ^ (year + "-(" + monthDay + ")" + UASymbol("month_day_ua")) ^ (season("ssn") + UASymbol("season_ua")) +) partialUncertainOrApproximate = IUABase ^ ("(" + IUABase + ")" + UASymbol("all_ua")) PartialUncertainOrApproximate.set_parser(partialUncertainOrApproximate) -dateWithInternalUncertainty = partialUncertainOrApproximate \ - ^ partialUnspecified +dateWithInternalUncertainty = partialUncertainOrApproximate ^ partialUnspecified -qualifyingString = Regex(r'\S') # any nonwhitespace char +qualifyingString = Regex(r"\S") # any 
nonwhitespace char # (* ** SeasonQualified ** *) seasonQualifier = qualifyingString @@ -229,14 +272,25 @@ def f(toks): # (* ** Long Year - Scientific Form ** *) positiveInteger = Combine(positiveDigit + ZeroOrMore(digit)) -longYearScientific = "Y" + Combine(Optional("-") + positiveInteger)("base") + "E" + \ - positiveInteger("exponent") + Optional("S" + positiveInteger("precision")) +longYearScientific = ( + "Y" + + Combine(Optional("-") + positiveInteger)("base") + + "E" + + positiveInteger("exponent") + + Optional("S" + positiveInteger("precision")) +) ExponentialYear.set_parser(longYearScientific) # (* ** level2Interval ** *) -level2Interval = (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) \ - ^ (dateWithInternalUncertainty("lower") + "/" + dateWithInternalUncertainty("upper")) +level2Interval = ( + (dateOrSeason("lower") + "/" + dateWithInternalUncertainty("upper")) + ^ (dateWithInternalUncertainty("lower") + "/" + dateOrSeason("upper")) + ^ ( + dateWithInternalUncertainty("lower") + + "/" + + dateWithInternalUncertainty("upper") + ) +) Level2Interval.set_parser(level2Interval) # (* ** Masked precision ** *) eliminated in latest specs @@ -244,26 +298,32 @@ def f(toks): # MaskedPrecision.set_parser(maskedPrecision) # (* ** Inclusive list and choice list** *) -consecutives = (yearMonthDay("lower") + ".." + yearMonthDay("upper")) \ - ^ (yearMonth("lower") + ".." + yearMonth("upper")) \ +consecutives = ( + (yearMonthDay("lower") + ".." + yearMonthDay("upper")) + ^ (yearMonth("lower") + ".." + yearMonth("upper")) ^ (year("lower") + ".." + year("upper")) +) Consecutives.set_parser(consecutives) -listElement = date \ - ^ dateWithInternalUncertainty \ - ^ uncertainOrApproxDate \ - ^ unspecified \ +listElement = ( + date + ^ dateWithInternalUncertainty + ^ uncertainOrApproxDate + ^ unspecified ^ consecutives +) earlier = ".." 
+ date("upper") EarlierConsecutives.set_parser(earlier) later = date("lower") + ".." LaterConsecutives.set_parser(later) -listContent = (earlier + ZeroOrMore("," + listElement)) \ - ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) \ - ^ (listElement + OneOrMore("," + listElement)) \ +listContent = ( + (earlier + ZeroOrMore("," + listElement)) + ^ (Optional(earlier + ",") + ZeroOrMore(listElement + ",") + later) + ^ (listElement + OneOrMore("," + listElement)) ^ consecutives +) choiceList = "[" + listContent + "]" OneOfASet.set_parser(choiceList) @@ -271,16 +331,20 @@ def f(toks): inclusiveList = "{" + listContent + "}" MultipleDates.set_parser(inclusiveList) -level2Expression = partialUncertainOrApproximate \ - ^ partialUnspecified \ - ^ choiceList \ - ^ inclusiveList \ - ^ level2Interval \ - ^ longYearScientific \ +level2Expression = ( + partialUncertainOrApproximate + ^ partialUnspecified + ^ choiceList + ^ inclusiveList + ^ level2Interval + ^ longYearScientific ^ seasonQualified +) # putting it all together -edtfParser = level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +edtfParser = ( + level0Expression("level0") ^ level1Expression("level1") ^ level2Expression("level2") +) def parse_edtf(str, parseAll=True, fail_silently=False): @@ -290,7 +354,7 @@ def parse_edtf(str, parseAll=True, fail_silently=False): p = edtfParser.parseString(str.strip(), parseAll) if p: return p[0] - except ParseException as e: + except ParseException as err: if fail_silently: return None - raise EDTFParseException(e) + raise EDTFParseException(err) from err diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 3b5ac6e..2b4368a 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -1,17 +1,22 @@ import calendar +import math import re -from time import struct_time from datetime import date, datetime from operator import add, sub -import math +from time import struct_time + from 
dateutil.relativedelta import relativedelta from edtf import appsettings -from edtf.convert import dt_to_struct_time, trim_struct_time, \ - TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS +from edtf.convert import ( + TIME_EMPTY_EXTRAS, + TIME_EMPTY_TIME, + dt_to_struct_time, + trim_struct_time, +) -EARLIEST = 'earliest' -LATEST = 'latest' +EARLIEST = "earliest" +LATEST = "latest" PRECISION_MILLENIUM = "millenium" PRECISION_CENTURY = "century" @@ -80,14 +85,16 @@ def apply_delta(op, time_struct, delta): # Convert result year back to its original millenium final_year = dt_result.year - millenium_diff return struct_time( - (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS)) + (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS) + ) -class EDTFObject(object): +class EDTFObject: """ Object to attact to a parser to become instantiated when the parser completes. """ + parser = None @classmethod @@ -99,9 +106,9 @@ def set_parser(cls, p): def parse_action(cls, toks): kwargs = toks.asDict() try: - return cls(**kwargs) # replace the token list with the class + return cls(**kwargs) # replace the token list with the class except Exception as e: - print("trying to %s.__init__(**%s)" % (cls.__name__, kwargs)) + print(f"trying to {cls.__name__}.__init__(**{kwargs})") raise e @classmethod @@ -109,14 +116,11 @@ def parse(cls, s): return cls.parser.parseString(s)[0] def __repr__(self): - return "%s: '%s'" % (type(self).__name__, str(self)) + return f"{type(self).__name__}: '{str(self)}'" def __init__(self, *args, **kwargs): - str = "%s.__init__(*%s, **%s)" % ( - type(self).__name__, - args, kwargs, - ) - raise NotImplementedError("%s is not implemented." 
% str) + str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" + raise NotImplementedError(f"{str} is not implemented.") def __str__(self): raise NotImplementedError @@ -137,25 +141,30 @@ def _get_fuzzy_padding(self, lean): return relativedelta(0) def get_is_approximate(self): - return getattr(self, '_is_approximate', False) + return getattr(self, "_is_approximate", False) def set_is_approximate(self, val): self._is_approximate = val + is_approximate = property(get_is_approximate, set_is_approximate) def get_is_uncertain(self): - return getattr(self, '_is_uncertain', False) + return getattr(self, "_is_uncertain", False) def set_is_uncertain(self, val): self._is_uncertain = val + is_uncertain = property(get_is_uncertain, set_is_uncertain) def get_is_uncertain_and_approximate(self): - return getattr(self, '_uncertain_and_approximate', False) + return getattr(self, "_uncertain_and_approximate", False) def set_is_uncertain_and_approximate(self, val): self._uncertain_and_approximate = val - is_uncertain_and_approximate = property(get_is_uncertain_and_approximate, set_is_uncertain_and_approximate) + + is_uncertain_and_approximate = property( + get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + ) def lower_fuzzy(self): strict_val = self.lower_strict() @@ -190,7 +199,9 @@ def __gt__(self, other): return self.lower_strict() > dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() > trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __ge__(self, other): if isinstance(other, EDTFObject): @@ -199,7 +210,9 @@ def __ge__(self, other): return self.lower_strict() >= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() >= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, 
type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __lt__(self, other): if isinstance(other, EDTFObject): @@ -208,7 +221,9 @@ def __lt__(self, other): return self.lower_strict() < dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() < trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __le__(self, other): if isinstance(other, EDTFObject): @@ -217,13 +232,15 @@ def __le__(self, other): return self.lower_strict() <= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() <= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) # (* ************************** Level 0 *************************** *) -class Date(EDTFObject): +class Date(EDTFObject): def set_year(self, y): if y is None: raise AttributeError("Year must not be None") @@ -231,33 +248,35 @@ def set_year(self, y): def get_year(self): return self._year + year = property(get_year, set_year) def set_month(self, m): self._month = m - if m == None: + if m is None: self.day = None def get_month(self): return self._month + month = property(get_month, set_month) def __init__(self, year=None, month=None, day=None, **kwargs): - for param in ('date', 'lower', 'upper'): + for param in ("date", "lower", "upper"): if param in kwargs: self.__init__(**kwargs[param]) return - self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day def __str__(self): r = self.year if self.month: - r += "-%s" % self.month + r += f"-{self.month}" if self.day: - r += "-%s" % self.day + r += f"-{self.day}" return r def isoformat(self, default=date.max): @@ -270,16 +289,18 @@ def isoformat(self, default=date.max): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r'X', r'0', self.year)) + return int(re.sub(r"X", r"0", self.year)) else: - return int(re.sub(r'X', r'9', self.year)) + return int(re.sub(r"X", r"9", self.year)) def _precise_month(self, lean): if self.month and self.month != "XX": try: return int(self.month) - except ValueError as e: - raise ValueError("Couldn't convert %s to int (in %s)" % (self.month, self)) + except ValueError as err: + raise ValueError( + f"Couldn't convert {self.month} to int (in {self})" + ) from err else: return 1 if lean == EARLIEST else 12 @@ -303,7 +324,9 @@ def _strict_date(self, lean): self._precise_year(lean), self._precise_month(lean), self._precise_day(lean), - ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) ) @property @@ -334,14 +357,14 @@ def __eq__(self, other): return self.isoformat() == other.isoformat() elif isinstance(other, struct_time): return self._strict_date() == trim_struct_time(other) - return super(DateAndTime, self).__eq__(other) + return super().__eq__(other) def __ne__(self, other): if isinstance(other, datetime): return self.isoformat() != other.isoformat() elif isinstance(other, struct_time): return self._strict_date() != trim_struct_time(other) - return super(DateAndTime, self).__ne__(other) + return super().__ne__(other) class Interval(EDTFObject): @@ -350,7 +373,7 @@ def __init__(self, lower, upper): self.upper = upper def __str__(self): - return "%s/%s" % (self.lower, self.upper) + return f"{self.lower}/{self.upper}" def _strict_date(self, lean): if lean == 
EARLIEST: @@ -365,6 +388,7 @@ def precision(self): return self.lower.precision return None + # (* ************************** Level 1 *************************** *) @@ -375,7 +399,8 @@ def parse_action(cls, toks): return cls(*args) def __init__(self, *args): - assert len(args) == 1 + if len(args) != 1: + raise AssertionError("UA must have exactly one argument") ua = args[0] self.is_uncertain = "?" in ua @@ -408,7 +433,7 @@ def __init__(self, date, ua): def __str__(self): if self.ua: - return "%s%s" % (self.date, self.ua) + return f"{self.date}{self.ua}" else: return str(self.date) @@ -429,7 +454,6 @@ def _get_fuzzy_padding(self, lean): class UnspecifiedIntervalSection(EDTFObject): - def __init__(self, sectionOpen=False, other_section_element=None): if sectionOpen: self.is_open = True @@ -471,19 +495,27 @@ class Unspecified(Date): class Level1Interval(Interval): def __init__(self, lower=None, upper=None): if lower: - if lower['date'] == '..': - self.lower = UnspecifiedIntervalSection(True, UncertainOrApproximate(**upper)) + if lower["date"] == "..": + self.lower = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**upper) + ) else: self.lower = UncertainOrApproximate(**lower) else: - self.lower = UnspecifiedIntervalSection(False, UncertainOrApproximate(**upper)) + self.lower = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**upper) + ) if upper: - if upper['date'] == '..': - self.upper = UnspecifiedIntervalSection(True, UncertainOrApproximate(**lower)) + if upper["date"] == "..": + self.upper = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**lower) + ) else: self.upper = UncertainOrApproximate(**upper) else: - self.upper = UnspecifiedIntervalSection(False, UncertainOrApproximate(**lower)) + self.upper = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**lower) + ) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -497,7 +529,7 @@ def __init__(self, year): self.year = year def __str__(self): - return "Y%s" % 
self.year + return f"Y{self.year}" def _precise_year(self): return int(self.year) @@ -505,23 +537,21 @@ def _precise_year(self): def _strict_date(self, lean): py = self._precise_year() if lean == EARLIEST: - return struct_time( - [py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) else: - return struct_time( - [py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) class Season(Date): def __init__(self, year, season, **kwargs): self.year = year - self.season = season # use season to look up month + self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited # `Date` methods do their thing. self.day = None def __str__(self): - return "%s-%s" % (self.year, self.season) + return f"{self.year}-{self.season}" def _precise_month(self, lean): rng = appsettings.SEASON_L2_MONTHS_RANGE[int(self.season)] @@ -535,16 +565,25 @@ def _precise_month(self, lean): class PartialUncertainOrApproximate(Date): - - def set_year(self, y): # Year can be None. + def set_year(self, y): # Year can be None. 
self._year = y + year = property(Date.get_year, set_year) def __init__( - self, year=None, month=None, day=None, - year_ua=False, month_ua = False, day_ua = False, - year_month_ua = False, month_day_ua = False, - ssn=None, season_ua=False, all_ua=False, year_ua_b = False + self, + year=None, + month=None, + day=None, + year_ua=False, + month_ua=False, + day_ua=False, + year_month_ua=False, + month_day_ua=False, + ssn=None, + season_ua=False, + all_ua=False, + year_ua_b=False, ): self.year = year self.month = month @@ -564,67 +603,51 @@ def __init__( self.all_ua = all_ua def __str__(self): - if self.season_ua: - return "%s%s" % (self.season, self.season_ua) + return f"{self.season}{self.season_ua}" if self.year_ua: - y = "%s%s" % (self.year, self.year_ua) + y = f"{self.year}{self.year_ua}" else: - if self.year_ua_b: - y = "%s%s" % (self.year_ua_b, self.year) - else: - y = str(self.year) + y = f"{self.year_ua_b}{self.year}" if self.year_ua_b else str(self.year) - if self.month_ua: - m = "%s%s" % (self.month_ua, self.month) - else: - m = str(self.month) + m = f"{self.month_ua}{self.month}" if self.month_ua else str(self.month) if self.day: - if self.day_ua: - d = "%s%s" % (self.day_ua, self.day) - else: - d = str(self.day) + d = f"{self.day_ua}{self.day}" if self.day_ua else str(self.day) else: d = None - if self.year_month_ua: # year/month approximate. No brackets needed. - ym = "%s-%s%s" % (y, m, self.year_month_ua) - if d: - result = "%s-%s" % (ym, d) - else: - result = ym + if self.year_month_ua: # year/month approximate. No brackets needed. 
+ ym = f"{y}-{m}{self.year_month_ua}" + result = f"{ym}-{d}" if d else ym elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + if self.year_ua: # we don't need the brackets round month and day + result = f"{y}-{m}-{d}{self.month_day_ua}" else: - result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + result = f"{y}-({m}-{d}){self.month_day_ua}" else: - if d: - result = "%s-%s-%s" % (y, m, d) - else: - result = "%s-%s" % (y, m) + result = f"{y}-{m}-{d}" if d else f"{y}-{m}" if self.all_ua: - result = "(%s)%s" % (result, self.all_ua) + result = f"({result}){self.all_ua}" return result def _precise_year(self, lean): if self.season: return self.season._precise_year(lean) - return super(PartialUncertainOrApproximate, self)._precise_year(lean) + return super()._precise_year(lean) def _precise_month(self, lean): if self.season: return self.season._precise_month(lean) - return super(PartialUncertainOrApproximate, self)._precise_month(lean) + return super()._precise_month(lean) def _precise_day(self, lean): if self.season: return self.season._precise_day(lean) - return super(PartialUncertainOrApproximate, self)._precise_day(lean) + return super()._precise_day(lean) def _get_fuzzy_padding(self, lean): """ @@ -635,23 +658,42 @@ def _get_fuzzy_padding(self, lean): result = relativedelta(0) if self.year_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + ) if self.year_ua_b: - result += appsettings.PADDING_YEAR_PRECISION * self.year_ua_b._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION * self.year_ua_b._get_multiplier() + ) if self.month_ua: - result += appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + result += ( + appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + ) if self.day_ua: result += 
appsettings.PADDING_DAY_PRECISION * self.day_ua._get_multiplier() if self.year_month_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_month_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.year_month_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION + * self.year_month_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.year_month_ua._get_multiplier() + ) if self.month_day_ua: - result += appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.month_day_ua._get_multiplier() + result += ( + appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.month_day_ua._get_multiplier() + ) if self.season_ua: - result += appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + result += ( + appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + ) if self.all_ua: multiplier = self.all_ua._get_multiplier() @@ -687,17 +729,17 @@ def __init__(self, lower=None, upper=None): self.upper = upper def __str__(self): - return "%s..%s" % (self.lower or '', self.upper or '') + return "{}..{}".format(self.lower or "", self.upper or "") class EarlierConsecutives(Level1Interval): def __str__(self): - return "%s%s" % (self.lower, self.upper) + return f"{self.lower}{self.upper}" class LaterConsecutives(Level1Interval): def __str__(self): - return "%s%s" % (self.lower, self.upper) + return f"{self.lower}{self.upper}" class OneOfASet(EDTFObject): @@ -710,21 +752,27 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "[%s]" % (", ".join([str(o) for o in self.objects])) + return "[{}]".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): strict_dates = [x._strict_date(lean) for x in self.objects] # Accounting for possible 'inf' and '-inf' values if lean == LATEST: - 
if any(isinstance(d, float) and d == float('inf') for d in strict_dates): - return float('inf') + if any(isinstance(d, float) and d == float("inf") for d in strict_dates): + return float("inf") else: - return max((d for d in strict_dates if not isinstance(d, float)), default=float('inf')) + return max( + (d for d in strict_dates if not isinstance(d, float)), + default=float("inf"), + ) else: - if any(isinstance(d, float) and d == float('-inf') for d in strict_dates): - return float('-inf') + if any(isinstance(d, float) and d == float("-inf") for d in strict_dates): + return float("-inf") else: - return min((d for d in strict_dates if not isinstance(d, float)), default=float('-inf')) + return min( + (d for d in strict_dates if not isinstance(d, float)), + default=float("-inf"), + ) class MultipleDates(EDTFObject): @@ -737,7 +785,7 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "{%s}" % (", ".join([str(o) for o in self.objects])) + return "{{{}}}".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): if lean == LATEST: @@ -769,6 +817,7 @@ def __init__(self, lower, upper): class Level2Season(Season): pass + class ExponentialYear(LongYear): def __init__(self, base, exponent, precision=None): self.base = base @@ -780,7 +829,8 @@ def _precise_year(self): def get_year(self): if self.precision: - return '%sE%sS%s' % (self.base, self.exponent, self.precision) + return f"{self.base}E{self.exponent}S{self.precision}" else: - return '%sE%s' % (self.base, self.exponent) + return f"{self.base}E{self.exponent}" + year = property(get_year) diff --git a/edtf/parser/parser_classes_tests.py b/edtf/parser/parser_classes_tests.py index 2cf330e..857d0f6 100644 --- a/edtf/parser/parser_classes_tests.py +++ b/edtf/parser/parser_classes_tests.py @@ -1,17 +1,23 @@ +# ruff: noqa: S101 # Asserts are ok in tests + import calendar import re -from time import struct_time from datetime import date, datetime from operator import add, sub 
+from time import struct_time from dateutil.relativedelta import relativedelta from edtf import appsettings -from edtf.convert import dt_to_struct_time, trim_struct_time, \ - TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS +from edtf.convert import ( + TIME_EMPTY_EXTRAS, + TIME_EMPTY_TIME, + dt_to_struct_time, + trim_struct_time, +) -EARLIEST = 'earliest' -LATEST = 'latest' +EARLIEST = "earliest" +LATEST = "latest" PRECISION_MILLENIUM = "millenium" PRECISION_CENTURY = "century" @@ -80,14 +86,16 @@ def apply_delta(op, time_struct, delta): # Convert result year back to its original millenium final_year = dt_result.year - millenium_diff return struct_time( - (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS)) + (final_year,) + dt_result.timetuple()[1:6] + tuple(TIME_EMPTY_EXTRAS) + ) -class EDTFObject(object): +class EDTFObject: """ Object to attact to a parser to become instantiated when the parser completes. """ + parser = None @classmethod @@ -99,9 +107,9 @@ def set_parser(cls, p): def parse_action(cls, toks): kwargs = toks.asDict() try: - return cls(**kwargs) # replace the token list with the class + return cls(**kwargs) # replace the token list with the class except Exception as e: - print("trying to %s.__init__(**%s)" % (cls.__name__, kwargs)) + print(f"trying to {cls.__name__}.__init__(**{kwargs})") raise e @classmethod @@ -109,14 +117,11 @@ def parse(cls, s): return cls.parser.parseString(s)[0] def __repr__(self): - return "%s: '%s'" % (type(self).__name__, str(self)) + return f"{type(self).__name__}: '{str(self)}'" def __init__(self, *args, **kwargs): - str = "%s.__init__(*%s, **%s)" % ( - type(self).__name__, - args, kwargs, - ) - raise NotImplementedError("%s is not implemented." 
% str) + str = f"{type(self).__name__}.__init__(*{args}, **{kwargs})" + raise NotImplementedError(f"{str} is not implemented.") def __str__(self): raise NotImplementedError @@ -137,25 +142,30 @@ def _get_fuzzy_padding(self, lean): return relativedelta(0) def get_is_approximate(self): - return getattr(self, '_is_approximate', False) + return getattr(self, "_is_approximate", False) def set_is_approximate(self, val): self._is_approximate = val + is_approximate = property(get_is_approximate, set_is_approximate) def get_is_uncertain(self): - return getattr(self, '_is_uncertain', False) + return getattr(self, "_is_uncertain", False) def set_is_uncertain(self, val): self._is_uncertain = val + is_uncertain = property(get_is_uncertain, set_is_uncertain) def get_is_uncertain_and_approximate(self): - return getattr(self, '_uncertain_and_approximate', False) + return getattr(self, "_uncertain_and_approximate", False) def set_is_uncertain_and_approximate(self, val): self._uncertain_and_approximate = val - is_uncertain_and_approximate = property(get_is_uncertain_and_approximate, set_is_uncertain_and_approximate) + + is_uncertain_and_approximate = property( + get_is_uncertain_and_approximate, set_is_uncertain_and_approximate + ) def lower_fuzzy(self): strict_val = self.lower_strict() @@ -190,7 +200,9 @@ def __gt__(self, other): return self.lower_strict() > dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() > trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __ge__(self, other): if isinstance(other, EDTFObject): @@ -199,7 +211,9 @@ def __ge__(self, other): return self.lower_strict() >= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() >= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, 
type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __lt__(self, other): if isinstance(other, EDTFObject): @@ -208,7 +222,9 @@ def __lt__(self, other): return self.lower_strict() < dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() < trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) def __le__(self, other): if isinstance(other, EDTFObject): @@ -217,13 +233,15 @@ def __le__(self, other): return self.lower_strict() <= dt_to_struct_time(other) elif isinstance(other, struct_time): return self.lower_strict() <= trim_struct_time(other) - raise TypeError("can't compare %s with %s" % (type(self).__name__, type(other).__name__)) + raise TypeError( + f"can't compare {type(self).__name__} with {type(other).__name__}" + ) # (* ************************** Level 0 *************************** *) -class Date(EDTFObject): +class Date(EDTFObject): def set_year(self, y): if y is None: raise AttributeError("Year must not be None") @@ -231,33 +249,35 @@ def set_year(self, y): def get_year(self): return self._year + year = property(get_year, set_year) def set_month(self, m): self._month = m - if m == None: + if m is None: self.day = None def get_month(self): return self._month + month = property(get_month, set_month) def __init__(self, year=None, month=None, day=None, **kwargs): - for param in ('date', 'lower', 'upper'): + for param in ("date", "lower", "upper"): if param in kwargs: self.__init__(**kwargs[param]) return - self.year = year # Year is required, but sometimes passed in as a 'date' dict. + self.year = year # Year is required, but sometimes passed in as a 'date' dict. 
self.month = month self.day = day def __str__(self): r = self.year if self.month: - r += "-%s" % self.month + r += f"-{self.month}" if self.day: - r += "-%s" % self.day + r += f"-{self.day}" return r def isoformat(self, default=date.max): @@ -270,16 +290,18 @@ def isoformat(self, default=date.max): def _precise_year(self, lean): # Replace any ambiguous characters in the year string with 0s or 9s if lean == EARLIEST: - return int(re.sub(r'X', r'0', self.year)) + return int(re.sub(r"X", r"0", self.year)) else: - return int(re.sub(r'X', r'9', self.year)) + return int(re.sub(r"X", r"9", self.year)) def _precise_month(self, lean): if self.month and self.month != "XX": try: return int(self.month) - except ValueError as e: - raise ValueError("Couldn't convert %s to int (in %s)" % (self.month, self)) + except ValueError as err: + raise ValueError( + f"Couldn't convert {self.month} to int (in {self})" + ) from err else: return 1 if lean == EARLIEST else 12 @@ -303,7 +325,9 @@ def _strict_date(self, lean): self._precise_year(lean), self._precise_month(lean), self._precise_day(lean), - ) + tuple(TIME_EMPTY_TIME) + tuple(TIME_EMPTY_EXTRAS) + ) + + tuple(TIME_EMPTY_TIME) + + tuple(TIME_EMPTY_EXTRAS) ) @property @@ -334,14 +358,14 @@ def __eq__(self, other): return self.isoformat() == other.isoformat() elif isinstance(other, struct_time): return self._strict_date() == trim_struct_time(other) - return super(DateAndTime, self).__eq__(other) + return super().__eq__(other) def __ne__(self, other): if isinstance(other, datetime): return self.isoformat() != other.isoformat() elif isinstance(other, struct_time): return self._strict_date() != trim_struct_time(other) - return super(DateAndTime, self).__ne__(other) + return super().__ne__(other) class Interval(EDTFObject): @@ -350,7 +374,7 @@ def __init__(self, lower, upper): self.upper = upper def __str__(self): - return "%s/%s" % (self.lower, self.upper) + return f"{self.lower}/{self.upper}" def _strict_date(self, lean): if lean == 
EARLIEST: @@ -359,7 +383,9 @@ def _strict_date(self, lean): if r is None: raise AttributeError return r - except AttributeError: # it's a string, or no date. Result depends on the upper date + except ( + AttributeError + ): # it's a string, or no date. Result depends on the upper date upper = self.upper._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: @@ -368,8 +394,9 @@ def _strict_date(self, lean): if r is None: raise AttributeError return r - except AttributeError: # an 'unknown' or 'open' string - depends on the lower date - import pdb; pdb.set_trace() + except ( + AttributeError + ): # an 'unknown' or 'open' string - depends on the lower date if self.upper and (self.upper == "open" or self.upper.date == "open"): return dt_to_struct_time(date.today()) # it's still happening else: @@ -420,16 +447,16 @@ def __init__(self, date, ua): def __str__(self): if self.ua: - return "%s%s" % (self.date, self.ua) + return f"{self.date}{self.ua}" else: return str(self.date) def _strict_date(self, lean): if self.date == "open": - return None # depends on the other date + return None # depends on the other date return dt_to_struct_time(date.today()) - if self.date =="unknown": - return None # depends on the other date + if self.date == "unknown": + return None # depends on the other date return self.date._strict_date(lean) def _get_fuzzy_padding(self, lean): @@ -445,7 +472,6 @@ def _get_fuzzy_padding(self, lean): return multiplier * appsettings.PADDING_YEAR_PRECISION - class Testi(EDTFObject): # @classmethod # def parse_action(cls, toks): @@ -455,8 +481,8 @@ class Testi(EDTFObject): def __init__(self, **args): print(args) -class UnspecifiedIntervalSection(EDTFObject): +class UnspecifiedIntervalSection(EDTFObject): def __init__(self, sectionOpen=False, other_section_element=None): if sectionOpen: self.is_open = True @@ -473,13 +499,14 @@ def __str__(self): return ".." 
def _strict_date(self, lean): - #import pdb; pdb.set_trace() if lean == EARLIEST: if self.is_unknown: upper = self.other._strict_date(LATEST) return apply_delta(sub, upper, appsettings.DELTA_IF_UNKNOWN) else: - return dt_to_struct_time(date.min) # from the beginning of time; *ahem, i mean python datetime + return dt_to_struct_time( + date.min + ) # from the beginning of time; *ahem, i mean python datetime else: if self.is_unknown: lower = self.other._strict_date(EARLIEST) @@ -494,21 +521,28 @@ class Unspecified(Date): class Level1Interval(Interval): def __init__(self, lower=None, upper=None): - #import pdb; pdb.set_trace() if lower: - if lower['date'] == '..': - self.lower = UnspecifiedIntervalSection(True, UncertainOrApproximate(**upper)) + if lower["date"] == "..": + self.lower = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**upper) + ) else: self.lower = UncertainOrApproximate(**lower) else: - self.lower = UnspecifiedIntervalSection(False, UncertainOrApproximate(**upper)) + self.lower = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**upper) + ) if upper: - if upper['date'] == '..': - self.upper = UnspecifiedIntervalSection(True, UncertainOrApproximate(**lower)) + if upper["date"] == "..": + self.upper = UnspecifiedIntervalSection( + True, UncertainOrApproximate(**lower) + ) else: self.upper = UncertainOrApproximate(**upper) else: - self.upper = UnspecifiedIntervalSection(False, UncertainOrApproximate(**lower)) + self.upper = UnspecifiedIntervalSection( + False, UncertainOrApproximate(**lower) + ) def _get_fuzzy_padding(self, lean): if lean == EARLIEST: @@ -522,7 +556,7 @@ def __init__(self, year): self.year = year def __str__(self): - return "Y%s" % self.year + return f"Y{self.year}" def _precise_year(self): return int(self.year) @@ -530,23 +564,21 @@ def _precise_year(self): def _strict_date(self, lean): py = self._precise_year() if lean == EARLIEST: - return struct_time( - [py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return 
struct_time([py, 1, 1] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) else: - return struct_time( - [py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) + return struct_time([py, 12, 31] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) class Season(Date): def __init__(self, year, season, **kwargs): self.year = year - self.season = season # use season to look up month + self.season = season # use season to look up month # day isn't part of the 'season' spec, but it helps the inherited # `Date` methods do their thing. self.day = None def __str__(self): - return "%s-%s" % (self.year, self.season) + return f"{self.year}-{self.season}" def _precise_month(self, lean): rng = appsettings.SEASON_MONTHS_RANGE[int(self.season)] @@ -560,16 +592,24 @@ def _precise_month(self, lean): class PartialUncertainOrApproximate(Date): - - def set_year(self, y): # Year can be None. + def set_year(self, y): # Year can be None. self._year = y + year = property(Date.get_year, set_year) def __init__( - self, year=None, month=None, day=None, - year_ua=False, month_ua = False, day_ua = False, - year_month_ua = False, month_day_ua = False, - ssn=None, season_ua=False, all_ua=False + self, + year=None, + month=None, + day=None, + year_ua=False, + month_ua=False, + day_ua=False, + year_month_ua=False, + month_day_ua=False, + ssn=None, + season_ua=False, + all_ua=False, ): self.year = year self.month = month @@ -588,64 +628,48 @@ def __init__( self.all_ua = all_ua def __str__(self): - if self.season_ua: - return "%s%s" % (self.season, self.season_ua) + return f"{self.season}{self.season_ua}" - if self.year_ua: - y = "%s%s" % (self.year, self.year_ua) - else: - y = str(self.year) + y = f"{self.year}{self.year_ua}" if self.year_ua else str(self.year) - if self.month_ua: - m = "(%s)%s" % (self.month, self.month_ua) - else: - m = str(self.month) + m = f"({self.month}){self.month_ua}" if self.month_ua else str(self.month) if self.day: - if self.day_ua: - d = "(%s)%s" % (self.day, self.day_ua) - else: - d = str(self.day) + 
d = f"({self.day}){self.day_ua}" if self.day_ua else str(self.day) else: d = None - if self.year_month_ua: # year/month approximate. No brackets needed. - ym = "%s-%s%s" % (y, m, self.year_month_ua) - if d: - result = "%s-%s" % (ym, d) - else: - result = ym + if self.year_month_ua: # year/month approximate. No brackets needed. + ym = f"{y}-{m}{self.year_month_ua}" + result = f"{ym}-{d}" if d else ym elif self.month_day_ua: - if self.year_ua: # we don't need the brackets round month and day - result = "%s-%s-%s%s" % (y, m, d, self.month_day_ua) + if self.year_ua: # we don't need the brackets round month and day + result = f"{y}-{m}-{d}{self.month_day_ua}" else: - result = "%s-(%s-%s)%s" % (y, m, d, self.month_day_ua) + result = f"{y}-({m}-{d}){self.month_day_ua}" else: - if d: - result = "%s-%s-%s" % (y, m, d) - else: - result = "%s-%s" % (y, m) + result = f"{y}-{m}-{d}" if d else f"{y}-{m}" if self.all_ua: - result = "(%s)%s" % (result, self.all_ua) + result = f"({result}){self.all_ua}" return result def _precise_year(self, lean): if self.season: return self.season._precise_year(lean) - return super(PartialUncertainOrApproximate, self)._precise_year(lean) + return super()._precise_year(lean) def _precise_month(self, lean): if self.season: return self.season._precise_month(lean) - return super(PartialUncertainOrApproximate, self)._precise_month(lean) + return super()._precise_month(lean) def _precise_day(self, lean): if self.season: return self.season._precise_day(lean) - return super(PartialUncertainOrApproximate, self)._precise_day(lean) + return super()._precise_day(lean) def _get_fuzzy_padding(self, lean): """ @@ -656,21 +680,38 @@ def _get_fuzzy_padding(self, lean): result = relativedelta(0) if self.year_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION * self.year_ua._get_multiplier() + ) if self.month_ua: - result += appsettings.PADDING_MONTH_PRECISION * 
self.month_ua._get_multiplier() + result += ( + appsettings.PADDING_MONTH_PRECISION * self.month_ua._get_multiplier() + ) if self.day_ua: result += appsettings.PADDING_DAY_PRECISION * self.day_ua._get_multiplier() if self.year_month_ua: - result += appsettings.PADDING_YEAR_PRECISION * self.year_month_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.year_month_ua._get_multiplier() + result += ( + appsettings.PADDING_YEAR_PRECISION + * self.year_month_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.year_month_ua._get_multiplier() + ) if self.month_day_ua: - result += appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() - result += appsettings.PADDING_MONTH_PRECISION * self.month_day_ua._get_multiplier() + result += ( + appsettings.PADDING_DAY_PRECISION * self.month_day_ua._get_multiplier() + ) + result += ( + appsettings.PADDING_MONTH_PRECISION + * self.month_day_ua._get_multiplier() + ) if self.season_ua: - result += appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + result += ( + appsettings.PADDING_SEASON_PRECISION * self.season_ua._get_multiplier() + ) if self.all_ua: multiplier = self.all_ua._get_multiplier() @@ -706,7 +747,7 @@ def __init__(self, lower=None, upper=None): self.upper = upper def __str__(self): - return "%s..%s" % (self.lower or '', self.upper or '') + return "{}..{}".format(self.lower or "", self.upper or "") class EarlierConsecutives(Consecutives): @@ -727,7 +768,7 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "[%s]" % (", ".join([str(o) for o in self.objects])) + return "[{}]".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): if lean == LATEST: @@ -746,7 +787,7 @@ def __init__(self, *args): self.objects = args def __str__(self): - return "{%s}" % (", ".join([str(o) for o in self.objects])) + return "{{{}}}".format(", ".join([str(o) for o in self.objects])) def _strict_date(self, 
lean): if lean == LATEST: @@ -786,7 +827,8 @@ def _precise_year(self): def get_year(self): if self.precision: - return '%sE%sS%s' % (self.base, self.exponent, self.precision) + return f"{self.base}E{self.exponent}S{self.precision}" else: - return '%sE%s' % (self.base, self.exponent) + return f"{self.base}E{self.exponent}" + year = property(get_year) diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 817354a..ae82057 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -1,10 +1,13 @@ -import pytest +# ruff: noqa: S101 # Asserts are ok in tests + from datetime import date from time import struct_time -from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS +import pytest + from edtf.parser.edtf_exceptions import EDTFParseException +from edtf.parser.grammar import parse_edtf as parse +from edtf.parser.parser_classes import TIME_EMPTY_EXTRAS, TIME_EMPTY_TIME, EDTFObject # Example object types and attributes represented as tuples. # The first item in each tuple is the input EDTF string, and expected parse result. 
@@ -23,93 +26,92 @@ EXAMPLES = ( # ******************************* LEVEL 0 ********************************* # year, month, day - ('2001-02-03', ('2001-02-03',)), + ("2001-02-03", ("2001-02-03",)), # year, month - ('2008-12', ('2008-12-01', '2008-12-31')), + ("2008-12", ("2008-12-01", "2008-12-31")), # year - ('2008', ('2008-01-01', '2008-12-31')), + ("2008", ("2008-01-01", "2008-12-31")), # a negative year - ('-0999', ('-0999-01-01', '-0999-12-31')), + ("-0999", ("-0999-01-01", "-0999-12-31")), # year zero - ('0000', ('0000-01-01', '0000-12-31')), + ("0000", ("0000-01-01", "0000-12-31")), # DateTimes - ('2001-02-03T09:30:01', ('2001-02-03',)), - ('2004-01-01T10:10:10Z', ('2004-01-01',)), - ('2004-01-01T10:10:10+05:00', ('2004-01-01',)), - ('1985-04-12T23:20:30', ('1985-04-12',)), + ("2001-02-03T09:30:01", ("2001-02-03",)), + ("2004-01-01T10:10:10Z", ("2004-01-01",)), + ("2004-01-01T10:10:10+05:00", ("2004-01-01",)), + ("1985-04-12T23:20:30", ("1985-04-12",)), # Intervals # An interval beginning sometime in 1964 and ending sometime in 2008. Year precision. - ('1964/2008', ('1964-01-01', '2008-12-31')), + ("1964/2008", ("1964-01-01", "2008-12-31")), # An interval beginning sometime in June 2004 and ending sometime in August of 2006. Month precision. - ('2004-06/2006-08', ('2004-06-01', '2006-08-31')), + ("2004-06/2006-08", ("2004-06-01", "2006-08-31")), # An interval beginning sometime on February 1, 2004 and ending sometime on February 8, 2005. Day precision. - ('2004-02-01/2005-02-08', ('2004-02-01', '2005-02-08')), + ("2004-02-01/2005-02-08", ("2004-02-01", "2005-02-08")), # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. # The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. 
- ('2004-02-01/2005-02', ('2004-02-01', '2005-02-28')), + ("2004-02-01/2005-02", ("2004-02-01", "2005-02-28")), # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. # The start endpoint has day precision and the end endpoint has year precision. - ('2004-02-01/2005', ('2004-02-01', '2005-12-31')), + ("2004-02-01/2005", ("2004-02-01", "2005-12-31")), # An interval beginning sometime in 2005 and ending sometime in February 2006. - ('2005/2006-02', ('2005-01-01', '2006-02-28')), + ("2005/2006-02", ("2005-01-01", "2006-02-28")), # An interval beginning sometime in -2005 and ending sometime in February -2004. - ('-2005/-1999-02', ('-2005-01-01', '-1999-02-28')), - + ("-2005/-1999-02", ("-2005-01-01", "-1999-02-28")), # ******************************* LEVEL 1 ********************************* - # Uncertain/Approximate + # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely - ('1984?', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), - ('2004-06-11?', ('2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12')), - ('2004-06?', ('2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30')), + ("1984?", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), + ("2004-06-11?", ("2004-06-11", "2004-06-11", "2004-06-10", "2004-06-12")), + ("2004-06?", ("2004-06-01", "2004-06-30", "2004-05-01", "2004-07-30")), # "approximately" the year 1984 - ('1984~', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), + ("1984~", ("1984-01-01", "1984-12-31", "1983-01-01", "1985-12-31")), # the year is approximately 1984 and even that is uncertain - ('1984%', ('1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31')), + ("1984%", ("1984-01-01", "1984-12-31", "1982-01-01", "1986-12-31")), # Unspecified # some unspecified year in the 1990s. - ('199X', ('1990-01-01', '1999-12-31')), + ("199X", ("1990-01-01", "1999-12-31")), # some unspecified year in the 1900s. 
- ('19XX', ('1900-01-01', '1999-12-31')), + ("19XX", ("1900-01-01", "1999-12-31")), # some month in 1999 - ('1999-XX', ('1999-01-01', '1999-12-31')), + ("1999-XX", ("1999-01-01", "1999-12-31")), # some day in January 1999 - ('1999-01-XX', ('1999-01-01', '1999-01-31')), + ("1999-01-XX", ("1999-01-01", "1999-01-31")), # some day in 1999 - ('1999-XX-XX', ('1999-01-01', '1999-12-31')), - + ("1999-XX-XX", ("1999-01-01", "1999-12-31")), # Uncertain/Approximate lower boundary dates (BCE) - ('-0275~', ('-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31')), - ('-0001~', ('-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31')), - ('0000~', ('0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31')), - + ("-0275~", ("-0275-01-01", "-0275-12-31", "-0276-01-01", "-0274-12-31")), + ("-0001~", ("-0001-01-01", "-0001-12-31", "-0002-01-01", "0000-12-31")), + ("0000~", ("0000-01-01", "0000-12-31", "-0001-01-01", "0001-12-31")), # L1 Extended Interval # beginning unknown, end 2006 - ('/2006', ('1996-12-31', '2006-12-31')), + ("/2006", ("1996-12-31", "2006-12-31")), # beginning June 1, 2004, end unknown - ('2004-06-01/', ('2004-06-01', '2014-06-01')), + ("2004-06-01/", ("2004-06-01", "2014-06-01")), # beginning open, end 2006 - ('../2006', ('-inf', '2006-12-31')), + ("../2006", ("-inf", "2006-12-31")), # beginning January 1, 2004 with no end date - ('2004-01-01/..', ('2004-01-01', 'inf')), + ("2004-01-01/..", ("2004-01-01", "inf")), # interval beginning approximately 1984 and ending June 2004 - ('1984~/2004-06', ('1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30')), + ("1984~/2004-06", ("1984-01-01", "2004-06-30", "1983-01-01", "2004-06-30")), # interval beginning 1984 and ending approximately June 2004 - ('1984/2004-06~', ('1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30')), - ('1984?/2004%', ('1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31')), - ('1984~/2004~', ('1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31')), + ("1984/2004-06~", ("1984-01-01", 
"2004-06-30", "1984-01-01", "2004-07-30")), + ("1984?/2004%", ("1984-01-01", "2004-12-31", "1983-01-01", "2006-12-31")), + ("1984~/2004~", ("1984-01-01", "2004-12-31", "1983-01-01", "2005-12-31")), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ('1984-06?/2004-08?', ('1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30')), - ('1984-06-02?/2004-08-08~', ('1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09')), - ('1984-06-02?/', ('1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02')), + ("1984-06?/2004-08?", ("1984-06-01", "2004-08-31", "1984-05-01", "2004-09-30")), + ( + "1984-06-02?/2004-08-08~", + ("1984-06-02", "2004-08-08", "1984-06-01", "2004-08-09"), + ), + ("1984-06-02?/", ("1984-06-02", "1994-06-02", "1984-06-01", "1994-06-02")), # Year exceeding 4 digits - ('Y170000002', ('170000002-01-01', '170000002-12-31')), - ('Y-170000002', ('-170000002-01-01', '-170000002-12-31')), + ("Y170000002", ("170000002-01-01", "170000002-12-31")), + ("Y-170000002", ("-170000002-01-01", "-170000002-12-31")), # Seasons - ('2001-21', ('2001-03-01', '2001-05-31')), - ('2003-22', ('2003-06-01', '2003-08-31')), - ('2000-23', ('2000-09-01', '2000-11-30')), - ('2010-24', ('2010-12-01', '2010-12-31')), - + ("2001-21", ("2001-03-01", "2001-05-31")), + ("2003-22", ("2003-06-01", "2003-08-31")), + ("2000-23", ("2000-09-01", "2000-11-30")), + ("2010-24", ("2010-12-01", "2010-12-31")), # ******************************* LEVEL 2 ********************************* # Qualification # Group qualification: a qualification character to the immediate right of a component applies @@ -117,7 +119,7 @@ # year, month, and day are uncertain and approximate ('2004-06-11%', ('2004-06-11', '2004-06-09', '2004-06-13')), # uncertain year; month, day known - ('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), + ("2004?-06-11", ("2004-06-11", "2003-06-11", "2005-06-11")), # year and month are approximate; day known 
('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), @@ -139,32 +141,30 @@ ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), # Year known, month and day approximate ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), - # Partial unspecified # December 25 sometime during the 1560s - ('156X-12-25', ('1560-12-25', '1569-12-25')), + ("156X-12-25", ("1560-12-25", "1569-12-25")), # December 25 sometime during the 1500s - ('15XX-12-25', ('1500-12-25', '1599-12-25')), + ("15XX-12-25", ("1500-12-25", "1599-12-25")), # Year and day of month specified, month unspecified - ('1560-XX-25', ('1560-01-25', '1560-12-25')), - ('15XX-12-XX', ('1500-12-01', '1599-12-31')), + ("1560-XX-25", ("1560-01-25", "1560-12-25")), + ("15XX-12-XX", ("1500-12-01", "1599-12-31")), # Day specified, year and month unspecified - ('XXXX-XX-23', ('0000-01-23', '9999-12-23')), - + ("XXXX-XX-23", ("0000-01-23", "9999-12-23")), # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 - ('[1667, 1668, 1670..1672]', ('1667-01-01', '1672-12-31')), + ("[1667, 1668, 1670..1672]", ("1667-01-01", "1672-12-31")), # December 3, 1760 or some earlier date - ('[..1760-12-03]', ('-inf', '1760-12-03')), + ("[..1760-12-03]", ("-inf", "1760-12-03")), # December 1760 or some later month - ('[1760-12..]', ('1760-12-01', 'inf')), + ("[1760-12..]", ("1760-12-01", "inf")), # January or February of 1760 or December 1760 or some later month ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), # Either the year 1667 or the month December of 1760. - ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), + ("[1667, 1760-12]", ("1667-01-01", "1760-12-31")), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 - ('{1667,1668, 1670..1672}', ('1667-01-01', '1672-12-31')), + ("{1667,1668, 1670..1672}", ("1667-01-01", "1672-12-31")), # The year 1960 and the month December of 1961. 
('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), @@ -176,12 +176,15 @@ # L2 Extended Interval # Interval with fuzzy day endpoints in June 2004 - ('2004-06-~01/2004-06-~20', ('2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21')), + ( + "2004-06-~01/2004-06-~20", + ("2004-06-01", "2004-06-20", "2004-05-31", "2004-06-21"), + ), # The interval began on an unspecified day in June 2004. - ('2004-06-XX/2004-07-03', ('2004-06-01', '2004-07-03')), + ("2004-06-XX/2004-07-03", ("2004-06-01", "2004-07-03")), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('Y17E7', ('170000000-01-01', '170000000-12-31')), + ("Y17E7", ("170000000-01-01", "170000000-12-31")), # the year -170000000 ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), # L2 significant digits @@ -189,63 +192,66 @@ # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons # Spring southern hemisphere, 2001 - ('2001-29', ('2001-09-01', '2001-11-30')), + ("2001-29", ("2001-09-01", "2001-11-30")), # second quarter of 2001 - ('2001-34', ('2001-04-01', '2001-06-30')), + ("2001-34", ("2001-04-01", "2001-06-30")), ) BAD_EXAMPLES = ( # parentheses are not used for group qualification in the 2018 spec None, - '', - 'not a edtf string', - 'Y17E7-12-26', # Y indicates that the date is year only - '2016-13-08', # wrong day order - '2016-02-39', # out of range - '-0000-01-01', # negative zero year - '2004-(06)?-11', # uncertain month, year and day known - OLD SPEC - '2004-06-(11)~', # day is approximate; year, month known - OLD SPEC - '2004-(06)%', # Year known, month within year is approximate and uncertain - OLD SPEC - '2004-(06-11)?', # Year known, month and day uncertain - OLD SPEC - '2004?-06-(11)~', # Year uncertain, month known, day approximate - OLD SPEC - '(2004-(06)~)?', # Year uncertain and month is both uncertain and approximate - OLD SPEC - '(2004)?-06-04~', # Year uncertain, month and day approximate.- OLD SPEC - '(2011)-06-04~', # Year known, month and day 
approximate. Note that this has the same meaning as the following.- OLD SPEC - '2011-(06-04)~', # Year known, month and day approximate.- OLD SPEC - '2004-06-(01)~/2004-06-(20)~', # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC + "", + "not a edtf string", + "Y17E7-12-26", # Y indicates that the date is year only + "2016-13-08", # wrong day order + "2016-02-39", # out of range + "-0000-01-01", # negative zero year + "2004-(06)?-11", # uncertain month, year and day known - OLD SPEC + "2004-06-(11)~", # day is approximate; year, month known - OLD SPEC + "2004-(06)%", # Year known, month within year is approximate and uncertain - OLD SPEC + "2004-(06-11)?", # Year known, month and day uncertain - OLD SPEC + "2004?-06-(11)~", # Year uncertain, month known, day approximate - OLD SPEC + "(2004-(06)~)?", # Year uncertain and month is both uncertain and approximate - OLD SPEC + "(2004)?-06-04~", # Year uncertain, month and day approximate.- OLD SPEC + "(2011)-06-04~", # Year known, month and day approximate. Note that this has the same meaning as the following.- OLD SPEC + "2011-(06-04)~", # Year known, month and day approximate.- OLD SPEC + "2004-06-(01)~/2004-06-(20)~", # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC ) + def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings or infinities to time structs or float infinities. 
""" - if iso_date == 'inf': - return float('inf') - elif iso_date == '-inf': - return float('-inf') + """Convert YYYY-mm-dd date strings or infinities to time structs or float infinities.""" + if iso_date == "inf": + return float("inf") + elif iso_date == "-inf": + return float("-inf") - if iso_date[0] == '-': + if iso_date[0] == "-": is_negative = True iso_date = iso_date[1:] else: is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] + y, mo, d = (int(i) for i in iso_date.split("-")) if is_negative: y *= -1 return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) @pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) def test_edtf_examples(test_input, expected_tuple): - """ Test parsing of EDTF strings with expected outputs. """ + """Test parsing of EDTF strings with expected outputs.""" result = parse(test_input) assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" # Extract only the date part if the result includes a time. result_date = str(result) - if 'T' in result_date: - result_date = result_date.split('T')[0] + if "T" in result_date: + result_date = result_date.split("T")[0] # Unpack expected results based on their count if len(expected_tuple) == 1: - assert result_date == expected_tuple[0], f"Expected {expected_tuple[0]}, got {result_date}" + assert ( + result_date == expected_tuple[0] + ), f"Expected {expected_tuple[0]}, got {result_date}" elif len(expected_tuple) == 2: lower_strict = iso_to_struct_time(expected_tuple[0]) upper_strict = iso_to_struct_time(expected_tuple[1]) @@ -272,13 +278,13 @@ def test_edtf_examples(test_input, expected_tuple): @pytest.mark.parametrize("bad_input", BAD_EXAMPLES) def test_non_parsing(bad_input): - """ Test that non-parsing inputs correctly raise an exception. 
""" + """Test that non-parsing inputs correctly raise an exception.""" with pytest.raises(EDTFParseException): parse(bad_input) def test_comparisons(): - """ Test comparisons between parsed EDTF objects and standard dates. """ + """Test comparisons between parsed EDTF objects and standard dates.""" d1 = parse("1979-08~") d2 = parse("1979-08~") d3 = parse("1979-09-16") diff --git a/edtf/tests.py b/edtf/tests.py index f5ef655..9812b65 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -1,14 +1,18 @@ +# ruff: noqa: S101 # Asserts are ok in tests + +from datetime import date, datetime from time import struct_time -from datetime import datetime, date from edtf import convert + def test_dt_to_struct_time_for_datetime(): now = datetime.now() st = convert.dt_to_struct_time(now) assert st[:6] == now.timetuple()[:6] assert st[6:] == (0, 0, -1) + def test_dt_to_struct_time_for_date(): today = date.today() st = convert.dt_to_struct_time(today) @@ -16,11 +20,15 @@ def test_dt_to_struct_time_for_date(): assert st[3:6] == (0, 0, 0) assert st[6:] == (0, 0, -1) + def test_struct_time_to_date(): - st = struct_time([2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) + st = struct_time( + [2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS + ) d = date(*st[:3]) assert d == convert.struct_time_to_date(st) + def test_struct_time_to_datetime(): st = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) dt = datetime(*st[:6]) @@ -28,14 +36,23 @@ def test_struct_time_to_datetime(): assert dt == converted_dt assert converted_dt.timetuple()[6:] == (3, 109, -1) + def test_trim_struct_time(): now = datetime.now() st = now.timetuple() trimmed_st = convert.trim_struct_time(st) - assert trimmed_st[:6] == (now.year, now.month, now.day, now.hour, now.minute, now.second) + assert trimmed_st[:6] == ( + now.year, + now.month, + now.day, + now.hour, + now.minute, + now.second, + ) assert trimmed_st[6:] == (0, 0, -1) assert st[6:] != (0, 0, -1) + def 
test_struct_time_to_jd(): st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) jd_ad = 2458227.9263194446 @@ -44,6 +61,7 @@ def test_struct_time_to_jd(): jd_bc = 984091.9263194444 assert jd_bc == convert.struct_time_to_jd(st_bc) + def test_jd_to_struct_time(): jd_ad = 2458227.9263194446 st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) @@ -52,27 +70,32 @@ def test_jd_to_struct_time(): st_bc = struct_time([-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) assert st_bc == convert.jd_to_struct_time(jd_bc) + def test_jd_round_trip_for_extreme_future(): original_st = struct_time([999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) jd = convert.struct_time_to_jd(original_st) converted_st = convert.jd_to_struct_time(jd) assert original_st[:5] == converted_st[:5] - assert 3 - 1 == converted_st[5] + assert converted_st[5] == 3 - 1 + def test_jd_round_trip_for_extreme_past(): original_st = struct_time([-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) - assert (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) == tuple(converted_st) + assert tuple(converted_st) == (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) + def test_jd_round_trip_for_zero_year_aka_1_bc(): original_st = struct_time([0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) - assert (0, 9, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + assert tuple(converted_st) == (0, 9, 5, 4, 58, 59, 0, 0, -1) + def test_jd_round_trip_for_2_bc(): original_st = struct_time([-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) - assert (-1, 12, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + assert tuple(converted_st) == (-1, 12, 5, 4, 58, 59, 0, 0, -1) + def test_roll_negative_time_fields(): year = -100 @@ -81,4 +104,6 @@ 
def test_roll_negative_time_fields(): hour = -25 minute = -74 second = -253 - assert (-102, 5, 24, 21, 41, 47) == convert._roll_negative_time_fields(year, month, day, hour, minute, second) + assert convert._roll_negative_time_fields( + year, month, day, hour, minute, second + ) == (-102, 5, 24, 21, 41, 47) diff --git a/edtf_django_tests/edtf_django_tests/settings.py b/edtf_django_tests/edtf_django_tests/settings.py index a8121e3..bad4f60 100644 --- a/edtf_django_tests/edtf_django_tests/settings.py +++ b/edtf_django_tests/edtf_django_tests/settings.py @@ -20,7 +20,7 @@ # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = "django-insecure-zkd&%e=di9d(p@wq7vnstn+4dx7cxbxkve�*+57sks0q$=0a" +SECRET_KEY = "django-insecure-zkd&%e=di9d(p@wq7vnstn+4dx7cxbxkve�*+57sks0q$=0a" # noqa: S105 (only for testing) # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True diff --git a/edtf_django_tests/edtf_django_tests/urls.py b/edtf_django_tests/edtf_django_tests/urls.py index ceca78b..0b30a1b 100644 --- a/edtf_django_tests/edtf_django_tests/urls.py +++ b/edtf_django_tests/edtf_django_tests/urls.py @@ -14,6 +14,7 @@ 1. Import the include() function: from django.urls import include, path 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ + from django.contrib import admin from django.urls import path diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py index 8c38f3f..846f6b4 100644 --- a/edtf_django_tests/edtf_integration/admin.py +++ b/edtf_django_tests/edtf_integration/admin.py @@ -1,3 +1 @@ -from django.contrib import admin - # Register your models here. 
diff --git a/edtf_django_tests/edtf_integration/migrations/0001_initial.py b/edtf_django_tests/edtf_integration/migrations/0001_initial.py index 286a9de..0311290 100644 --- a/edtf_django_tests/edtf_integration/migrations/0001_initial.py +++ b/edtf_django_tests/edtf_integration/migrations/0001_initial.py @@ -28,7 +28,7 @@ class Migration(migrations.Migration): blank=True, help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", max_length=255, - null=True, + null=False, verbose_name="Date of creation (display)", ), ), @@ -38,7 +38,7 @@ class Migration(migrations.Migration): blank=True, help_text="Enter the date in EDTF format (e.g., '2004-06~').", max_length=255, - null=True, + null=False, verbose_name="Date of creation (EDTF format)", ), ), diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py index 0274d5f..5120889 100644 --- a/edtf_django_tests/edtf_integration/models.py +++ b/edtf_django_tests/edtf_integration/models.py @@ -1,4 +1,5 @@ from django.db import models + from edtf.fields import EDTFField @@ -6,17 +7,17 @@ class TestEvent(models.Model): date_display = models.CharField( "Date of creation (display)", blank=True, - null=True, + null=False, max_length=255, - help_text="Enter the date in natural language format (e.g., 'Approximately June 2004')." + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", ) date_edtf_direct = models.CharField( "Date of creation (EDTF format)", max_length=255, blank=True, - null=True, - help_text="Enter the date in EDTF format (e.g., '2004-06~')." + null=False, + help_text="Enter the date in EDTF format (e.g., '2004-06~').", ) # EDTF field that parses the input from either natural language or direct EDTF string @@ -28,12 +29,12 @@ class TestEvent(models.Model): # misparses an EDTF string as a natural language string (e.g. 
`2020-03-15/2020-04-15` -> `2020-03-15`) date_edtf = EDTFField( "Date of creation (EDTF)", - natural_text_field='date_display', - direct_input_field='date_edtf_direct', - lower_fuzzy_field='date_earliest', - upper_fuzzy_field='date_latest', - lower_strict_field='date_sort_ascending', - upper_strict_field='date_sort_descending', + natural_text_field="date_display", + direct_input_field="date_edtf_direct", + lower_fuzzy_field="date_earliest", + upper_fuzzy_field="date_latest", + lower_strict_field="date_sort_ascending", + upper_strict_field="date_sort_descending", blank=True, null=True, ) @@ -43,3 +44,14 @@ class TestEvent(models.Model): # Computed fields for sorting date_sort_ascending = models.FloatField(blank=True, null=True) date_sort_descending = models.FloatField(blank=True, null=True) + + def __str__(self) -> str: + return ( + f"Test Event: {self.date_display=}, " + f"{self.date_edtf_direct=}, " + f"{self.date_earliest=}, " + f"{self.date_latest=}, " + f"{self.date_sort_ascending=}, " + f"{self.date_sort_descending=}, " + f"{self.date_edtf=}" + ) diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index de54d64..88fdca8 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -1,47 +1,41 @@ from django.test import TestCase + +from edtf import EDTFObject, struct_time_to_jd +from edtf import parse_edtf as parse + from .models import TestEvent -from edtf.parser.grammar import parse_edtf as parse -from edtf.parser import EDTFObject -from edtf.convert import struct_time_to_jd + class TestEventModelTests(TestCase): def setUp(self): # Create instances and assign them to instance variables # date_edtf_direct is a valid EDTF string, date_display is a date # to be parsed from natural language - self.event1 = TestEvent(date_edtf_direct="2020-03-15/2020-04-15") - self.event2 = TestEvent(date_edtf_direct="2021-05-06") - self.event3 = TestEvent(date_edtf_direct="2019-11") 
- self.event4 = TestEvent(date_display="Approximately August 2018") - self.event5 = TestEvent(date_edtf_direct="2021-05-06") - self.event1.save() - self.event2.save() - self.event3.save() - self.event4.save() - self.event5.save() - + self.event1 = TestEvent.objects.create(date_edtf_direct="2020-03-15/2020-04-15") + self.event2 = TestEvent.objects.create(date_edtf_direct="2021-05-06") + self.event3 = TestEvent.objects.create(date_edtf_direct="2019-11") + self.event4 = TestEvent.objects.create(date_display="Approximately August 2018") + self.event5 = TestEvent.objects.create(date_edtf_direct="2021-05-06") def test_edtf_object_returned(self): for event in TestEvent.objects.all(): self.assertIsInstance(event.date_edtf, EDTFObject) - def test_sorting(self): - events = list(TestEvent.objects.order_by('date_sort_ascending')) + events = list(TestEvent.objects.order_by("date_sort_ascending")) self.assertEqual(events[0].date_display, "Approximately August 2018") self.assertEqual(events[1].date_edtf_direct, "2019-11") self.assertEqual(events[2].date_edtf_direct, "2020-03-15/2020-04-15") self.assertEqual(events[3].date_edtf_direct, "2021-05-06") self.assertEqual(events[4].date_edtf_direct, "2021-05-06") - events_desc = list(TestEvent.objects.order_by('-date_sort_descending')) + events_desc = list(TestEvent.objects.order_by("-date_sort_descending")) self.assertEqual(events_desc[0].date_edtf_direct, "2021-05-06") self.assertEqual(events_desc[1].date_edtf_direct, "2021-05-06") self.assertEqual(events_desc[2].date_edtf_direct, "2020-03-15/2020-04-15") self.assertEqual(events_desc[3].date_edtf_direct, "2019-11") self.assertEqual(events_desc[4].date_display, "Approximately August 2018") - def test_date_boundaries(self): event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") expected_earliest_jd = struct_time_to_jd(parse("2020-03-15").lower_strict()) @@ -49,23 +43,24 @@ def test_date_boundaries(self): self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, 
places=1) self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) - event = self.event2 expected_earliest_jd = struct_time_to_jd(parse("2021-05-06").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2021-05-06").upper_strict()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + self.assertAlmostEqual( + self.event2.date_earliest, expected_earliest_jd, places=1 + ) + self.assertAlmostEqual(self.event2.date_latest, expected_latest_jd, places=1) - event = TestEvent.objects.get(date_edtf_direct="2019-11") + event3 = TestEvent.objects.get(date_edtf_direct="2019-11") expected_earliest_jd = struct_time_to_jd(parse("2019-11").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2019-11").upper_strict()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + self.assertAlmostEqual(event3.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event3.date_latest, expected_latest_jd, places=1) - event = TestEvent.objects.get(date_display="Approximately August 2018") + event4 = TestEvent.objects.get(date_display="Approximately August 2018") expected_earliest_jd = struct_time_to_jd(parse("2018-08~").lower_fuzzy()) expected_latest_jd = struct_time_to_jd(parse("2018-08~").upper_fuzzy()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + self.assertAlmostEqual(event4.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event4.date_latest, expected_latest_jd, places=1) def test_date_display(self): """ @@ -73,24 +68,37 @@ def test_date_display(self): In the future, a more sophisticated natural language parser could be used to generate a human readable date from the EDTF input. """ - # why does this fail?? 
- # event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") - # self.assertEqual(event.date_display, "2020-03-15/2020-04-15") - - self.assertEqual(self.event1.date_display, "2020-03-15/2020-04-15") + event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") self.assertEqual(self.event2.date_display, "2021-05-06") self.assertEqual(self.event3.date_display, "2019-11") self.assertEqual(self.event4.date_display, "Approximately August 2018") def test_comparison(self): # test equality of the same dates - self.assertEqual(self.event2.date_edtf, self.event5.date_edtf, "Events with the same date should be equal") + self.assertEqual( + self.event2.date_edtf, + self.event5.date_edtf, + "Events with the same date should be equal", + ) # test inequality of different dates - self.assertNotEqual(self.event1.date_edtf, self.event2.date_edtf, "Events with different dates should not be equal") + self.assertNotEqual( + self.event1.date_edtf, + self.event2.date_edtf, + "Events with different dates should not be equal", + ) # greater than - self.assertGreater(self.event2.date_edtf, self.event3.date_edtf, "2021-05-06 is greater than 2019-11") + self.assertGreater( + self.event2.date_edtf, + self.event3.date_edtf, + "2021-05-06 is greater than 2019-11", + ) # less than - self.assertLess(self.event3.date_edtf, self.event2.date_edtf, "2019-11 is less than 2021-05-06") \ No newline at end of file + self.assertLess( + self.event3.date_edtf, + self.event2.date_edtf, + "2019-11 is less than 2021-05-06", + ) diff --git a/edtf_django_tests/edtf_integration/views.py b/edtf_django_tests/edtf_integration/views.py index 91ea44a..60f00ef 100644 --- a/edtf_django_tests/edtf_integration/views.py +++ b/edtf_django_tests/edtf_integration/views.py @@ -1,3 +1 @@ -from django.shortcuts import render - # Create your views here. 
diff --git a/edtf_django_tests/manage.py b/edtf_django_tests/manage.py index b2d2a20..ffd375b 100755 --- a/edtf_django_tests/manage.py +++ b/edtf_django_tests/manage.py @@ -1,5 +1,6 @@ #!/usr/bin/env python """Django's command-line utility for administrative tasks.""" + import os import sys diff --git a/pyproject.toml b/pyproject.toml index 0b7a0ae..869daf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,9 @@ classifiers = [ [project.optional-dependencies] test = [ "django>=4.2,<5.0", - "pytest" + "pytest", + "ruff", + "pre-commit", ] [project.urls] @@ -76,3 +78,43 @@ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] addopts = "--ignore=edtf_django_tests/" + +[tool.ruff] +# Python 3.8 +target-version = "py38" + +extend-exclude = [ + '**/migrations/*', +] + +[tool.ruff.lint] +select = [ + # pycodestyle + "E", + # pycodestyle warnings + "W", + # Pyflakes + "F", + # pyupgrade + ## Flake8 plugins + "UP", + # flake8-bugbear + "B", + # flake8-comprehensions + "C", + # flake8-django + "DJ", + # flake8-bandit + "S", + # flake8-simplify + "SIM", + # isort + "I", +] + +ignore = [ + # Ignore Pycodestyle line-length warnings, (mainly long comments). + "E501", + # Ignore McCabe complexity (for now). + "C901", +]