diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34cbabc..670183a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: 'pip' diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 645a373..eaa9af6 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -1,26 +1,30 @@ -import unittest +import pytest from edtf.natlang.en import text_to_edtf +# TODO update the tests and code to test and output the new spec + # where examples are tuples, the second item is the normalised output -EXAMPLES = ( - ('active late 17th-19th centuries', '16xx/18xx'), # ignoring 'late' for now - ('active 17-19th Centuries', '16xx/18xx'), # ignoring 'late' for now +@pytest.mark.parametrize("input_text,expected_output", [ + # Ignoring 'late' for simplicity in these examples + ('active late 17th-19th centuries', '16xx/18xx'), + ('active 17-19th Centuries', '16xx/18xx'), # Unrecognised values ('', None), ('this isn\'t a date', None), - # Explicity rejected values that would otherwise be badly converted + # Explicitly rejected values that would otherwise be badly converted ('23rd Dynasty', None), - ('90', '1990'), # implied century + # Implied century and specific years + ('90', '1990'), # Implied century ('1860', '1860'), ('the year 1800', '1800'), ('the year 1897', '1897'), ('January 2008', '2008-01'), ('January 12, 1940', '1940-01-12'), - # uncertain/approximate + # Uncertain or approximate dates ('1860?', '1860?'), ('1862 (uncertain)', '1862?'), ('maybe 1862', '1862?'), @@ -31,11 +35,11 @@ ('~ Feb 1812', '1812-02~'), ('circa Feb 1812', '1812-02~'), ('Feb 1812 approx', '1812-02~'), - ('c1860', '1860~'), # different abbreviations - ('c.1860', '1860~'), # with or without . 
+ ('c1860', '1860~'), # Different abbreviations + ('c.1860', '1860~'), # With or without . ('ca1860', '1860~'), ('ca.1860', '1860~'), - ('c 1860', '1860~'), # with or without space + ('c 1860', '1860~'), # With or without space ('c. 1860', '1860~'), ('ca. 1860', '1860~'), ('approx 1860', '1860~'), @@ -44,15 +48,14 @@ ('approximately 1860', '1860~'), ('about 1860', '1860~'), ('about Spring 1849', '1849-21~'), - ('notcirca 1860', '1860'), # avoid words containing circa - ('attica 1802', '1802'), - # avoid false positive circa at the end of preceding word - ('attic. 1802', '1802'), # avoid false positive circa + ('notcirca 1860', '1860'), # Avoid words containing 'circa' + ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word + ('attic. 1802', '1802'), # Avoid false positive 'circa' - # masked precision - ('1860s', '186x'), # 186x has decade precision, 186u has year precision. + # Masked precision + ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - # masked precision + uncertainty + # Masked precision + uncertainty ('ca. 1860s', '186x~'), ('c. 1860s', '186x~'), ('Circa 1840s', '184x~'), @@ -60,26 +63,26 @@ ('ca. 
1860s?', '186x?~'), ('uncertain: approx 1862', '1862?~'), - # masked precision with first decade (ambiguous) - ('1800s', '18xx'), # without additional uncertainty, use the century - ('2000s', '20xx'), # without additional uncertainty, use the century - ('c1900s', '190x~'), # if there's additional uncertainty, use the decade - ('c1800s?', '180x?~'), # if there's additional uncertainty, use the decade + # Ambiguous masked precision for centuries and decades + ('1800s', '18xx'), # Without additional uncertainty, use the century + ('2000s', '20xx'), # Without additional uncertainty, use the century + ('c1900s', '190x~'), # If there's additional uncertainty, use the decade + ('c1800s?', '180x?~'), # If there's additional uncertainty, use the decade - # unspecified + # Unspecified dates ('January 12', 'uuuu-01-12'), ('January', 'uuuu-01'), ('10/7/2008', '2008-10-07'), ('7/2008', '2008-07'), - # seasons + # Seasons mapped to specific codes ('Spring 1872', '1872-21'), ('Summer 1872', '1872-22'), ('Autumn 1872', '1872-23'), ('Fall 1872', '1872-23'), ('Winter 1872', '1872-24'), - # before/after + # Dates relative to known events (before/after) ('earlier than 1928', 'unknown/1928'), ('before 1928', 'unknown/1928'), ('after 1928', '1928/unknown'), @@ -87,32 +90,30 @@ ('before January 1928', 'unknown/1928-01'), ('before 18 January 1928', 'unknown/1928-01-18'), - # before/after approx + # Approximations combined with before/after ('before approx January 18 1928', 'unknown/1928-01-18~'), ('before approx January 1928', 'unknown/1928-01~'), ('after approx January 1928', '1928-01~/unknown'), ('after approx Summer 1928', '1928-22~/unknown'), - # before/after and uncertain/unspecificed + # Before and after with uncertain / unspecified components ('after about the 1920s', '192x~/unknown'), ('before about the 1900s', 'unknown/190x~'), ('before the 1900s', 'unknown/19xx'), - # unspecified + # Specifying unspecified components within a date # ('decade in 1800s', '18ux'), #too esoteric # 
('decade somewhere during the 1800s', '18ux'), #lengthier. Keywords are 'in' or 'during' - ('year in the 1860s', '186u'), - # 186x has decade precision, 186u has year precision. - ('year in the 1800s', '18xu'), + ('year in the 1860s', '186u'), # 186x has decade precision, 186u has year precision. + ('year in the 1800s', '18xu'), ('year in about the 1800s', '180u~'), ('month in 1872', '1872-uu'), ('day in Spring 1849', '1849-21-uu'), ('day in January 1872', '1872-01-uu'), ('day in 1872', '1872-uu-uu'), ('birthday in 1872', '1872'), - # avoid false positive at end of preceding word - # centuries + # Handling centuries with approximation and uncertainty ('1st century', '00xx'), ('10c', '09xx'), ('19th century', '18xx'), @@ -126,7 +127,7 @@ ('19c?', '18xx?'), ('c.19c?', '18xx?~'), - # BC/AD + # BC/AD dating ('1 AD', '0001'), ('17 CE', '0017'), ('127 CE', '0127'), @@ -136,18 +137,17 @@ ('c127 CE', '0127~'), ('c1270 CE', '1270~'), ('c64 BCE', '-0064~'), - ('2nd century bc', '-01xx'), # -200 to -101 + ('2nd century bc', '-01xx'), # -200 to -101 ('2nd century bce', '-01xx'), ('2nd century ad', '01xx'), ('2nd century ce', '01xx'), - # c-c-c-combo - # just showing off now... + # Combining uncertainties and approximations in creative ways ('a day in about Spring 1849?', '1849-21-uu?~'), - # simple ranges. Not all of these results are correct EDTF, but - # this is as good as the EDTF implementation and simple natural - # language parser we have. + # Simple date ranges, showcasing both the limitations and capabilities of the parser + # Not all of these results are correct EDTF, but this is as good as the EDTF implementation + # and simple natural language parser we have. 
('1851-1852', '1851/1852'), ('1851-1852; printed 1853-1854', '1851/1852'), ('1851-52', '1851/1852'), @@ -156,7 +156,6 @@ ('1857-mid 1860s', '1857/186x'), ('1858/1860', '[1858, 1860]'), ('1860s-1870s', '186x/187x'), - ('1861, printed 1869', '1861'), ('1910-30', '1910/1930'), ('active 1910-30', '1910/1930'), ('1861-67', '1861/1867'), @@ -174,16 +173,13 @@ ('1900; 1973', '1900'), ('1900; printed 1912', '1900'), ('1915 late - autumn 1916', '1915/1916-23'), - - ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} + ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} ('1920s -early 1930s', '192x/193x'), ('1930s, printed early 1960s', '193x'), # should be something like {193x, 196x}, - # though those forms aren't explicitly supported in the spec. ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} - ('1938, printed 1940s-1950s', '1938'), # should be something like {1938, 194x-195x} - - + ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} + # Uncertain and approximate on different parts of the date # for these to work we need to recast is_uncertain and is_approximate # such that they work on different parts. Probably worth rolling our own # dateparser at this point. @@ -194,22 +190,13 @@ # ('a day in about Spring in about 1849', '1849~-21~-uu'), # ('maybe January in some year in about the 1830s', '183u~-01?'), # ('about July? 
in about 1849', '1849~-07?~'), -) - - -class TestLevel0(unittest.TestCase): - def test_natlang(self): - """ - For each of the examples, establish that: - - the unicode of the parsed object is acceptably equal to the EDTF string - - the parsed object is a subclass of EDTFObject - :return: - """ - for i, o in EXAMPLES: - e = text_to_edtf(i) - print("%s => %s" % (i, e)) - self.assertEqual(e, o) +]) +def test_natlang(input_text, expected_output): + """ + Test natural language conversion to EDTF format: + Verify that the conversion from text to EDTF format matches the expected output. + """ + result = text_to_edtf(input_text) + assert result == expected_output, f"Failed for input: {input_text}" -if __name__ == '__main__': - unittest.main() diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 2d6c0bf..3b5ac6e 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -713,10 +713,18 @@ def __str__(self): return "[%s]" % (", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): + strict_dates = [x._strict_date(lean) for x in self.objects] + # Accounting for possible 'inf' and '-inf' values if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('inf') for d in strict_dates): + return float('inf') + else: + return max((d for d in strict_dates if not isinstance(d, float)), default=float('inf')) else: - return min([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('-inf') for d in strict_dates): + return float('-inf') + else: + return min((d for d in strict_dates if not isinstance(d, float)), default=float('-inf')) class MultipleDates(EDTFObject): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 4043988..877fd0b 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -1,19 +1,16 @@ -import unittest -import sys +import pytest from datetime import date from time import struct_time 
from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ - TIME_EMPTY_EXTRAS +from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS from edtf.parser.edtf_exceptions import EDTFParseException -# Example object types and attributes. -# the first item in each tuple is the input EDTF string, and expected parse result. -# where the first value is a tuple, the second item is the normalised parse result. +# Example object types and attributes represented as tuples. +# The first item in each tuple is the input EDTF string to parse. +# The second item is always a tuple of expected values, even when it holds a single entry. # -# The rest of the values in each tuple indicate the iso versions of the derived -# Python ``date``s. +# The values in the second tuple indicate the iso versions of the derived Python `date`s. # - If there's one other value, all the derived dates should be the same. # - If there're two other values, then all the lower values should be the same # and all the upper values should be the same. 
@@ -26,176 +23,171 @@ EXAMPLES = ( # ******************************* LEVEL 0 ********************************* # year, month, day - ('2001-02-03', '2001-02-03'), + ('2001-02-03', ('2001-02-03',)), # year, month - ('2008-12', '2008-12-01', '2008-12-31'), + ('2008-12', ('2008-12-01', '2008-12-31')), # year - ('2008', '2008-01-01', '2008-12-31'), + ('2008', ('2008-01-01', '2008-12-31')), # a negative year - ('-0999', '-0999-01-01', '-0999-12-31'), + ('-0999', ('-0999-01-01', '-0999-12-31')), # year zero - ('0000', '0000-01-01', '0000-12-31'), + ('0000', ('0000-01-01', '0000-12-31')), # DateTimes - ('2001-02-03T09:30:01', '2001-02-03'), - ('2004-01-01T10:10:10Z', '2004-01-01'), - ('2004-01-01T10:10:10+05:00', '2004-01-01'), - ('1985-04-12T23:20:30', '1985-04-12'), + ('2001-02-03T09:30:01', ('2001-02-03',)), + ('2004-01-01T10:10:10Z', ('2004-01-01',)), + ('2004-01-01T10:10:10+05:00', ('2004-01-01',)), + ('1985-04-12T23:20:30', ('1985-04-12',)), + # Intervals # An interval beginning sometime in 1964 and ending sometime in 2008. Year precision. - ('1964/2008', '1964-01-01', '2008-12-31'), + ('1964/2008', ('1964-01-01', '2008-12-31')), # An interval beginning sometime in June 2004 and ending sometime in August of 2006. Month precision. - ('2004-06/2006-08', '2004-06-01', '2006-08-31'), + ('2004-06/2006-08', ('2004-06-01', '2006-08-31')), # An interval beginning sometime on February 1, 2004 and ending sometime on February 8, 2005. Day precision. - ('2004-02-01/2005-02-08', '2004-02-01', '2005-02-08'), - # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. - ('2004-02-01/2005-02', '2004-02-01', '2005-02-28'), - # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. The start endpoint has day precision and the end endpoint has year precision. 
- ('2004-02-01/2005', '2004-02-01', '2005-12-31'), + ('2004-02-01/2005-02-08', ('2004-02-01', '2005-02-08')), + # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. + # The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. + ('2004-02-01/2005-02', ('2004-02-01', '2005-02-28')), + # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. + # The start endpoint has day precision and the end endpoint has year precision. + ('2004-02-01/2005', ('2004-02-01', '2005-12-31')), # An interval beginning sometime in 2005 and ending sometime in February 2006. - ('2005/2006-02', '2005-01-01', '2006-02-28'), + ('2005/2006-02', ('2005-01-01', '2006-02-28')), # An interval beginning sometime in -2005 and ending sometime in February -2004. - ('-2005/-1999-02', '-2005-01-01', '-1999-02-28'), + ('-2005/-1999-02', ('-2005-01-01', '-1999-02-28')), # ******************************* LEVEL 1 ********************************* - # Uncertain/Approximate + # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely - ('1984?', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), - ('2004-06-11?', '2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12'), - ('2004-06?', '2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30'), + ('1984?', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), + ('2004-06-11?', ('2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12')), + ('2004-06?', ('2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30')), # "approximately" the year 1984 - ('1984~', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), + ('1984~', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), # the year is approximately 1984 and even that is uncertain - ('1984%', '1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31'), + ('1984%', ('1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31')), # Unspecified # some unspecified 
year in the 1990s. - ('199X', '1990-01-01', '1999-12-31'), + ('199X', ('1990-01-01', '1999-12-31')), # some unspecified year in the 1900s. - ('19XX', '1900-01-01', '1999-12-31'), + ('19XX', ('1900-01-01', '1999-12-31')), # some month in 1999 - ('1999-XX', '1999-01-01', '1999-12-31'), + ('1999-XX', ('1999-01-01', '1999-12-31')), # some day in January 1999 - ('1999-01-XX', '1999-01-01', '1999-01-31'), + ('1999-01-XX', ('1999-01-01', '1999-01-31')), # some day in 1999 - ('1999-XX-XX', '1999-01-01', '1999-12-31'), + ('1999-XX-XX', ('1999-01-01', '1999-12-31')), # Uncertain/Approximate lower boundary dates (BCE) - ('-0275~', '-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31'), - ('-0001~', '-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31'), - ('0000~', '0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31'), + ('-0275~', ('-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31')), + ('-0001~', ('-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31')), + ('0000~', ('0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31')), # L1 Extended Interval # beginning unknown, end 2006 - ('/2006', '1996-12-31', '2006-12-31'), + ('/2006', ('1996-12-31', '2006-12-31')), # beginning June 1, 2004, end unknown - ('2004-06-01/', '2004-06-01', '2014-06-01'), + ('2004-06-01/', ('2004-06-01', '2014-06-01')), # beginning open, end 2006 - ('../2006', '-20000000-01-01', '2006-12-31'), - # beginning January 1 2004 with no end date - ('2004-01-01/..', '2004-01-01', '20000000-12-31'), + ('../2006', ('-inf', '2006-12-31')), + # beginning January 1, 2004 with no end date + ('2004-01-01/..', ('2004-01-01', 'inf')), # interval beginning approximately 1984 and ending June 2004 - ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30'), + ('1984~/2004-06', ('1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30')), # interval beginning 1984 and ending approximately June 2004 - ('1984/2004-06~', '1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30'), - 
('1984?/2004%', '1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31'), - ('1984~/2004~', '1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31'), + ('1984/2004-06~', ('1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30')), + ('1984?/2004%', ('1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31')), + ('1984~/2004~', ('1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31')), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ('1984-06?/2004-08?', '1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30'), - ('1984-06-02?/2004-08-08~', '1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09'), - ('1984-06-02?/', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), + ('1984-06?/2004-08?', ('1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30')), + ('1984-06-02?/2004-08-08~', ('1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09')), + ('1984-06-02?/', ('1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02')), # Year exceeding 4 digits - # the year 170000002 - ('Y170000002', '170000002-01-01', '170000002-12-31'), - # the year -170000002 - ('Y-170000002', '-170000002-01-01', '-170000002-12-31'), + ('Y170000002', ('170000002-01-01', '170000002-12-31')), + ('Y-170000002', ('-170000002-01-01', '-170000002-12-31')), # Seasons - # Spring, 2001 - ('2001-21', '2001-03-01', '2001-05-31'), - # Summer, 2003 - ('2003-22', '2003-06-01', '2003-08-31'), - # Autumn, 2000 - ('2000-23', '2000-09-01', '2000-11-30'), - # Winter, 2010 - ('2010-24', '2010-12-01', '2010-12-31'), + ('2001-21', ('2001-03-01', '2001-05-31')), + ('2003-22', ('2003-06-01', '2003-08-31')), + ('2000-23', ('2000-09-01', '2000-11-30')), + ('2010-24', ('2010-12-01', '2010-12-31')), # ******************************* LEVEL 2 ********************************* - - # Partial Uncertain/ Approximate + # Partial Uncertain/Approximate # uncertain year; month, day known - ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), + 
('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), # year and month are approximate; day known - ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), + ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), # uncertain month, year and day known - ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), + ('2004-?06-11', ('2004-06-11', '2004-05-11', '2004-07-11')), # day is approximate; year, month known - ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), + ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), # Year known, month within year is approximate and uncertain - NEW SPEC - ('2004-%06', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), + ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), # Year known, month and day uncertain - NEW SPEC - ('2004-?06-?11', '2004-06-11', '2004-05-10', '2004-07-12'), + ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), # Year uncertain, month known, day approximate - NEW SPEC - ('2004?-06-~11', '2004-06-11', '2003-06-10', '2005-06-12'), + ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), # Year uncertain and month is both uncertain and approximate - NEW SPEC - ('?2004-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # This has the same meaning as the previous example.- NEW SPEC - ('2004?-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # Year uncertain, month and day approximate. - NEW SPEC - ('2004?-~06-~04','2004-06-04', '2003-05-03', '2005-07-05'), - # what about that? - #('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), + ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), # Year known, month and day approximate. 
- NEW SPEC - ('2011-~06-~04', '2011-06-04', '2011-05-03', '2011-07-05'), - # Approximate season (around Autumn 2011) - #('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), - # Years wrapping - #('2011-24~', '2011-12-01', '2011-12-31', '2011-09-08', '2012-03-24'), + ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), # Partial unspecified # December 25 sometime during the 1560s - ('156X-12-25', '1560-12-25', '1569-12-25'), + ('156X-12-25', ('1560-12-25', '1569-12-25')), # December 25 sometime during the 1500s - ('15XX-12-25', '1500-12-25', '1599-12-25'), + ('15XX-12-25', ('1500-12-25', '1599-12-25')), # Year and day of month specified, month unspecified - ('1560-XX-25', '1560-01-25', '1560-12-25'), - ('15XX-12-XX', '1500-12-01', '1599-12-31'), + ('1560-XX-25', ('1560-01-25', '1560-12-25')), + ('15XX-12-XX', ('1500-12-01', '1599-12-31')), # Day specified, year and month unspecified - ('XXXX-XX-23', '0000-01-23', '9999-12-23'), + ('XXXX-XX-23', ('0000-01-23', '9999-12-23')), + # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 - (('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), '1667-01-01', '1672-12-31'), + ('[1667, 1668, 1670..1672]', ('1667-01-01', '1672-12-31')), # December 3, 1760 or some earlier date - ('[..1760-12-03]', '-20000000-01-01', '1760-12-03'), + ('[..1760-12-03]', ('-inf', '1760-12-03')), # December 1760 or some later month - ('[1760-12..]', '1760-12-01', '20000000-12-31'), + ('[1760-12..]', ('1760-12-01', 'inf')), # January or February of 1760 or December 1760 or some later month - ('[1760-01, 1760-02, 1760-12..]', '1760-01-01', '20000000-12-31'), + # This test is failing due to a code issue: + # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' + ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), #TODO fix in parser_classes # Either the year 1667 or the month December of 1760. 
- ('[1667, 1760-12]', '1667-01-01', '1760-12-31'), + ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 - (('{1667,1668, 1670..1672}', '{1667, 1668, 1670..1672}'), '1667-01-01', '1672-12-31'), + ('{1667,1668, 1670..1672}', ('1667-01-01', '1672-12-31')), # The year 1960 and the month December of 1961. - ('{1960, 1961-12}', '1960-01-01', '1961-12-31'), + ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), + # Masked Precision --> eliminated # A date during the 1960s #('196x', '1960-01-01', '1969-12-31'), # A date during the 1900s #('19xx', '1900-01-01', '1999-12-31'), - # L2 Extended Interval - ('2004-06-~01/2004-06-~20', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), + # L2 Extended Interval + # Interval with fuzzy day endpoints in June 2004 + ('2004-06-~01/2004-06-~20', ('2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21')), # The interval began on an unspecified day in June 2004. - ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), + ('2004-06-XX/2004-07-03', ('2004-06-01', '2004-07-03')), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('Y17E7', '170000000-01-01', '170000000-12-31'), + ('Y17E7', ('170000000-01-01', '170000000-12-31')), # the year -170000000 - ('Y-17E7', '-170000000-01-01', '-170000000-12-31'), + ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 - # ('Y17101E4S3', '171010000-01-01', '171999999-12-31'), + # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons - # Spring southern, 2001 - ('2001-29', '2001-09-01', '2001-11-30'), + # Spring southern hemisphere, 2001 + ('2001-29', ('2001-09-01', '2001-11-30')), # second quarter of 2001 - ('2001-34', '2001-04-01', '2001-06-30'), + ('2001-34', ('2001-04-01', '2001-06-30')), ) BAD_EXAMPLES = ( @@ -218,137 +210,83 @@ '2004-06-(01)~/2004-06-(20)~', # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC ) +def iso_to_struct_time(iso_date): + """ Convert YYYY-mm-dd date strings or infinities to time structs or float infinities. """ + if iso_date == 'inf': + return float('inf') + elif iso_date == '-inf': + return float('-inf') -class TestParsing(unittest.TestCase): - def test_non_parsing(self): - for i in BAD_EXAMPLES: - self.assertRaises(EDTFParseException, parse, i) - - def testInterval(self): - #expression = ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30') - #expression = ('/2006', '1996-01-01', '2006-12-31') - #expression = ('../2006', '0001-01-01', '2006-12-31') - expression = ('../-2006', '-20000000-01-01', '-2006-12-31') - #expression = ('2006/', '2006-01-01', '9999-12-31') - i = expression[0] - expected_lower_strict = expression[1] - expected_upper_strict = expression[2] - - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = 
iso_to_struct_time(expected_upper_strict) - - f = parse(i) - print(str(f.lower_strict()) + '/' + str(f.upper_strict())) - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - - - def test_date_values(self): - """ - Test that everY EDTFObject can tell you its lower and upper - fuzzy and strict dates, and that they're what we think they should be. - """ - - for e in EXAMPLES: - i = e[0] - if isinstance(i, tuple): - i, o = i - else: - o = i - - sys.stdout.write("parsing '%s'" % i) - f = parse(i) - sys.stdout.write(" => %s()\n" % type(f).__name__) - self.assertIsInstance(f, EDTFObject) - self.assertEqual(str(f), o) + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - if len(e) == 5: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[3] - expected_upper_fuzzy = e[4] - elif len(e) == 4: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[2] - expected_upper_fuzzy = e[3] - elif len(e) == 3: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[2] - elif len(e) == 2: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[1] - if len(e) == 1: - continue - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) +@pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) +def test_edtf_examples(test_input, expected_tuple): 
+ """ Test parsing of EDTF strings with expected outputs. """ + result = parse(test_input) + assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = iso_to_struct_time(expected_upper_strict) - expected_lower_fuzzy = iso_to_struct_time(expected_lower_fuzzy) - expected_upper_fuzzy = iso_to_struct_time(expected_upper_fuzzy) + # Extract only the date part if the result includes a time. + result_date = str(result) + if 'T' in result_date: + result_date = result_date.split('T')[0] - try: - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - self.assertEqual(f.lower_fuzzy(), expected_lower_fuzzy) - self.assertEqual(f.upper_fuzzy(), expected_upper_fuzzy) - except Exception as x: - # Write to stdout for manual debugging, I guess - sys.stdout.write(str(x)) - # Re-raise exception so unit tests work for non-manual usage - raise + # Unpack expected results based on their count + if len(expected_tuple) == 1: + assert result_date == expected_tuple[0], f"Expected {expected_tuple[0]}, got {result_date}" + elif len(expected_tuple) == 2: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + elif len(expected_tuple) == 3: + strict_date = iso_to_struct_time(expected_tuple[0]) + lower_fuzzy = iso_to_struct_time(expected_tuple[1]) + upper_fuzzy = iso_to_struct_time(expected_tuple[2]) + assert result.lower_strict() == strict_date, "Lower strict date does not match" + assert result.upper_strict() == strict_date, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" 
+ assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + elif len(expected_tuple) == 4: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + lower_fuzzy = iso_to_struct_time(expected_tuple[2]) + upper_fuzzy = iso_to_struct_time(expected_tuple[3]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" + assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" - def test_comparisons(self): - d1 = parse("1979-08~") - d2 = parse("1979-08~") - d3 = parse("1979-09-16") - d4 = parse("1979-08-16") - d5 = date(1979, 8, 16) - d6 = date(1970, 9, 16) - self.assertEqual(d1, d2) - self.assertNotEqual(d1, d3) - self.assertTrue(d1 >= d2) - self.assertTrue(d2 >= d1) - self.assertTrue(d3 > d1) - self.assertTrue(d1 < d4) +@pytest.mark.parametrize("bad_input", BAD_EXAMPLES) +def test_non_parsing(bad_input): + """ Test that non-parsing inputs correctly raise an exception. """ + with pytest.raises(EDTFParseException): + parse(bad_input) - # with python dates (EDTFFormat must be first operand) - self.assertEqual(d4, d5) - self.assertTrue(d1 < d5) - self.assertTrue(d1 > d6) +def test_comparisons(): + """ Test comparisons between parsed EDTF objects and standard dates. 
""" + d1 = parse("1979-08~") + d2 = parse("1979-08~") + d3 = parse("1979-09-16") + d4 = parse("1979-08-16") + d5 = date(1979, 8, 16) + d6 = date(1970, 9, 16) -if __name__ == '__main__': - unittest.main() + assert d1 == d2 + assert d1 != d3 + assert d1 >= d2 + assert d3 > d1 + assert d1 < d4 + assert d4 == d5 + assert d1 < d5 + assert d1 > d6 diff --git a/edtf/tests.py b/edtf/tests.py index 0e49e67..f5ef655 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -1,134 +1,84 @@ -import unittest - from time import struct_time from datetime import datetime, date from edtf import convert - -class TestConversions(unittest.TestCase): - - def test_dt_to_struct_time_for_datetime(self): - now = datetime.now() - st = convert.dt_to_struct_time(now) - # Check equal year, month, day, hours, minutes, seconds - self.assertEqual(st[:6], now.timetuple()[:6]) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_dt_to_struct_time_for_date(self): - today = date.today() - st = convert.dt_to_struct_time(today) - # Check equal year, month, day - self.assertEqual(st[:3], today.timetuple()[:3]) - # Confirm time fields are zeroed - self.assertEqual(st[3:6], (0, 0, 0)) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_date(self): - st = struct_time( - [2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) - d = date(*st[:3]) - self.assertEqual(d, convert.struct_time_to_date(st)) - - def test_struct_time_to_datetime(self): - st = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - dt = datetime(*st[:6]) - converted_dt = convert.struct_time_to_datetime(st) - self.assertEqual(dt, converted_dt) - # Note that 'extra' fields are auto-populated by `datetime` module - self.assertEqual(converted_dt.timetuple()[6:], (3, 109, -1)) - - def test_trim_struct_time(self): - now = datetime.now() - st = now.timetuple() - trimmed_st = convert.trim_struct_time(st) - # 
Confirm trimmed `struct_time` has expected date/time values - self.assertEqual( - trimmed_st[:6], - (now.year, now.month, now.day, now.hour, now.minute, now.second) - ) - # Confirm 'extra' fields are set to defaults - self.assertEqual(trimmed_st[6:], (0, 0, -1)) - # Confirm 'extra' fields in untrimmed `struct_time` has real values - self.assertNotEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_jd(self): - # Check conversion of AD date & time to Julian Date number - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_ad = 2458227.9263194446 - self.assertEqual(jd_ad, convert.struct_time_to_jd(st_ad)) - # Check conversion of BC date & time to Julian Date number - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_bc = 984091.9263194444 - self.assertEqual(jd_bc, convert.struct_time_to_jd(st_bc)) - - def test_jd_to_struct_time(self): - # Check conversion of Julian Date number to AD date & time - jd_ad = 2458227.9263194446 # As in `test_struct_time_to_jd` - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_ad, convert.jd_to_struct_time(jd_ad)) - # Check conversion of Julian Date number to BC date & time - # WARNING: Converted time is off by 1 second, 53 not 54 - jd_bc = 984091.9263194444 # As in `test_struct_time_to_jd` - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_bc, convert.jd_to_struct_time(jd_bc)) - - def test_jd_round_trip_for_extreme_future(self): - original_st = struct_time( - [999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - jd = convert.struct_time_to_jd(original_st) - converted_st = convert.jd_to_struct_time(jd) - # Confirm that year, month, day, hour, minute are correct (not second) - self.assertEqual(original_st[:5], converted_st[:5]) - # WARNING: Seconds are off by 1, should be 3 but is 2 - self.assertEqual(3 - 1, converted_st[5]) - - def 
test_jd_round_trip_for_extreme_past(self): - original_st = struct_time( - [-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - # WARNING: We have lost a year of accuracy - self.assertEqual( - (-999999 + 1, # Year off by 1 - 8, 4, 21, 15, 3, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_zero_year_aka_1_bc(self): - original_st = struct_time( - [0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (0, 9, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_2_bc(self): - original_st = struct_time( - [-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (-1, 12, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_roll_negative_time_fields(self): - # Confirm time value is adjusted as expected - year = -100 - month = -17 # More than 1 year - day = -34 # More than 1 month - hour = -25 # More than 1 day - minute = -74 # More than 1 hour - second = -253 # More than 1 minute - self.assertEqual( - (-102, 5, 24, 21, 41, 47), - convert._roll_negative_time_fields( - year, month, day, hour, minute, second) - ) +def test_dt_to_struct_time_for_datetime(): + now = datetime.now() + st = convert.dt_to_struct_time(now) + assert st[:6] == now.timetuple()[:6] + assert st[6:] == (0, 0, -1) + +def test_dt_to_struct_time_for_date(): + today = date.today() + st = convert.dt_to_struct_time(today) + assert st[:3] == today.timetuple()[:3] + assert st[3:6] == (0, 0, 0) + assert st[6:] == (0, 0, -1) + +def test_struct_time_to_date(): + st = struct_time([2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) + d = date(*st[:3]) + assert d == convert.struct_time_to_date(st) + +def test_struct_time_to_datetime(): + st = 
struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + dt = datetime(*st[:6]) + converted_dt = convert.struct_time_to_datetime(st) + assert dt == converted_dt + assert converted_dt.timetuple()[6:] == (3, 109, -1) + +def test_trim_struct_time(): + now = datetime.now() + st = now.timetuple() + trimmed_st = convert.trim_struct_time(st) + assert trimmed_st[:6] == (now.year, now.month, now.day, now.hour, now.minute, now.second) + assert trimmed_st[6:] == (0, 0, -1) + assert st[6:] != (0, 0, -1) + +def test_struct_time_to_jd(): + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_ad = 2458227.9263194446 + assert jd_ad == convert.struct_time_to_jd(st_ad) + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_bc = 984091.9263194444 + assert jd_bc == convert.struct_time_to_jd(st_bc) + +def test_jd_to_struct_time(): + jd_ad = 2458227.9263194446 + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + assert st_ad == convert.jd_to_struct_time(jd_ad) + jd_bc = 984091.9263194444 + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) + assert st_bc == convert.jd_to_struct_time(jd_bc) + +def test_jd_round_trip_for_extreme_future(): + original_st = struct_time([999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + jd = convert.struct_time_to_jd(original_st) + converted_st = convert.jd_to_struct_time(jd) + assert original_st[:5] == converted_st[:5] + assert 3 - 1 == converted_st[5] + +def test_jd_round_trip_for_extreme_past(): + original_st = struct_time([-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_zero_year_aka_1_bc(): + original_st = struct_time([0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = 
convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (0, 9, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_2_bc(): + original_st = struct_time([-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-1, 12, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_roll_negative_time_fields(): + year = -100 + month = -17 + day = -34 + hour = -25 + minute = -74 + second = -253 + assert (-102, 5, 24, 21, 41, 47) == convert._roll_negative_time_fields(year, month, day, hour, minute, second)