Skip to content

Commit

Permalink
Merge pull request #10 from maxfordham/define-outward-facing-api
Browse files Browse the repository at this point in the history
Define-outward-facing-api
  • Loading branch information
jgunstone authored Oct 4, 2024
2 parents b60cd58 + a17ff08 commit 403b242
Show file tree
Hide file tree
Showing 14 changed files with 73 additions and 87 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# xlsxdatagrid

[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
[![PyPI - Version](https://img.shields.io/pypi/v/xlsxdatagrid.svg)](https://pypi.org/project/xlsxdatagrid)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/xlsxdatagrid.svg)](https://pypi.org/project/xlsxdatagrid)
[![Actions status](https://github.com/maxfordham/xlsxdatagrid/workflows/CI/badge.svg)](https://github.com/maxfordham/xlsxdatagrid/actions)

-----

Expand Down
42 changes: 40 additions & 2 deletions src/xlsxdatagrid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,45 @@
#
# SPDX-License-Identifier: MIT

"""
root package for xlsxdatagrid
expected usage:
```py
import xlsxdatagrid as xdg
xdg.from_json(...) # outputs excel file from json data
xdg.from_dataframe(...) # outputs excel file from pandas dataframe
etc.
```
"""

from xlsxdatagrid.read import read_excel
from xlsxdatagrid.xlsxdatagrid import from_dataframe, from_json
from xlsxdatagrid.xlsxdatagrid import (
from_pydantic_object,
from_pydantic_objects,
from_dataframe,
from_dataframes,
from_json,
from_jsons,
# wb_from_pydantic_object, # TODO: Implement
# wb_from_pydantic_objects,
wb_from_dataframe,
wb_from_dataframes,
wb_from_json,
wb_from_jsons,
)

__all__ = ["read_excel", "from_dataframe", "from_json"]
__all__ = [
"from_pydantic_object",
"from_pydantic_objects",
"from_dataframe",
"from_dataframes",
"from_json",
"from_jsons",
# "wb_from_pydantic_object", # TODO: Implement
# "wb_from_pydantic_objects",
"wb_from_dataframe",
"wb_from_dataframes",
"wb_from_json",
"wb_from_jsons",
"read_excel", # TODO: improve functionality here...
]
55 changes: 7 additions & 48 deletions src/xlsxdatagrid/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,14 @@
from tempfile import TemporaryDirectory

from datamodel_code_generator import DataModelType, InputFileType, generate
from jsonref import replace_refs
from pydantic import AwareDatetime, BaseModel

# 3rd party
from python_calamine import CalamineSheet, CalamineWorkbook
from stringcase import snakecase

# local
from xlsxdatagrid.xlsxdatagrid import DataGridMetaData, get_duration

from xlsxdatagrid.xlsxdatagrid import DataGridMetaData

def fix_enum_hack(output):
# HACK: delete once issue resolved: https://github.com/koxudaxi/datamodel-code-generator/issues/2091
Expand Down Expand Up @@ -116,7 +114,7 @@ def read_data(data) -> tuple[list[dict], DataGridMetaData]:
return process_data(data, metadata)


def get_jsonschema(metadata: DataGridMetaData) -> dict:
def get_datamodel(metadata: DataGridMetaData) -> dict:
pass


Expand All @@ -139,55 +137,16 @@ def field_is_aware_datetime(field):
else:
return data


# def parse_timedelta(data, pydantic_model):

# def field_timedelta(field):
# if hasattr(field.annotation, "__args__"):
# if timedelta in field.annotation.__args__:
# return True
# else:
# return False
# elif isinstance(field.annotation, timedelta):
# return True
# else:
# return False

# row_model = pydantic_model.model_fields["root"].annotation.__args__[0]
# timedeltas = {k: v for k, v in row_model.model_fields.items() if field_timedelta(v)}
# if len(timedeltas) > 0:
# keys = list(timedeltas.keys())
# return [d | {k: timedelta(d[k]) for k in keys} for d in data]
# else:
# return data


def parse_timedelta(data, json_schema):
pr = replace_refs(json_schema, merge_props=True)["items"]["properties"]
keys = [k for k, v in pr.items() if "format" in v and v["format"] == "duration"]

if len(keys) > 0:
return [d | {k: get_duration(d[k]) for k in keys} for d in data]
else:
return data


# def get_jsonschema(metadata: DataGridMetaData) -> dict:
# if metadata.schema_url is not None:
# return requests.get(metadata.schema_url).json()
# return None


def read_worksheet(
worksheet: CalamineSheet,
get_jsonschema: ty.Optional[ty.Callable[[DataGridMetaData], dict]] = None,
get_datamodel: ty.Optional[ty.Callable[[DataGridMetaData], dict]] = None,
*,
return_pydantic_model: bool = False,
) -> list[dict]:
data = worksheet.to_python(skip_empty_area=True)
data, metadata = read_data(data)
if get_jsonschema is not None:
json_schema = get_jsonschema(metadata)
if get_datamodel is not None:
json_schema = get_datamodel(metadata)
if json_schema is not None:
pydantic_model = pydantic_model_from_json_schema(json_schema)

Expand All @@ -208,11 +167,11 @@ def read_worksheet(

def read_excel(
path,
get_jsonschema: ty.Optional[
get_datamodel: ty.Optional[
ty.Callable[[DataGridMetaData], ty.Type[BaseModel]]
] = None,
):
workbook = CalamineWorkbook.from_path(path)
sheet = workbook.sheet_names[0]
worksheet = workbook.get_sheet_by_name(sheet)
return read_worksheet(worksheet, get_jsonschema)
return read_worksheet(worksheet, get_datamodel)
51 changes: 19 additions & 32 deletions src/xlsxdatagrid/xlsxdatagrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
ConfigDict,
Field,
HttpUrl,
ImportString,
RootModel,
computed_field,
model_validator,
Expand Down Expand Up @@ -115,7 +116,9 @@ class HeaderStyling(BaseModel): # matches ipydatagrid
# ^ NOT IN USE -------------------------------


METADATA_FSTRING: str = "#Title={title} - HeaderDepth={header_depth} - IsTransposed={is_transposed} - DateTime={now} - SchemaUrl={schema_url}"
METADATA_FSTRING: str = (
"#Title={title} - HeaderDepth={header_depth} - IsTransposed={is_transposed} - DateTime={now} - DatamodelUrl={datamodel_url}"
) # TODO: build a metadata string from what data is present. Allow additions to this string but not removals.


MAP_TYPES_JSON_XL = {"integer": "integer", "float": "decimal", "date": "date"}
Expand Down Expand Up @@ -306,35 +309,23 @@ def get_xl_constraints(f: FieldSchema): # TODO: write text for this
}


# from urllib.parse import urlparse
# import requests


# def is_url_or_path(string):
# parsed = urlparse(string)
# if parsed.scheme in ("http", "https"):
# return "url"
# elif pathlib.Path(string).is_absolute() or any(
# sep in string for sep in (pathlib.Path().anchor, pathlib.Path().drive)
# ):
# return "path"
# else:
# return "unknown"


class DataGridMetaData(BaseModel):
model_config = ConfigDict(exclude_none=True)
title: str = Field(alias_choices=AliasChoices("title", "Title"))
name: ty.Optional[str] = Field(
None, alias_choices=AliasChoices("template_name", "name")
)
is_transposed: bool = False # TODO: rename -> display_transposed
header_depth: int = Field(1, validate_default=True)
schema_url: ty.Optional[HttpUrl] = Field(
None, alias_choices=AliasChoices("schema_url", "SchemaUrl")
datamodel_url: ty.Optional[HttpUrl] = Field(
None, alias_choices=AliasChoices("datamodel_url", "DatamodelUrl")
)
# schema_path: ty.Optional[pathlib.Path] = Field(
# None, alias_choices=AliasChoices("schema_path", "SchemaPath")
# ) TODO: could add this as an option
datamodel_path: ty.Optional[pathlib.Path] = Field(
None, alias_choices=AliasChoices("datamodel_path", "DatamodelPath")
) # TODO: add as an option
datamodel_importstr: ty.Optional[ImportString] = Field(
None, alias_choices=AliasChoices("datamodel_importstr", "DatamodelImportstr")
) # TODO: add as an option. preferred when present.
metadata_fstring: str = Field(
METADATA_FSTRING
) # TODO: should this be fixed... or validate that the base string is included...
Expand All @@ -344,11 +335,11 @@ class DataGridMetaData(BaseModel):

@model_validator(mode="before")
@classmethod
def check_schema_url(cls, data: ty.Any) -> ty.Any:
def check_datamodel_url(cls, data: ty.Any) -> ty.Any:
if isinstance(data, dict):
if "schema_url" in data:
if data["schema_url"] == "None":
data["schema_url"] = None
if "datamodel_url" in data:
if data["datamodel_url"] == "None":
data["datamodel_url"] = None
return data

@computed_field
Expand All @@ -357,10 +348,6 @@ def now(self) -> datetime:

@model_validator(mode="after")
def check_name(self) -> Self:
# if self.schema_url is not None and self.schema_path is not None:
# logging.warning(
# "schema_url and schema_path both given, schema_url will be used to retrieve schema"
# )
if self.name is None:
self.name = self.title.replace(" ", "")
return self
Expand All @@ -386,7 +373,7 @@ def get_header_depth(self) -> "DataGridSchema":
for f in self.fields
]
for nm in self.datagrid_index_name
] # TODO
]
return self

@computed_field
Expand Down Expand Up @@ -446,7 +433,7 @@ class XlTableWriter(BaseModel):
format_headers: list = [None]
tbl_range: tuple[int, int, int, int] = (0, 0, 0, 0)
tbl_headers: ty.Optional[list[dict]] = None
validation_arrays: ty.Optional[dict[str, dict]] = None
validation_arrays: ty.Optional[dict[str, dict]] = None # add validation to simple types: integer, float, string
formula_arrays: dict[str, str] = {}
formats: dict[str, dict] = {
"datetime": DATETIME_FORMAT,
Expand Down
10 changes: 5 additions & 5 deletions tests/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_schema(name, schemas=schemas):
return schemas.get(name)


def get_jsonschema(metadata: DataGridMetaData) -> dict:
def get_datamodel(metadata: DataGridMetaData) -> dict:
return schemas.get(metadata.name)


Expand Down Expand Up @@ -100,15 +100,15 @@ def test_load_model_from_json_schema_issue2091():
assert isinstance(pydantic_model(Abbreviation="yellow"), BaseModel)


def test_get_jsonschema():
def test_get_datamodel():
metadata = DataGridMetaData(name="TestArray", title="Test Array")
jsonschema = get_jsonschema(metadata)
jsonschema = get_datamodel(metadata)
assert jsonschema["title"] == "TestArray"


def test_read_excel(write_table_test): # noqa: F811
path = write_table_test
obj, metadata = read_excel(path, get_jsonschema=get_jsonschema)
obj, metadata = read_excel(path, get_datamodel=get_datamodel)
assert isinstance(obj, list)
assert len(obj) == 3
print("done")
Expand All @@ -121,7 +121,7 @@ def get_raw_jsonschema(metadata: DataGridMetaData) -> dict:
def test_read_excel_with_null(from_json_with_null): # noqa: F811
fpth, data, schema = from_json_with_null
obj, metadata = read_excel(
fpth, get_jsonschema=lambda *args: schema.model_json_schema()
fpth, get_datamodel=lambda *args: schema.model_json_schema()
)
assert obj == data

Expand Down
Binary file modified tests/xl/df-x-squared.xlsx
Binary file not shown.
Binary file modified tests/xl/from-json-T.xlsx
Binary file not shown.
Binary file modified tests/xl/from-json.xlsx
Binary file not shown.
Binary file modified tests/xl/from-schema-T.xlsx
Binary file not shown.
Binary file modified tests/xl/from-schema.xlsx
Binary file not shown.
Binary file modified tests/xl/many-charts.xlsx
Binary file not shown.
Binary file modified tests/xl/many-sheets.xlsx
Binary file not shown.
Binary file modified tests/xl/simple-T.xlsx
Binary file not shown.
Binary file modified tests/xl/simple.xlsx
Binary file not shown.

0 comments on commit 403b242

Please sign in to comment.