Skip to content

Commit

Permalink
Add example of validate_time_series_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
shchur committed Dec 17, 2024
1 parent bbd81cf commit 0960eaf
Showing 1 changed file with 33 additions and 18 deletions.
51 changes: 33 additions & 18 deletions docs/02-dataset-format.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,15 @@
{
"data": {
"text/plain": [
"{'id': np.str_('T000000'),\n",
"{'id': 'T000000',\n",
" 'timestamp': array(['2017-01-01T14:00:00.000', '2017-01-01T15:00:00.000',\n",
" '2017-01-01T16:00:00.000', ..., '2018-03-31T13:00:00.000',\n",
" '2018-03-31T14:00:00.000', '2018-03-31T15:00:00.000'],\n",
" shape=(10898,), dtype='datetime64[ms]'),\n",
" 'target': array([453., 417., 395., ..., 132., 158., 118.],\n",
" shape=(10898,), dtype=float32),\n",
" 'city': np.str_('Beijing'),\n",
" 'station': np.str_('aotizhongxin_aq'),\n",
" 'measurement': np.str_('PM2.5')}"
" dtype='datetime64[ms]'),\n",
" 'target': array([453., 417., 395., ..., 132., 158., 118.], dtype=float32),\n",
" 'city': 'Beijing',\n",
" 'station': 'aotizhongxin_aq',\n",
" 'measurement': 'PM2.5'}"
]
},
"execution_count": 3,
Expand All @@ -120,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -206,12 +205,12 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from fev.utils import convert_long_df_to_hf_dataset"
"import fev.utils"
]
},
{
Expand Down Expand Up @@ -381,7 +380,7 @@
}
],
"source": [
"ds = convert_long_df_to_hf_dataset(df, id_column=\"item_id\", static_columns=[\"product_code\", \"product_category\", \"product_subcategory\", \"location_code\"])\n",
"ds = fev.utils.convert_long_df_to_hf_dataset(df, id_column=\"item_id\", static_columns=[\"product_code\", \"product_category\", \"product_subcategory\", \"location_code\"])\n",
"ds.features"
]
},
Expand All @@ -393,11 +392,11 @@
{
"data": {
"text/plain": [
"{'item_id': np.str_('1062_101'),\n",
" 'product_code': np.int64(1062),\n",
" 'product_category': np.str_('Beverages'),\n",
" 'product_subcategory': np.str_('Fruit Juice Mango'),\n",
" 'location_code': np.int64(101),\n",
"{'item_id': '1062_101',\n",
" 'product_code': 1062,\n",
" 'product_category': 'Beverages',\n",
" 'product_subcategory': 'Fruit Juice Mango',\n",
" 'location_code': 101,\n",
" 'timestamp': array(['2018-01-01T00:00:00.000000', '2018-01-08T00:00:00.000000',\n",
" '2018-01-15T00:00:00.000000', '2018-01-22T00:00:00.000000',\n",
" '2018-01-29T00:00:00.000000', '2018-02-05T00:00:00.000000',\n",
Expand Down Expand Up @@ -442,6 +441,22 @@
"ds.with_format(\"numpy\")[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To verify that the dataset was converted correctly, call the `fev.utils.validate_time_series_dataset` function."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"fev.utils.validate_time_series_dataset(ds, id_column=\"item_id\", timestamp_column=\"timestamp\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -451,7 +466,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -467,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand Down

0 comments on commit 0960eaf

Please sign in to comment.