Skip to content

Commit

Permalink
perf(python): Use PyO3 to convert between Python and Rust datetimes (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
bschoenmaeckers authored Jan 13, 2025
1 parent 93d0b10 commit e346f82
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 69 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion crates/polars-python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ arboard = { workspace = true, optional = true }
bincode = { workspace = true }
bytemuck = { workspace = true }
bytes = { workspace = true }
chrono-tz = { workspace = true }
either = { workspace = true }
flate2 = { workspace = true }
itoa = { workspace = true }
Expand All @@ -38,7 +39,7 @@ ndarray = { workspace = true }
num-traits = { workspace = true }
numpy = { workspace = true }
once_cell = { workspace = true }
pyo3 = { workspace = true, features = ["abi3-py39", "chrono", "multiple-pymethods"] }
pyo3 = { workspace = true, features = ["abi3-py39", "chrono", "chrono-tz", "multiple-pymethods"] }
recursive = { workspace = true }
serde_json = { workspace = true, optional = true }
thiserror = { workspace = true }
Expand Down
84 changes: 39 additions & 45 deletions crates/polars-python/src/conversion/any_value.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
use std::borrow::{Borrow, Cow};

use chrono_tz::Tz;
#[cfg(feature = "object")]
use polars::chunked_array::object::PolarsObjectSafe;
#[cfg(feature = "object")]
use polars::datatypes::OwnedObject;
use polars::datatypes::{DataType, Field, PlHashMap, TimeUnit};
use polars::prelude::{AnyValue, PlSmallStr, Series, TimeZone};
use polars_core::export::chrono::{NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike};
use polars::export::chrono::{DateTime, FixedOffset};
use polars::prelude::{AnyValue, PlSmallStr, Series};
use polars_core::export::chrono::{
Datelike, NaiveDate, NaiveDateTime, NaiveTime, TimeDelta, Timelike,
};
use polars_core::utils::any_values_to_supertype_and_n_dtypes;
use polars_core::utils::arrow::temporal_conversions::date32_to_date;
use pyo3::exceptions::{PyOverflowError, PyTypeError, PyValueError};
Expand All @@ -17,7 +21,7 @@ use pyo3::types::{
use pyo3::{intern, IntoPyObjectExt};

use super::datetime::{
elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime,
datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
};
use super::{decimal_to_digits, struct_dict, ObjectValue, Wrap};
use crate::error::PyPolarsErr;
Expand Down Expand Up @@ -92,15 +96,11 @@ pub(crate) fn any_value_into_py_object<'py>(
date.into_bound_py_any(py)
},
AnyValue::Datetime(v, time_unit, time_zone) => {
datetime_to_py_object(py, utils, v, time_unit, time_zone)
datetime_to_py_object(py, v, time_unit, time_zone)
},
AnyValue::DatetimeOwned(v, time_unit, time_zone) => {
datetime_to_py_object(py, v, time_unit, time_zone.as_ref().map(AsRef::as_ref))
},
AnyValue::DatetimeOwned(v, time_unit, time_zone) => datetime_to_py_object(
py,
utils,
v,
time_unit,
time_zone.as_ref().map(AsRef::as_ref),
),
AnyValue::Duration(v, time_unit) => {
let time_delta = elapsed_offset_to_timedelta(v, time_unit);
time_delta.into_bound_py_any(py)
Expand Down Expand Up @@ -142,28 +142,6 @@ pub(crate) fn any_value_into_py_object<'py>(
}
}

/// Turn a datetime value `v` (expressed in `tu` units) into a Python object.
///
/// Values carrying a time zone are handed to the Python-side
/// `to_py_datetime` helper looked up on `utils`; values without a time zone
/// go through `timestamp_to_naive_datetime` and PyO3's own conversion.
fn datetime_to_py_object<'py>(
    py: Python<'py>,
    utils: &Bound<'py, PyAny>,
    v: i64,
    tu: TimeUnit,
    tz: Option<&TimeZone>,
) -> PyResult<Bound<'py, PyAny>> {
    match tz {
        None => timestamp_to_naive_datetime(v, tu).into_pyobject(py),
        Some(time_zone) => {
            // Once https://github.com/pola-rs/polars/issues/16199 is
            // implemented, this call into Python can be replaced by
            // something like:
            //
            // let tz: chrono_tz::Tz = time_zone.parse().unwrap();
            // let datetime = tz.from_local_datetime(&naive_datetime).earliest().unwrap();
            // datetime.into_py(py)
            let to_py_datetime = utils.getattr(intern!(py, "to_py_datetime"))?;
            to_py_datetime.call1((v, tu.to_ascii(), time_zone.as_str()))
        },
    }
}

/// Holds a Python type object and implements hashing / equality based on the pointer address of the
/// type object. This is used as a hashtable key instead of only the `usize` pointer value, as we
/// need to hold a ref to the Python type object to keep it alive.
Expand Down Expand Up @@ -273,18 +251,34 @@ pub(crate) fn py_object_to_any_value<'py>(
}

/// Convert a Python datetime object into an `AnyValue::Datetime` holding
/// microseconds since the Unix epoch, with no time zone attached.
///
/// NOTE(review): this span looks like a rendered diff with the +/- markers
/// lost. The `Python::with_gil` closure appears to be the removed
/// implementation and everything from `let py = ob.py();` onwards appears to
/// be its replacement — confirm against the actual file before editing.
fn get_datetime(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
// Probably needs to wait for
// https://github.com/pola-rs/polars/issues/16199 to do it a faster way.
// Calls the Python helper `datetime_to_int(ob, "us")` and extracts an i64.
// Both `unwrap`s panic if the attribute lookup or the call fails.
Python::with_gil(|py| {
let date = pl_utils(py)
.bind(py)
.getattr(intern!(py, "datetime_to_int"))
.unwrap()
.call1((ob, intern!(py, "us")))
.unwrap();
let v = date.extract::<i64>()?;
Ok(AnyValue::Datetime(v, TimeUnit::Microseconds, None))
})
let py = ob.py();
// The `tzinfo` attribute decides which extraction path is taken below.
let tzinfo = ob.getattr(intern!(py, "tzinfo"))?;

let timestamp = if tzinfo.is_none() {
// No tzinfo: extract as a naive datetime and measure it against the naive epoch.
let datetime = ob.extract::<NaiveDateTime>()?;
let delta = datetime - NaiveDateTime::UNIX_EPOCH;
// NOTE(review): the unwrap assumes the microsecond count fits in an i64 — confirm.
delta.num_microseconds().unwrap()
} else if tzinfo.hasattr(intern!(py, "key"))? {
// tzinfo exposes a `key` attribute — presumably a named zone such as
// `zoneinfo.ZoneInfo`; verify against callers.
let datetime = ob.extract::<DateTime<Tz>>()?;
if datetime.year() >= 2100 {
// chrono-tz does not support dates after 2100
// https://github.com/chronotope/chrono-tz/issues/135
// Fall back to the Python helper `datetime_to_int(ob, "us")` for these years.
pl_utils(py)
.bind(py)
.getattr(intern!(py, "datetime_to_int"))?
.call1((ob, intern!(py, "us")))?
.extract::<i64>()?
} else {
// Normalise to UTC before subtracting the epoch.
let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
delta.num_microseconds().unwrap()
}
} else {
// Any other tzinfo is extracted as a fixed UTC offset.
let datetime = ob.extract::<DateTime<FixedOffset>>()?;
let delta = datetime.to_utc() - DateTime::UNIX_EPOCH;
delta.num_microseconds().unwrap()
};

// The time zone is not carried over: the result always has `None` as its zone.
Ok(AnyValue::Datetime(timestamp, TimeUnit::Microseconds, None))
}

fn get_timedelta(ob: &Bound<'_, PyAny>, _strict: bool) -> PyResult<AnyValue<'static>> {
Expand Down
29 changes: 7 additions & 22 deletions crates/polars-python/src/conversion/chunked_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use pyo3::types::{PyBytes, PyList, PyNone, PyTuple};
use pyo3::{intern, BoundObject};

use super::datetime::{
elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime, timestamp_to_naive_datetime,
datetime_to_py_object, elapsed_offset_to_timedelta, nanos_since_midnight_to_naivetime,
};
use super::{decimal_to_digits, struct_dict};
use crate::prelude::*;
Expand Down Expand Up @@ -78,27 +78,12 @@ impl<'py> IntoPyObject<'py> for &Wrap<&DatetimeChunked> {
type Error = PyErr;

/// Build a Python list from the wrapped datetime chunked array, mapping each
/// non-null value to a Python object and passing nulls through as `None`
/// options to `PyList::new`.
///
/// NOTE(review): this span looks like a rendered diff with the +/- markers
/// lost. The `if time_zone.is_some() { .. } else { .. }` section appears to be
/// the removed implementation and the statements after it the replacement —
/// confirm against the actual file before editing.
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
let time_zone = self.0.time_zone();
if time_zone.is_some() {
// Switch to more efficient code path in
// https://github.com/pola-rs/polars/issues/16199
// Time-zone-aware path: every value is converted by calling the Python
// helper `to_py_datetime(v, time_unit, time_zone)`.
let utils = pl_utils(py).bind(py);
let convert = utils.getattr(intern!(py, "to_py_datetime"))?;
let time_unit = self.0.time_unit().to_ascii();
let time_zone = time_zone.as_deref().into_pyobject(py)?;
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| convert.call1((v, time_unit, &time_zone)).unwrap()));
PyList::new(py, iter)
} else {
// Naive path: values are converted with `timestamp_to_naive_datetime`.
let time_unit = self.0.time_unit();
let iter = self
.0
.iter()
.map(|opt_v| opt_v.map(|v| timestamp_to_naive_datetime(v, time_unit)));
PyList::new(py, iter)
}
// Single path: `datetime_to_py_object` is called per value with the array's
// time unit and optional time zone.
let time_zone = self.0.time_zone().as_ref();
let time_unit = self.0.time_unit();
let iter = self.0.iter().map(|opt_v| {
// NOTE(review): `unwrap` panics if `datetime_to_py_object` returns `Err`
// instead of surfacing the error through this method's `Result`.
opt_v.map(|v| datetime_to_py_object(py, v, time_unit, time_zone).unwrap())
});
PyList::new(py, iter)
}
}

Expand Down
34 changes: 33 additions & 1 deletion crates/polars-python/src/conversion/datetime.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
//! Utilities for converting dates, times, datetimes, and so on.
use std::str::FromStr;

use chrono_tz::Tz;
use polars::datatypes::TimeUnit;
use polars_core::export::chrono::{NaiveDateTime, NaiveTime, TimeDelta};
use polars_core::datatypes::TimeZone;
use polars_core::export::chrono::{
DateTime, FixedOffset, NaiveDateTime, NaiveTime, TimeDelta, TimeZone as _,
};
use pyo3::{Bound, IntoPyObject, PyAny, PyResult, Python};

use crate::error::PyPolarsErr;

pub fn elapsed_offset_to_timedelta(elapsed: i64, time_unit: TimeUnit) -> TimeDelta {
let (in_second, nano_multiplier) = match time_unit {
Expand Down Expand Up @@ -29,3 +38,26 @@ pub fn nanos_since_midnight_to_naivetime(nanos_since_midnight: i64) -> NaiveTime
NaiveTime::from_hms_opt(0, 0, 0).unwrap()
+ elapsed_offset_to_timedelta(nanos_since_midnight, TimeUnit::Nanoseconds)
}

/// Convert a datetime stored as `v` units of `tu` since the Unix epoch into a
/// Python object.
///
/// * `tz` is `None`: the value is converted through
///   `timestamp_to_naive_datetime` and handed to PyO3 without a zone.
/// * `tz` parses as a `chrono_tz::Tz`: the UTC instant is expressed in that
///   named zone.
/// * `tz` parses as a `FixedOffset`: the UTC instant is expressed at that
///   offset.
///
/// # Errors
///
/// Returns an error when the time zone string parses as neither a named zone
/// nor a fixed offset, when the instant cannot be represented by chrono in
/// the named-zone branch, or when the PyO3 conversion itself fails.
pub fn datetime_to_py_object<'py>(
    py: Python<'py>,
    v: i64,
    tu: TimeUnit,
    tz: Option<&TimeZone>,
) -> PyResult<Bound<'py, PyAny>> {
    let Some(time_zone) = tz else {
        return timestamp_to_naive_datetime(v, tu).into_pyobject(py);
    };

    if let Ok(tz) = Tz::from_str(time_zone) {
        // `DateTime + TimeDelta` panics when the sum is out of range; use the
        // checked variant so extreme values surface as an error rather than a
        // panic.
        let utc_datetime = DateTime::UNIX_EPOCH
            .checked_add_signed(elapsed_offset_to_timedelta(v, tu))
            .ok_or_else(|| {
                PyPolarsErr::Other(format!("Datetime value is out of range: {v}"))
            })?;
        utc_datetime.with_timezone(&tz).into_pyobject(py)
    } else if let Ok(tz) = FixedOffset::from_str(time_zone) {
        let naive_datetime = timestamp_to_naive_datetime(v, tu);
        tz.from_utc_datetime(&naive_datetime).into_pyobject(py)
    } else {
        Err(PyPolarsErr::Other(format!("Could not parse timezone: {time_zone}")).into())
    }
}

0 comments on commit e346f82

Please sign in to comment.