From d5b67192f3f7661183e23634f54beaa4ebc6de77 Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Fri, 10 Jan 2025 23:20:15 +1100 Subject: [PATCH] fix: Fix empty output of `to_arrow()` on filtered unit height DataFrame (#20656) --- crates/polars-core/src/frame/column/mod.rs | 32 ++++++++++++++++++-- py-polars/tests/unit/interop/test_interop.py | 5 +++ py-polars/tests/unit/test_scalar.py | 5 +++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/crates/polars-core/src/frame/column/mod.rs b/crates/polars-core/src/frame/column/mod.rs index a1426bd225cb..032b748a74c8 100644 --- a/crates/polars-core/src/frame/column/mod.rs +++ b/crates/polars-core/src/frame/column/mod.rs @@ -1090,8 +1090,27 @@ impl Column { pub fn rechunk(&self) -> Column { match self { Column::Series(s) => s.rechunk().into(), - Column::Partitioned(_) => self.clone(), - Column::Scalar(_) => self.clone(), + Column::Partitioned(s) => { + if let Some(s) = s.lazy_as_materialized_series() { + // This should always hold for partitioned. + debug_assert_eq!(s.n_chunks(), 1) + } + self.clone() + }, + Column::Scalar(s) => { + if s.lazy_as_materialized_series() + .filter(|x| x.n_chunks() > 1) + .is_some() + { + Column::Scalar(ScalarColumn::new( + s.name().clone(), + s.scalar().clone(), + s.len(), + )) + } else { + self.clone() + } + }, } } @@ -1700,7 +1719,14 @@ impl Column { pub fn n_chunks(&self) -> usize { match self { Column::Series(s) => s.n_chunks(), - Column::Scalar(_) | Column::Partitioned(_) => 1, + Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()), + Column::Partitioned(s) => { + if let Some(s) = s.lazy_as_materialized_series() { + // This should always hold for partitioned. + debug_assert_eq!(s.n_chunks(), 1) + } + 1 + }, } } diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py index 90d3ab4a85c9..d203095070f9 100644 --- a/py-polars/tests/unit/interop/test_interop.py +++ b/py-polars/tests/unit/interop/test_interop.py @@ -857,3 +857,8 @@ def test_from_arrow_string_cache_20271() -> None: assert_series_equal( df.to_series().to_physical(), pl.Series("b", [3, 4]), check_dtypes=False ) + + +def test_to_arrow_empty_chunks_20627() -> None: + df = pl.concat(2 * [pl.Series([1])]).filter(pl.Series([False, True])).to_frame() + assert df.to_arrow().shape == (1, 1) diff --git a/py-polars/tests/unit/test_scalar.py b/py-polars/tests/unit/test_scalar.py index 825d87723b67..0fdb74cb8843 100644 --- a/py-polars/tests/unit/test_scalar.py +++ b/py-polars/tests/unit/test_scalar.py @@ -79,3 +79,8 @@ def test_scalar_identification_function_expr_in_binary() -> None: pl.select(x).with_columns(o=pl.col("x").null_count() > 0), pl.select(x, o=False), ) + + +def test_scalar_rechunk_20627() -> None: + df = pl.concat(2 * [pl.Series([1])]).filter(pl.Series([False, True])).to_frame() + assert df.rechunk().to_series().n_chunks() == 1