Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,7 @@ I/O
- Bug in :func:`read_csv` when passing simultaneously a parser in ``date_parser`` and ``parse_dates=False``, the parsing was still called (:issue:`44366`)
- Bug in :func:`read_csv` silently ignoring errors when failing to create a memory-mapped file (:issue:`44766`)
- Bug in :func:`read_csv` when passing a ``tempfile.SpooledTemporaryFile`` opened in binary mode (:issue:`44748`)
- Bug in :meth:`DataFrame.to_parquet` where ``engine="pyarrow"`` could result in partial write when column dtype is ``float16`` (:issue:`44846`)
-

Period
Expand Down
7 changes: 7 additions & 0 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,13 @@ def write(
):
self.validate_dataframe(df)

fp16_columns = df.select_dtypes(include="float16").columns
if fp16_columns.size > 0:
raise ValueError(
f"Columns [{','.join(fp16_columns.values)}] are of dtype float16. "
+ "PyArrow does not support saving float16 dtype columns."
)

from_pandas_kwargs: dict[str, Any] = {"schema": kwargs.pop("schema", None)}
if index is not None:
from_pandas_kwargs["preserve_index"] = index
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,18 @@ def test_duplicate_columns(self, pa):
df = pd.DataFrame(np.arange(12).reshape(4, 3), columns=list("aaa")).copy()
self.check_error_on_write(df, pa, ValueError, "Duplicate column names found")

def test_unsupported_float16(self, pa):
    """Check that writing a float16 column via the ``pa`` engine is rejected.

    Builds a one-column float16 frame and delegates to
    ``self.check_error_on_write``, passing ``ValueError`` and the expected
    error-message pattern.
    """
    # #44847
    # NOTE(review): the whatsnew entry for this change cites issue 44846;
    # confirm which number this comment should reference.
    # Not able to write float 16 column using pyarrow.
    data = np.arange(2, 10, dtype=np.float16)
    df = pd.DataFrame(data=data, columns=["fp16"])
    # The expected message is spelled out literally instead of being rebuilt
    # with the same expression the implementation uses, so a formatting bug
    # there cannot be mirrored here. The brackets are escaped because the
    # message is presumably matched as a regular expression by
    # check_error_on_write -- confirm against that helper.
    msg = (
        r"Columns \[fp16\] are of dtype float16. "
        "PyArrow does not support saving float16 dtype columns."
    )
    self.check_error_on_write(df, pa, ValueError, msg)

def test_unsupported(self, pa):
# timedelta
df = pd.DataFrame({"a": pd.timedelta_range("1 day", periods=3)})
Expand Down