Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4179,11 +4179,23 @@ def validate_data_columns(self, data_columns, min_itemsize, non_index_axes) -> l

axis, axis_labels = non_index_axes[0]
info = self.info.get(axis, {})
if info.get("type") == "MultiIndex" and data_columns:
raise ValueError(
f"cannot use a multi-index on axis [{axis}] with "
f"data_columns {data_columns}"
)
if info.get("type") == "MultiIndex":
if data_columns:
raise ValueError(
f"cannot use a multi-index on axis [{axis}] with "
f"data_columns {data_columns}"
)
if isinstance(min_itemsize, dict):
mi_keys = [k for k in min_itemsize if k != "values"]
if mi_keys:
raise ValueError(
f"cannot use min_itemsize keys {mi_keys} on axis "
f"[{axis}] with a MultiIndex; per-column "
"min_itemsize requires data_columns, which are not "
"supported with MultiIndex columns. Use "
"min_itemsize={'values': N} to apply a single "
"min_itemsize across all string columns."
)

# evaluate the passed data_columns, True == use all columns
# take only valid axis labels
Expand Down
31 changes: 31 additions & 0 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
_testing as tm,
concat,
Expand Down Expand Up @@ -430,6 +431,36 @@ def check_col(key, name, size):
temp_hdfstore.append("df", df, min_itemsize={"foo": 20, "foobar": 20})


def test_append_min_itemsize_multiindex_columns(temp_hdfstore):
    # GH#12154
    # Per-column min_itemsize is unsupported when the columns are a
    # MultiIndex (data_columns themselves are unsupported there). The
    # earlier failures were opaque ("not an axis or data_column" /
    # "non-object label DataIndexableCol"), so verify that the raised
    # ValueError names the offending keys and points at the workaround.
    mi_columns = MultiIndex.from_tuples([(1, "a"), (1, "b"), (2, "c")])
    frame = DataFrame(
        [["xx", "yy", "zz"], ["aa", "bb", "cc"]],
        columns=mi_columns,
    )

    # (min_itemsize key, regex the error message must match)
    rejected_cases = [
        # a bare first-level label
        (
            1,
            r"cannot use min_itemsize keys \[1\] on axis \[1\] with a "
            r"MultiIndex.*min_itemsize=\{'values': N\}",
        ),
        # a full column tuple
        (
            (1, "a"),
            r"cannot use min_itemsize keys \[\(1, 'a'\)\] on axis \[1\] with a "
            r"MultiIndex.*min_itemsize=\{'values': N\}",
        ),
    ]
    for bad_key, expected_msg in rejected_cases:
        with pytest.raises(ValueError, match=expected_msg):
            temp_hdfstore.append("df", frame, min_itemsize={bad_key: 20})

    # The "values" key is the documented workaround: it must still be
    # accepted and the stored frame must round-trip unchanged.
    temp_hdfstore.append("df", frame, min_itemsize={"values": 20})
    roundtripped = temp_hdfstore.select("df")
    tm.assert_frame_equal(roundtripped, frame)


def test_append_with_empty_string(temp_hdfstore):
# with all empty strings (GH 12242)
df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]})
Expand Down
Loading