# Patch summary (GH 65654): make melt() raise ValueError when the combined
# output column names (*id_vars, *var_name, value_name) contain duplicates.
#
#   1. doc/source/whatsnew/v3.1.0.rst
#        Adds one entry under "Reshaping" describing the fix.
#   2. pandas/core/reshape/melt.py
#        After var_name has been normalised to a list, builds the tuple of
#        output names, and if any name repeats, collects the repeated names
#        (every occurrence after the first) and raises ValueError naming them.
#   3. pandas/tests/reshape/test_melt.py
#        Adds a parametrized test covering three collisions: var_name equal to
#        an id_vars column, var_name equal to value_name, and an id_vars column
#        named "variable" with var_name left unset.
#
# NOTE(review): line breaks and indentation below were restored from a
# whitespace-collapsed copy of this patch; confirm the context-line
# indentation against the target files before applying.
diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst
index 5a6b1546e4b2b..a8469e17e4a43 100644
--- a/doc/source/whatsnew/v3.1.0.rst
+++ b/doc/source/whatsnew/v3.1.0.rst
@@ -370,6 +370,7 @@ Groupby/resample/rolling
 Reshaping
 ^^^^^^^^^
 - Bug in :func:`merge` where merging on a :class:`MultiIndex` containing ``NaN`` values mapped ``NaN`` keys to the last level value instead of ``NaN`` (:issue:`64492`)
+- Bug in :meth:`DataFrame.melt` where ``var_name`` colliding with an ``id_vars`` column or ``value_name`` silently overwrote the affected column data instead of raising (:issue:`65654`)
 - Bug in :meth:`DataFrame.pivot_table` with ``margins=True`` raising ``TypeError`` when ``values`` has an :class:`ExtensionDtype` that cannot hold ``NA`` (e.g. :class:`IntervalDtype` with an integer subtype) and no ``columns`` were specified (:issue:`55484`)
 - Bug in :meth:`Index.union` where the result could be unsorted when both inputs were monotonic increasing but disjoint, when ``sort`` was not ``False`` (:issue:`54646`)
 - In :func:`pivot_table`, when ``values`` is empty, the aggregation will be computed on a Series of all NA values (:issue:`46475`)
diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
index cdd28e7888756..c1fc341cab93a 100644
--- a/pandas/core/reshape/melt.py
+++ b/pandas/core/reshape/melt.py
@@ -240,6 +240,21 @@ def melt(
     else:
         var_name = [var_name]
 
+    output_names = (*id_vars, *var_name, value_name)
+    if len(set(output_names)) != len(output_names):
+        seen = set()
+        dups = []
+        for n in output_names:
+            if n in seen:
+                dups.append(n)
+            else:
+                seen.add(n)
+        raise ValueError(
+            f"melt would produce duplicate column names {dups} from "
+            f"id_vars={id_vars!r}, var_name={var_name!r}, "
+            f"value_name={value_name!r}."
+        )
+
     num_rows, K = frame.shape
     num_cols_adjusted = K - len(id_vars)
 
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
index d347d42ee6192..d4904c1333cdf 100644
--- a/pandas/tests/reshape/test_melt.py
+++ b/pandas/tests/reshape/test_melt.py
@@ -563,6 +563,22 @@ def test_melt_duplicate_column_header_raises(self):
         with pytest.raises(ValueError, match=msg):
             df.melt(id_vars=["A"], value_vars=["B"])
 
+    @pytest.mark.parametrize(
+        "kwargs",
+        [
+            {"id_vars": "id", "var_name": "id"},
+            {"id_vars": "id", "var_name": "x", "value_name": "x"},
+            {"id_vars": ["id", "variable"], "value_vars": ["a"]},
+        ],
+    )
+    def test_melt_var_name_collision_raises(self, kwargs):
+        # GH 65654
+        df = DataFrame(
+            {"id": [1, 2], "variable": [9, 9], "a": [10, 20], "b": [100, 200]}
+        )
+        with pytest.raises(ValueError, match="duplicate column names"):
+            df.melt(**kwargs)
+
 
 class TestLreshape:
     def test_pairs(self):